Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/boolean_simplifier.cc | 39
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 1248
-rw-r--r--  compiler/optimizing/bounds_check_elimination.h | 7
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc | 2
-rw-r--r--  compiler/optimizing/builder.cc | 356
-rw-r--r--  compiler/optimizing/builder.h | 20
-rw-r--r--  compiler/optimizing/code_generator.cc | 93
-rw-r--r--  compiler/optimizing/code_generator.h | 44
-rw-r--r--  compiler/optimizing/code_generator_arm.cc | 1421
-rw-r--r--  compiler/optimizing/code_generator_arm.h | 87
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 1391
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 76
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 248
-rw-r--r--  compiler/optimizing/code_generator_mips.h | 4
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 774
-rw-r--r--  compiler/optimizing/code_generator_mips64.h | 44
-rw-r--r--  compiler/optimizing/code_generator_utils.cc | 5
-rw-r--r--  compiler/optimizing/code_generator_utils.h | 7
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 1379
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 69
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 1411
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 63
-rw-r--r--  compiler/optimizing/common_arm64.h | 62
-rw-r--r--  compiler/optimizing/common_dominator.h | 93
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 12
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_arm.cc | 92
-rw-r--r--  compiler/optimizing/dex_cache_array_fixups_arm.h | 37
-rw-r--r--  compiler/optimizing/graph_checker.cc | 127
-rw-r--r--  compiler/optimizing/graph_checker.h | 3
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 52
-rw-r--r--  compiler/optimizing/gvn.cc | 6
-rw-r--r--  compiler/optimizing/induction_var_analysis_test.cc | 7
-rw-r--r--  compiler/optimizing/induction_var_range.cc | 220
-rw-r--r--  compiler/optimizing/induction_var_range.h | 71
-rw-r--r--  compiler/optimizing/induction_var_range_test.cc | 274
-rw-r--r--  compiler/optimizing/inliner.cc | 21
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 48
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 273
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h | 21
-rw-r--r--  compiler/optimizing/intrinsics.cc | 9
-rw-r--r--  compiler/optimizing/intrinsics_arm.cc | 50
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 46
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc | 325
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc | 117
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 74
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 77
-rw-r--r--  compiler/optimizing/licm_test.cc | 6
-rw-r--r--  compiler/optimizing/load_store_elimination.cc | 292
-rw-r--r--  compiler/optimizing/locations.cc | 37
-rw-r--r--  compiler/optimizing/locations.h | 10
-rw-r--r--  compiler/optimizing/nodes.cc | 492
-rw-r--r--  compiler/optimizing/nodes.h | 266
-rw-r--r--  compiler/optimizing/nodes_arm.h | 59
-rw-r--r--  compiler/optimizing/nodes_arm64.cc | 84
-rw-r--r--  compiler/optimizing/nodes_arm64.h | 107
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 180
-rw-r--r--  compiler/optimizing/optimizing_cfi_test_expected.inc | 329
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 451
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 101
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc (renamed from compiler/optimizing/constant_area_fixups_x86.cc) | 51
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.h (renamed from compiler/optimizing/constant_area_fixups_x86.h) | 12
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 151
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h | 3
-rw-r--r--  compiler/optimizing/primitive_type_propagation.cc | 1
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 186
-rw-r--r--  compiler/optimizing/reference_type_propagation.h | 3
-rw-r--r--  compiler/optimizing/register_allocator.cc | 6
-rw-r--r--  compiler/optimizing/sharpening.cc | 16
-rw-r--r--  compiler/optimizing/side_effects_test.cc | 26
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 118
-rw-r--r--  compiler/optimizing/ssa_builder.h | 1
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 42
72 files changed, 10316 insertions, 3619 deletions
diff --git a/compiler/optimizing/boolean_simplifier.cc b/compiler/optimizing/boolean_simplifier.cc
index f985745e7a..f0cafc847f 100644
--- a/compiler/optimizing/boolean_simplifier.cc
+++ b/compiler/optimizing/boolean_simplifier.cc
@@ -61,40 +61,6 @@ static bool NegatesCondition(HInstruction* input_true, HInstruction* input_false
&& input_false->IsIntConstant() && input_false->AsIntConstant()->IsOne();
}
-// Returns an instruction with the opposite boolean value from 'cond'.
-static HInstruction* GetOppositeCondition(HInstruction* cond) {
- HGraph* graph = cond->GetBlock()->GetGraph();
- ArenaAllocator* allocator = graph->GetArena();
-
- if (cond->IsCondition()) {
- HInstruction* lhs = cond->InputAt(0);
- HInstruction* rhs = cond->InputAt(1);
- switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite*
- case kCondEQ: return new (allocator) HEqual(lhs, rhs);
- case kCondNE: return new (allocator) HNotEqual(lhs, rhs);
- case kCondLT: return new (allocator) HLessThan(lhs, rhs);
- case kCondLE: return new (allocator) HLessThanOrEqual(lhs, rhs);
- case kCondGT: return new (allocator) HGreaterThan(lhs, rhs);
- case kCondGE: return new (allocator) HGreaterThanOrEqual(lhs, rhs);
- case kCondB: return new (allocator) HBelow(lhs, rhs);
- case kCondBE: return new (allocator) HBelowOrEqual(lhs, rhs);
- case kCondA: return new (allocator) HAbove(lhs, rhs);
- case kCondAE: return new (allocator) HAboveOrEqual(lhs, rhs);
- }
- } else if (cond->IsIntConstant()) {
- HIntConstant* int_const = cond->AsIntConstant();
- if (int_const->IsZero()) {
- return graph->GetIntConstant(1);
- } else {
- DCHECK(int_const->IsOne());
- return graph->GetIntConstant(0);
- }
- }
- // General case when 'cond' is another instruction of type boolean,
- // as verified by SSAChecker.
- return new (allocator) HBooleanNot(cond);
-}
-
void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
DCHECK(block->EndsWithIf());
@@ -126,10 +92,7 @@ void HBooleanSimplifier::TryRemovingBooleanSelection(HBasicBlock* block) {
HInstruction* replacement;
if (NegatesCondition(true_value, false_value)) {
- replacement = GetOppositeCondition(if_condition);
- if (replacement->GetBlock() == nullptr) {
- block->InsertInstructionBefore(replacement, if_instruction);
- }
+ replacement = graph_->InsertOppositeCondition(if_condition, if_instruction);
} else if (PreservesCondition(true_value, false_value)) {
replacement = if_condition;
} else {
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index bcc32403d3..a44830207d 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -20,6 +20,7 @@
#include "base/arena_containers.h"
#include "induction_var_range.h"
+#include "side_effects_analysis.h"
#include "nodes.h"
namespace art {
@@ -175,6 +176,24 @@ class ValueBound : public ValueObject {
return false;
}
+ // Returns if it's certain this->bound > `bound`.
+ bool GreaterThan(ValueBound bound) const {
+ if (Equal(instruction_, bound.instruction_)) {
+ return constant_ > bound.constant_;
+ }
+ // Not comparable. Just return false.
+ return false;
+ }
+
+ // Returns if it's certain this->bound < `bound`.
+ bool LessThan(ValueBound bound) const {
+ if (Equal(instruction_, bound.instruction_)) {
+ return constant_ < bound.constant_;
+ }
+ // Not comparable. Just return false.
+ return false;
+ }
+
// Try to narrow lower bound. Returns the greatest of the two if possible.
// Pick one if they are not comparable.
static ValueBound NarrowLowerBound(ValueBound bound1, ValueBound bound2) {
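A standalone C++ sketch of the rule the two helpers added above encode: bounds are ordered only when they are offsets from the same base instruction, and anything else is treated as "not comparable" (Bound, FakeInstruction, and the values in main are illustrative stand-ins, not ART types).

#include <cassert>
#include <cstdint>

struct FakeInstruction {};  // stand-in for HInstruction* in this sketch

struct Bound {
  const FakeInstruction* instruction;  // nullptr means a plain constant bound
  int32_t constant;

  // Certain only when both bounds hang off the same base instruction.
  bool GreaterThan(Bound other) const {
    return instruction == other.instruction && constant > other.constant;
  }
  bool LessThan(Bound other) const {
    return instruction == other.instruction && constant < other.constant;
  }
};

int main() {
  FakeInstruction array_length;
  Bound array_upper{&array_length, -1};  // array.length - 1
  Bound index_upper{&array_length, 2};   // array.length + 2: provably OOB
  assert(index_upper.GreaterThan(array_upper));
  Bound unrelated{nullptr, 10};          // different base: not comparable
  assert(!unrelated.GreaterThan(array_upper));
  return 0;
}
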
@@ -252,157 +271,6 @@ class ValueBound : public ValueObject {
int32_t constant_;
};
-// Collect array access data for a loop.
-// TODO: make it work for multiple arrays inside the loop.
-class ArrayAccessInsideLoopFinder : public ValueObject {
- public:
- explicit ArrayAccessInsideLoopFinder(HInstruction* induction_variable)
- : induction_variable_(induction_variable),
- found_array_length_(nullptr),
- offset_low_(std::numeric_limits<int32_t>::max()),
- offset_high_(std::numeric_limits<int32_t>::min()) {
- Run();
- }
-
- HArrayLength* GetFoundArrayLength() const { return found_array_length_; }
- bool HasFoundArrayLength() const { return found_array_length_ != nullptr; }
- int32_t GetOffsetLow() const { return offset_low_; }
- int32_t GetOffsetHigh() const { return offset_high_; }
-
- // Returns if `block` that is in loop_info may exit the loop, unless it's
- // the loop header for loop_info.
- static bool EarlyExit(HBasicBlock* block, HLoopInformation* loop_info) {
- DCHECK(loop_info->Contains(*block));
- if (block == loop_info->GetHeader()) {
- // Loop header of loop_info. Exiting loop is normal.
- return false;
- }
- for (HBasicBlock* successor : block->GetSuccessors()) {
- if (!loop_info->Contains(*successor)) {
- // One of the successors exits the loop.
- return true;
- }
- }
- return false;
- }
-
- static bool DominatesAllBackEdges(HBasicBlock* block, HLoopInformation* loop_info) {
- for (HBasicBlock* back_edge : loop_info->GetBackEdges()) {
- if (!block->Dominates(back_edge)) {
- return false;
- }
- }
- return true;
- }
-
- void Run() {
- HLoopInformation* loop_info = induction_variable_->GetBlock()->GetLoopInformation();
- HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
- HBasicBlock* block = it_loop.Current();
- DCHECK(block == induction_variable_->GetBlock());
- // Skip loop header. Since narrowed value range of a MonotonicValueRange only
- // applies to the loop body (after the test at the end of the loop header).
- it_loop.Advance();
- for (; !it_loop.Done(); it_loop.Advance()) {
- block = it_loop.Current();
- DCHECK(block->IsInLoop());
- if (!DominatesAllBackEdges(block, loop_info)) {
- // In order not to trigger deoptimization unnecessarily, make sure
- // that all array accesses collected are really executed in the loop.
- // For array accesses in a branch inside the loop, don't collect the
- // access. The bounds check in that branch might not be eliminated.
- continue;
- }
- if (EarlyExit(block, loop_info)) {
- // If the loop body can exit loop (like break, return, etc.), it's not guaranteed
- // that the loop will loop through the full monotonic value range from
- // initial_ to end_. So adding deoptimization might be too aggressive and can
- // trigger deoptimization unnecessarily even if the loop won't actually throw
- // AIOOBE.
- found_array_length_ = nullptr;
- return;
- }
- for (HInstruction* instruction = block->GetFirstInstruction();
- instruction != nullptr;
- instruction = instruction->GetNext()) {
- if (!instruction->IsBoundsCheck()) {
- continue;
- }
-
- HInstruction* length_value = instruction->InputAt(1);
- if (length_value->IsIntConstant()) {
- // TODO: may optimize for constant case.
- continue;
- }
-
- if (length_value->IsPhi()) {
- // When adding deoptimizations in outer loops, we might create
- // a phi for the array length, and update all uses of the
- // length in the loop to that phi. Therefore, inner loops having
- // bounds checks on the same array will use that phi.
- // TODO: handle these cases.
- continue;
- }
-
- DCHECK(length_value->IsArrayLength());
- HArrayLength* array_length = length_value->AsArrayLength();
-
- HInstruction* array = array_length->InputAt(0);
- if (array->IsNullCheck()) {
- array = array->AsNullCheck()->InputAt(0);
- }
- if (loop_info->Contains(*array->GetBlock())) {
- // Array is defined inside the loop. Skip.
- continue;
- }
-
- if (found_array_length_ != nullptr && found_array_length_ != array_length) {
- // There is already access for another array recorded for the loop.
- // TODO: handle multiple arrays.
- continue;
- }
-
- HInstruction* index = instruction->AsBoundsCheck()->InputAt(0);
- HInstruction* left = index;
- int32_t right = 0;
- if (left == induction_variable_ ||
- (ValueBound::IsAddOrSubAConstant(index, &left, &right) &&
- left == induction_variable_)) {
- // For patterns like array[i] or array[i + 2].
- if (right < offset_low_) {
- offset_low_ = right;
- }
- if (right > offset_high_) {
- offset_high_ = right;
- }
- } else {
- // Access not in induction_variable/(induction_variable_ + constant)
- // format. Skip.
- continue;
- }
- // Record this array.
- found_array_length_ = array_length;
- }
- }
- }
-
- private:
- // The instruction that corresponds to a MonotonicValueRange.
- HInstruction* induction_variable_;
-
- // The array length of the array that's accessed inside the loop body.
- HArrayLength* found_array_length_;
-
- // The lowest and highest constant offsets relative to induction variable
- // instruction_ in all array accesses.
- // If array access are: array[i-1], array[i], array[i+1],
- // offset_low_ is -1 and offset_high is 1.
- int32_t offset_low_;
- int32_t offset_high_;
-
- DISALLOW_COPY_AND_ASSIGN(ArrayAccessInsideLoopFinder);
-};
-
/**
* Represent a range of lower bound and upper bound, both being inclusive.
* Currently a ValueRange may be generated as a result of the following:
@@ -500,18 +368,13 @@ class MonotonicValueRange : public ValueRange {
: ValueRange(allocator, ValueBound::Min(), ValueBound::Max()),
induction_variable_(induction_variable),
initial_(initial),
- end_(nullptr),
- inclusive_(false),
increment_(increment),
bound_(bound) {}
virtual ~MonotonicValueRange() {}
- HInstruction* GetInductionVariable() const { return induction_variable_; }
int32_t GetIncrement() const { return increment_; }
ValueBound GetBound() const { return bound_; }
- void SetEnd(HInstruction* end) { end_ = end; }
- void SetInclusive(bool inclusive) { inclusive_ = inclusive; }
HBasicBlock* GetLoopHeader() const {
DCHECK(induction_variable_->GetBlock()->IsLoopHeader());
return induction_variable_->GetBlock();
@@ -519,23 +382,6 @@ class MonotonicValueRange : public ValueRange {
MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
- HBasicBlock* GetLoopHeaderSuccesorInLoop() {
- HBasicBlock* header = GetLoopHeader();
- HInstruction* instruction = header->GetLastInstruction();
- DCHECK(instruction->IsIf());
- HIf* h_if = instruction->AsIf();
- HLoopInformation* loop_info = header->GetLoopInformation();
- bool true_successor_in_loop = loop_info->Contains(*h_if->IfTrueSuccessor());
- bool false_successor_in_loop = loop_info->Contains(*h_if->IfFalseSuccessor());
-
- // Just in case it's some strange loop structure.
- if (true_successor_in_loop && false_successor_in_loop) {
- return nullptr;
- }
- DCHECK(true_successor_in_loop || false_successor_in_loop);
- return false_successor_in_loop ? h_if->IfFalseSuccessor() : h_if->IfTrueSuccessor();
- }
-
// If it's certain that this value range fits in other_range.
bool FitsIn(ValueRange* other_range) const OVERRIDE {
if (other_range == nullptr) {
@@ -627,467 +473,9 @@ class MonotonicValueRange : public ValueRange {
}
}
- // Try to add HDeoptimize's in the loop pre-header first to narrow this range.
- // For example, this loop:
- //
- // for (int i = start; i < end; i++) {
- // array[i - 1] = array[i] + array[i + 1];
- // }
- //
- // will be transformed to:
- //
- // int array_length_in_loop_body_if_needed;
- // if (start >= end) {
- // array_length_in_loop_body_if_needed = 0;
- // } else {
- // if (start < 1) deoptimize();
- // if (array == null) deoptimize();
- // array_length = array.length;
- // if (end > array_length - 1) deoptimize;
- // array_length_in_loop_body_if_needed = array_length;
- // }
- // for (int i = start; i < end; i++) {
- // // No more null check and bounds check.
- // // array.length value is replaced with array_length_in_loop_body_if_needed
- // // in the loop body.
- // array[i - 1] = array[i] + array[i + 1];
- // }
- //
- // We basically first go through the loop body and find those array accesses whose
- // index is at a constant offset from the induction variable ('i' in the above example),
- // and update offset_low and offset_high along the way. We then add the following
- // deoptimizations in the loop pre-header (suppose end is not inclusive).
- // if (start < -offset_low) deoptimize();
- // if (end >= array.length - offset_high) deoptimize();
- // It might be necessary to first hoist array.length (and the null check on it) out of
- // the loop with another deoptimization.
- //
- // In order not to trigger deoptimization unnecessarily, we want to make a strong
- // guarantee that no deoptimization is triggered if the loop body itself doesn't
- // throw AIOOBE. (It's the same as saying if deoptimization is triggered, the loop
- // body must throw AIOOBE).
- // This is achieved by the following:
- // 1) We only process loops that iterate through the full monotonic range from
- // initial_ to end_. We do the following checks to make sure that's the case:
- // a) The loop doesn't have early exit (via break, return, etc.)
- // b) The increment_ is 1/-1. An increment of 2, for example, may skip end_.
- // 2) We only collect array accesses of blocks in the loop body that dominate
- // all loop back edges, these array accesses are guaranteed to happen
- // at each loop iteration.
- // With 1) and 2), if the loop body doesn't throw AIOOBE, collected array accesses
- // when the induction variable is at initial_ and end_ must be in a legal range.
- // Since the added deoptimizations are basically checking the induction variable
- // at initial_ and end_ values, no deoptimization will be triggered either.
- //
- // A special case is the loop body isn't entered at all. In that case, we may still
- // add deoptimization due to the analysis described above. In order not to trigger
- // deoptimization, we do a test between initial_ and end_ first and skip over
- // the added deoptimization.
- ValueRange* NarrowWithDeoptimization() {
- if (increment_ != 1 && increment_ != -1) {
- // In order not to trigger deoptimization unnecessarily, we want to
- // make sure the loop iterates through the full range from initial_ to
- // end_ so that boundaries are covered by the loop. An increment of 2,
- // for example, may skip end_.
- return this;
- }
-
- if (end_ == nullptr) {
- // No full info to add deoptimization.
- return this;
- }
-
- HBasicBlock* header = induction_variable_->GetBlock();
- DCHECK(header->IsLoopHeader());
- HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
- if (!initial_->GetBlock()->Dominates(pre_header) ||
- !end_->GetBlock()->Dominates(pre_header)) {
- // Can't add a check in loop pre-header if the value isn't available there.
- return this;
- }
-
- ArrayAccessInsideLoopFinder finder(induction_variable_);
-
- if (!finder.HasFoundArrayLength()) {
- // No array access was found inside the loop that can benefit
- // from deoptimization.
- return this;
- }
-
- if (!AddDeoptimization(finder)) {
- return this;
- }
-
- // After added deoptimizations, induction variable fits in
- // [-offset_low, array.length-1-offset_high], adjusted with collected offsets.
- ValueBound lower = ValueBound(0, -finder.GetOffsetLow());
- ValueBound upper = ValueBound(finder.GetFoundArrayLength(), -1 - finder.GetOffsetHigh());
- // We've narrowed the range after added deoptimizations.
- return new (GetAllocator()) ValueRange(GetAllocator(), lower, upper);
- }
-
- // Returns true if adding a (constant >= value) check for deoptimization
- // is allowed and will benefit compiled code.
- bool CanAddDeoptimizationConstant(HInstruction* value, int32_t constant, bool* is_proven) {
- *is_proven = false;
- HBasicBlock* header = induction_variable_->GetBlock();
- DCHECK(header->IsLoopHeader());
- HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
- DCHECK(value->GetBlock()->Dominates(pre_header));
-
- // See if we can prove the relationship first.
- if (value->IsIntConstant()) {
- if (value->AsIntConstant()->GetValue() >= constant) {
- // Already true.
- *is_proven = true;
- return true;
- } else {
- // May throw exception. Don't add deoptimization.
- // Keep bounds checks in the loops.
- return false;
- }
- }
- // Can benefit from deoptimization.
- return true;
- }
-
- // Try to filter out cases that the loop entry test will never be true.
- bool LoopEntryTestUseful() {
- if (initial_->IsIntConstant() && end_->IsIntConstant()) {
- int32_t initial_val = initial_->AsIntConstant()->GetValue();
- int32_t end_val = end_->AsIntConstant()->GetValue();
- if (increment_ == 1) {
- if (inclusive_) {
- return initial_val > end_val;
- } else {
- return initial_val >= end_val;
- }
- } else {
- DCHECK_EQ(increment_, -1);
- if (inclusive_) {
- return initial_val < end_val;
- } else {
- return initial_val <= end_val;
- }
- }
- }
- return true;
- }
-
- // Returns the block for adding deoptimization.
- HBasicBlock* TransformLoopForDeoptimizationIfNeeded() {
- HBasicBlock* header = induction_variable_->GetBlock();
- DCHECK(header->IsLoopHeader());
- HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
- // Deoptimization is only added when both initial_ and end_ are defined
- // before the loop.
- DCHECK(initial_->GetBlock()->Dominates(pre_header));
- DCHECK(end_->GetBlock()->Dominates(pre_header));
-
- // If it can be proven the loop body is definitely entered (unless exception
- // is thrown in the loop header for which triggering deoptimization is fine),
- // there is no need for tranforming the loop. In that case, deoptimization
- // will just be added in the loop pre-header.
- if (!LoopEntryTestUseful()) {
- return pre_header;
- }
-
- HGraph* graph = header->GetGraph();
- graph->TransformLoopHeaderForBCE(header);
- HBasicBlock* new_pre_header = header->GetDominator();
- DCHECK(new_pre_header == header->GetLoopInformation()->GetPreHeader());
- HBasicBlock* if_block = new_pre_header->GetDominator();
- HBasicBlock* dummy_block = if_block->GetSuccessors()[0]; // True successor.
- HBasicBlock* deopt_block = if_block->GetSuccessors()[1]; // False successor.
-
- dummy_block->AddInstruction(new (graph->GetArena()) HGoto());
- deopt_block->AddInstruction(new (graph->GetArena()) HGoto());
- new_pre_header->AddInstruction(new (graph->GetArena()) HGoto());
- return deopt_block;
- }
-
- // Adds a test between initial_ and end_ to see if the loop body is entered.
- // If the loop body isn't entered at all, it jumps to the loop pre-header (after
- // transformation) to avoid any deoptimization.
- void AddLoopBodyEntryTest() {
- HBasicBlock* header = induction_variable_->GetBlock();
- DCHECK(header->IsLoopHeader());
- HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
- HBasicBlock* if_block = pre_header->GetDominator();
- HGraph* graph = header->GetGraph();
-
- HCondition* cond;
- if (increment_ == 1) {
- if (inclusive_) {
- cond = new (graph->GetArena()) HGreaterThan(initial_, end_);
- } else {
- cond = new (graph->GetArena()) HGreaterThanOrEqual(initial_, end_);
- }
- } else {
- DCHECK_EQ(increment_, -1);
- if (inclusive_) {
- cond = new (graph->GetArena()) HLessThan(initial_, end_);
- } else {
- cond = new (graph->GetArena()) HLessThanOrEqual(initial_, end_);
- }
- }
- HIf* h_if = new (graph->GetArena()) HIf(cond);
- if_block->AddInstruction(cond);
- if_block->AddInstruction(h_if);
- }
-
- // Adds a check that (value >= constant), and HDeoptimize otherwise.
- void AddDeoptimizationConstant(HInstruction* value,
- int32_t constant,
- HBasicBlock* deopt_block,
- bool loop_entry_test_block_added) {
- HBasicBlock* header = induction_variable_->GetBlock();
- DCHECK(header->IsLoopHeader());
- HBasicBlock* pre_header = header->GetDominator();
- if (loop_entry_test_block_added) {
- DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
- } else {
- DCHECK(deopt_block == pre_header);
- }
- HGraph* graph = header->GetGraph();
- HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
- if (loop_entry_test_block_added) {
- DCHECK_EQ(deopt_block, header->GetDominator()->GetDominator()->GetSuccessors()[1]);
- }
-
- HIntConstant* const_instr = graph->GetIntConstant(constant);
- HCondition* cond = new (graph->GetArena()) HLessThan(value, const_instr);
- HDeoptimize* deoptimize = new (graph->GetArena())
- HDeoptimize(cond, suspend_check->GetDexPc());
- deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
- deopt_block->InsertInstructionBefore(deoptimize, deopt_block->GetLastInstruction());
- deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
- suspend_check->GetEnvironment(), header);
- }
-
- // Returns true if adding a (value <= array_length + offset) check for deoptimization
- // is allowed and will benefit compiled code.
- bool CanAddDeoptimizationArrayLength(HInstruction* value,
- HArrayLength* array_length,
- int32_t offset,
- bool* is_proven) {
- *is_proven = false;
- HBasicBlock* header = induction_variable_->GetBlock();
- DCHECK(header->IsLoopHeader());
- HBasicBlock* pre_header = header->GetLoopInformation()->GetPreHeader();
- DCHECK(value->GetBlock()->Dominates(pre_header));
-
- if (array_length->GetBlock() == header) {
- // array_length_in_loop_body_if_needed only has correct value when the loop
- // body is entered. We bail out in this case. Usually array_length defined
- // in the loop header is already hoisted by licm.
- return false;
- } else {
- // array_length is defined either before the loop header already, or in
- // the loop body since it's used in the loop body. If it's defined in the loop body,
- // a phi array_length_in_loop_body_if_needed is used to replace it. In that case,
- // all the uses of array_length must be dominated by its definition in the loop
- // body. array_length_in_loop_body_if_needed is guaranteed to be the same as
- // array_length once the loop body is entered so all the uses of the phi will
- // use the correct value.
- }
-
- if (offset > 0) {
- // There might be overflow issue.
- // TODO: handle this, possibly with some distance relationship between
- // offset_low and offset_high, or using another deoptimization to make
- // sure (array_length + offset) doesn't overflow.
- return false;
- }
-
- // See if we can prove the relationship first.
- if (value == array_length) {
- if (offset >= 0) {
- // Already true.
- *is_proven = true;
- return true;
- } else {
- // May throw exception. Don't add deoptimization.
- // Keep bounds checks in the loops.
- return false;
- }
- }
- // Can benefit from deoptimization.
- return true;
- }
-
- // Adds a check that (value <= array_length + offset), and HDeoptimize otherwise.
- void AddDeoptimizationArrayLength(HInstruction* value,
- HArrayLength* array_length,
- int32_t offset,
- HBasicBlock* deopt_block,
- bool loop_entry_test_block_added) {
- HBasicBlock* header = induction_variable_->GetBlock();
- DCHECK(header->IsLoopHeader());
- HBasicBlock* pre_header = header->GetDominator();
- if (loop_entry_test_block_added) {
- DCHECK(deopt_block->GetSuccessors()[0] == pre_header);
- } else {
- DCHECK(deopt_block == pre_header);
- }
- HGraph* graph = header->GetGraph();
- HSuspendCheck* suspend_check = header->GetLoopInformation()->GetSuspendCheck();
-
- // We may need to hoist null-check and array_length out of loop first.
- if (!array_length->GetBlock()->Dominates(deopt_block)) {
- // array_length must be defined in the loop body.
- DCHECK(header->GetLoopInformation()->Contains(*array_length->GetBlock()));
- DCHECK(array_length->GetBlock() != header);
-
- HInstruction* array = array_length->InputAt(0);
- HNullCheck* null_check = array->AsNullCheck();
- if (null_check != nullptr) {
- array = null_check->InputAt(0);
- }
- // We've already made sure the array is defined before the loop when collecting
- // array accesses for the loop.
- DCHECK(array->GetBlock()->Dominates(deopt_block));
- if (null_check != nullptr && !null_check->GetBlock()->Dominates(deopt_block)) {
- // Hoist null check out of loop with a deoptimization.
- HNullConstant* null_constant = graph->GetNullConstant();
- HCondition* null_check_cond = new (graph->GetArena()) HEqual(array, null_constant);
- // TODO: for one dex_pc, share the same deoptimization slow path.
- HDeoptimize* null_check_deoptimize = new (graph->GetArena())
- HDeoptimize(null_check_cond, suspend_check->GetDexPc());
- deopt_block->InsertInstructionBefore(
- null_check_cond, deopt_block->GetLastInstruction());
- deopt_block->InsertInstructionBefore(
- null_check_deoptimize, deopt_block->GetLastInstruction());
- // Eliminate null check in the loop.
- null_check->ReplaceWith(array);
- null_check->GetBlock()->RemoveInstruction(null_check);
- null_check_deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
- suspend_check->GetEnvironment(), header);
- }
-
- HArrayLength* new_array_length
- = new (graph->GetArena()) HArrayLength(array, array->GetDexPc());
- deopt_block->InsertInstructionBefore(new_array_length, deopt_block->GetLastInstruction());
-
- if (loop_entry_test_block_added) {
- // Replace array_length defined inside the loop body with a phi
- // array_length_in_loop_body_if_needed. This is a synthetic phi so there is
- // no vreg number for it.
- HPhi* phi = new (graph->GetArena()) HPhi(
- graph->GetArena(), kNoRegNumber, 2, Primitive::kPrimInt);
- // Set to 0 if the loop body isn't entered.
- phi->SetRawInputAt(0, graph->GetIntConstant(0));
- // Set to array.length if the loop body is entered.
- phi->SetRawInputAt(1, new_array_length);
- pre_header->AddPhi(phi);
- array_length->ReplaceWith(phi);
- // Make sure phi is only used after the loop body is entered.
- if (kIsDebugBuild) {
- for (HUseIterator<HInstruction*> it(phi->GetUses());
- !it.Done();
- it.Advance()) {
- HInstruction* user = it.Current()->GetUser();
- DCHECK(GetLoopHeaderSuccesorInLoop()->Dominates(user->GetBlock()));
- }
- }
- } else {
- array_length->ReplaceWith(new_array_length);
- }
-
- array_length->GetBlock()->RemoveInstruction(array_length);
- // Use new_array_length for deopt.
- array_length = new_array_length;
- }
-
- HInstruction* added = array_length;
- if (offset != 0) {
- HIntConstant* offset_instr = graph->GetIntConstant(offset);
- added = new (graph->GetArena()) HAdd(Primitive::kPrimInt, array_length, offset_instr);
- deopt_block->InsertInstructionBefore(added, deopt_block->GetLastInstruction());
- }
- HCondition* cond = new (graph->GetArena()) HGreaterThan(value, added);
- HDeoptimize* deopt = new (graph->GetArena()) HDeoptimize(cond, suspend_check->GetDexPc());
- deopt_block->InsertInstructionBefore(cond, deopt_block->GetLastInstruction());
- deopt_block->InsertInstructionBefore(deopt, deopt_block->GetLastInstruction());
- deopt->CopyEnvironmentFromWithLoopPhiAdjustment(suspend_check->GetEnvironment(), header);
- }
-
- // Adds deoptimizations in loop pre-header with the collected array access
- // data so that value ranges can be established in loop body.
- // Returns true if deoptimizations are successfully added, or if it's proven
- // it's not necessary.
- bool AddDeoptimization(const ArrayAccessInsideLoopFinder& finder) {
- int32_t offset_low = finder.GetOffsetLow();
- int32_t offset_high = finder.GetOffsetHigh();
- HArrayLength* array_length = finder.GetFoundArrayLength();
-
- HBasicBlock* pre_header =
- induction_variable_->GetBlock()->GetLoopInformation()->GetPreHeader();
- if (!initial_->GetBlock()->Dominates(pre_header) ||
- !end_->GetBlock()->Dominates(pre_header)) {
- // Can't move initial_ or end_ into pre_header for comparisons.
- return false;
- }
-
- HBasicBlock* deopt_block;
- bool loop_entry_test_block_added = false;
- bool is_constant_proven, is_length_proven;
-
- HInstruction* const_comparing_instruction;
- int32_t const_compared_to;
- HInstruction* array_length_comparing_instruction;
- int32_t array_length_offset;
- if (increment_ == 1) {
- // Increasing from initial_ to end_.
- const_comparing_instruction = initial_;
- const_compared_to = -offset_low;
- array_length_comparing_instruction = end_;
- array_length_offset = inclusive_ ? -offset_high - 1 : -offset_high;
- } else {
- const_comparing_instruction = end_;
- const_compared_to = inclusive_ ? -offset_low : -offset_low - 1;
- array_length_comparing_instruction = initial_;
- array_length_offset = -offset_high - 1;
- }
-
- if (CanAddDeoptimizationConstant(const_comparing_instruction,
- const_compared_to,
- &is_constant_proven) &&
- CanAddDeoptimizationArrayLength(array_length_comparing_instruction,
- array_length,
- array_length_offset,
- &is_length_proven)) {
- if (!is_constant_proven || !is_length_proven) {
- deopt_block = TransformLoopForDeoptimizationIfNeeded();
- loop_entry_test_block_added = (deopt_block != pre_header);
- if (loop_entry_test_block_added) {
- // Loop body may be entered.
- AddLoopBodyEntryTest();
- }
- }
- if (!is_constant_proven) {
- AddDeoptimizationConstant(const_comparing_instruction,
- const_compared_to,
- deopt_block,
- loop_entry_test_block_added);
- }
- if (!is_length_proven) {
- AddDeoptimizationArrayLength(array_length_comparing_instruction,
- array_length,
- array_length_offset,
- deopt_block,
- loop_entry_test_block_added);
- }
- return true;
- }
- return false;
- }
-
private:
HPhi* const induction_variable_; // Induction variable for this monotonic value range.
HInstruction* const initial_; // Initial value.
- HInstruction* end_; // End value.
- bool inclusive_; // Whether end value is inclusive.
const int32_t increment_; // Increment for each loop iteration.
const ValueBound bound_; // Additional value bound info for initial_.
@@ -1111,7 +499,9 @@ class BCEVisitor : public HGraphVisitor {
return block->GetBlockId() >= initial_block_size_;
}
- BCEVisitor(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+ BCEVisitor(HGraph* graph,
+ const SideEffectsAnalysis& side_effects,
+ HInductionVarAnalysis* induction_analysis)
: HGraphVisitor(graph),
maps_(graph->GetBlocks().size(),
ArenaSafeMap<int, ValueRange*>(
@@ -1121,8 +511,17 @@ class BCEVisitor : public HGraphVisitor {
first_constant_index_bounds_check_map_(
std::less<int>(),
graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+ early_exit_loop_(
+ std::less<uint32_t>(),
+ graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+ taken_test_loop_(
+ std::less<uint32_t>(),
+ graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
+ finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)),
need_to_revisit_block_(false),
+ has_deoptimization_on_constant_subscripts_(false),
initial_block_size_(graph->GetBlocks().size()),
+ side_effects_(side_effects),
induction_range_(induction_analysis) {}
void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -1138,6 +537,17 @@ class BCEVisitor : public HGraphVisitor {
}
}
+ void Finish() {
+ // Preserve SSA structure which may have been broken by adding one or more
+ // new taken-test structures (see TransformLoopForDeoptimizationIfNeeded()).
+ InsertPhiNodes();
+
+ // Clear the loop data structures.
+ early_exit_loop_.clear();
+ taken_test_loop_.clear();
+ finite_loop_.clear();
+ }
+
private:
// Return the map of proven value ranges at the beginning of a basic block.
ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) {
@@ -1166,23 +576,6 @@ class BCEVisitor : public HGraphVisitor {
return nullptr;
}
- // Return the range resulting from induction variable analysis of "instruction" when the value
- // is used from "context", for example, an index used from a bounds-check inside a loop body.
- ValueRange* LookupInductionRange(HInstruction* context, HInstruction* instruction) {
- InductionVarRange::Value v1 = induction_range_.GetMinInduction(context, instruction);
- InductionVarRange::Value v2 = induction_range_.GetMaxInduction(context, instruction);
- if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
- v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
- DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
- DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
- ValueBound low = ValueBound(v1.instruction, v1.b_constant);
- ValueBound up = ValueBound(v2.instruction, v2.b_constant);
- return new (GetGraph()->GetArena()) ValueRange(GetGraph()->GetArena(), low, up);
- }
- // Didn't find anything useful.
- return nullptr;
- }
-
// Narrow the value range of `instruction` at the end of `basic_block` with `range`,
// and push the narrowed value range to `successor`.
void ApplyRangeFromComparison(HInstruction* instruction, HBasicBlock* basic_block,
@@ -1328,17 +721,6 @@ class BCEVisitor : public HGraphVisitor {
bool overflow, underflow;
if (cond == kCondLT || cond == kCondLE) {
- if (left_monotonic_range != nullptr) {
- // Update the info for monotonic value range.
- if (left_monotonic_range->GetInductionVariable() == left &&
- left_monotonic_range->GetIncrement() < 0 &&
- block == left_monotonic_range->GetLoopHeader() &&
- instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
- left_monotonic_range->SetEnd(right);
- left_monotonic_range->SetInclusive(cond == kCondLT);
- }
- }
-
if (!upper.Equals(ValueBound::Max())) {
int32_t compensation = (cond == kCondLT) ? -1 : 0; // upper bound is inclusive
ValueBound new_upper = upper.Add(compensation, &overflow, &underflow);
@@ -1362,17 +744,6 @@ class BCEVisitor : public HGraphVisitor {
ApplyRangeFromComparison(left, block, false_successor, new_range);
}
} else if (cond == kCondGT || cond == kCondGE) {
- if (left_monotonic_range != nullptr) {
- // Update the info for monotonic value range.
- if (left_monotonic_range->GetInductionVariable() == left &&
- left_monotonic_range->GetIncrement() > 0 &&
- block == left_monotonic_range->GetLoopHeader() &&
- instruction->IfFalseSuccessor()->GetLoopInformation() == block->GetLoopInformation()) {
- left_monotonic_range->SetEnd(right);
- left_monotonic_range->SetInclusive(cond == kCondGT);
- }
- }
-
// array.length as a lower bound isn't considered useful.
if (!lower.Equals(ValueBound::Min()) && !lower.IsRelatedToArrayLength()) {
int32_t compensation = (cond == kCondGT) ? 1 : 0; // lower bound is inclusive
@@ -1398,38 +769,34 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitBoundsCheck(HBoundsCheck* bounds_check) {
+ void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
HBasicBlock* block = bounds_check->GetBlock();
HInstruction* index = bounds_check->InputAt(0);
HInstruction* array_length = bounds_check->InputAt(1);
DCHECK(array_length->IsIntConstant() ||
array_length->IsArrayLength() ||
array_length->IsPhi());
-
- if (array_length->IsPhi()) {
- // Input 1 of the phi contains the real array.length once the loop body is
- // entered. That value will be used for bound analysis. The graph is still
- // strictly in SSA form.
- array_length = array_length->AsPhi()->InputAt(1)->AsArrayLength();
- }
+ bool try_dynamic_bce = true;
if (!index->IsIntConstant()) {
+ // Non-constant subscript.
ValueBound lower = ValueBound(nullptr, 0); // constant 0
ValueBound upper = ValueBound(array_length, -1); // array_length - 1
ValueRange array_range(GetGraph()->GetArena(), lower, upper);
- // Try range obtained by local analysis.
+ // Try range obtained by dominator-based analysis.
ValueRange* index_range = LookupValueRange(index, block);
if (index_range != nullptr && index_range->FitsIn(&array_range)) {
- ReplaceBoundsCheck(bounds_check, index);
+ ReplaceInstruction(bounds_check, index);
return;
}
// Try range obtained by induction variable analysis.
- index_range = LookupInductionRange(bounds_check, index);
- if (index_range != nullptr && index_range->FitsIn(&array_range)) {
- ReplaceBoundsCheck(bounds_check, index);
+ // Disables dynamic bce if OOB is certain.
+ if (InductionRangeFitsIn(&array_range, bounds_check, index, &try_dynamic_bce)) {
+ ReplaceInstruction(bounds_check, index);
return;
}
} else {
+ // Constant subscript.
int32_t constant = index->AsIntConstant()->GetValue();
if (constant < 0) {
// Will always throw exception.
@@ -1437,7 +804,7 @@ class BCEVisitor : public HGraphVisitor {
}
if (array_length->IsIntConstant()) {
if (constant < array_length->AsIntConstant()->GetValue()) {
- ReplaceBoundsCheck(bounds_check, index);
+ ReplaceInstruction(bounds_check, index);
}
return;
}
@@ -1448,7 +815,7 @@ class BCEVisitor : public HGraphVisitor {
ValueBound lower = existing_range->GetLower();
DCHECK(lower.IsConstant());
if (constant < lower.GetConstant()) {
- ReplaceBoundsCheck(bounds_check, index);
+ ReplaceInstruction(bounds_check, index);
return;
} else {
// Existing range isn't strong enough to eliminate the bounds check.
@@ -1483,11 +850,11 @@ class BCEVisitor : public HGraphVisitor {
ValueRange(GetGraph()->GetArena(), lower, upper);
GetValueRangeMap(block)->Overwrite(array_length->GetId(), range);
}
- }
- void ReplaceBoundsCheck(HInstruction* bounds_check, HInstruction* index) {
- bounds_check->ReplaceWith(index);
- bounds_check->GetBlock()->RemoveInstruction(bounds_check);
+ // If static analysis fails, and OOB is not certain, try dynamic elimination.
+ if (try_dynamic_bce) {
+ TryDynamicBCE(bounds_check);
+ }
}
static bool HasSameInputAtBackEdges(HPhi* phi) {
@@ -1506,7 +873,7 @@ class BCEVisitor : public HGraphVisitor {
return true;
}
- void VisitPhi(HPhi* phi) {
+ void VisitPhi(HPhi* phi) OVERRIDE {
if (phi->IsLoopHeaderPhi()
&& (phi->GetType() == Primitive::kPrimInt)
&& HasSameInputAtBackEdges(phi)) {
@@ -1553,7 +920,7 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitIf(HIf* instruction) {
+ void VisitIf(HIf* instruction) OVERRIDE {
if (instruction->InputAt(0)->IsCondition()) {
HCondition* cond = instruction->InputAt(0)->AsCondition();
IfCondition cmp = cond->GetCondition();
@@ -1562,42 +929,11 @@ class BCEVisitor : public HGraphVisitor {
HInstruction* left = cond->GetLeft();
HInstruction* right = cond->GetRight();
HandleIf(instruction, left, right, cmp);
-
- HBasicBlock* block = instruction->GetBlock();
- ValueRange* left_range = LookupValueRange(left, block);
- if (left_range == nullptr) {
- return;
- }
-
- if (left_range->IsMonotonicValueRange() &&
- block == left_range->AsMonotonicValueRange()->GetLoopHeader()) {
- // The comparison is for an induction variable in the loop header.
- DCHECK(left == left_range->AsMonotonicValueRange()->GetInductionVariable());
- HBasicBlock* loop_body_successor =
- left_range->AsMonotonicValueRange()->GetLoopHeaderSuccesorInLoop();
- if (loop_body_successor == nullptr) {
- // In case it's some strange loop structure.
- return;
- }
- ValueRange* new_left_range = LookupValueRange(left, loop_body_successor);
- if ((new_left_range == left_range) ||
- // Range narrowed with deoptimization is usually more useful than
- // a constant range.
- new_left_range->IsConstantValueRange()) {
- // We are not successful in narrowing the monotonic value range to
- // a regular value range. Try using deoptimization.
- new_left_range = left_range->AsMonotonicValueRange()->
- NarrowWithDeoptimization();
- if (new_left_range != left_range) {
- GetValueRangeMap(loop_body_successor)->Overwrite(left->GetId(), new_left_range);
- }
- }
- }
}
}
}
- void VisitAdd(HAdd* add) {
+ void VisitAdd(HAdd* add) OVERRIDE {
HInstruction* right = add->GetRight();
if (right->IsIntConstant()) {
ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock());
@@ -1611,7 +947,7 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitSub(HSub* sub) {
+ void VisitSub(HSub* sub) OVERRIDE {
HInstruction* left = sub->GetLeft();
HInstruction* right = sub->GetRight();
if (right->IsIntConstant()) {
@@ -1713,19 +1049,19 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitDiv(HDiv* div) {
+ void VisitDiv(HDiv* div) OVERRIDE {
FindAndHandlePartialArrayLength(div);
}
- void VisitShr(HShr* shr) {
+ void VisitShr(HShr* shr) OVERRIDE {
FindAndHandlePartialArrayLength(shr);
}
- void VisitUShr(HUShr* ushr) {
+ void VisitUShr(HUShr* ushr) OVERRIDE {
FindAndHandlePartialArrayLength(ushr);
}
- void VisitAnd(HAnd* instruction) {
+ void VisitAnd(HAnd* instruction) OVERRIDE {
if (instruction->GetRight()->IsIntConstant()) {
int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue();
if (constant > 0) {
@@ -1740,7 +1076,7 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitNewArray(HNewArray* new_array) {
+ void VisitNewArray(HNewArray* new_array) OVERRIDE {
HInstruction* len = new_array->InputAt(0);
if (!len->IsIntConstant()) {
HInstruction *left;
@@ -1764,9 +1100,12 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitDeoptimize(HDeoptimize* deoptimize) {
- // Right now it's only HLessThanOrEqual.
- DCHECK(deoptimize->InputAt(0)->IsLessThanOrEqual());
+ void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+ if (!deoptimize->InputAt(0)->IsLessThanOrEqual()) {
+ return;
+ }
+ // If this instruction was added by AddCompareWithDeoptimization(), narrow
+ // the range accordingly in subsequent basic blocks.
HLessThanOrEqual* less_than_or_equal = deoptimize->InputAt(0)->AsLessThanOrEqual();
HInstruction* instruction = less_than_or_equal->InputAt(0);
if (instruction->IsArrayLength()) {
@@ -1780,6 +1119,35 @@ class BCEVisitor : public HGraphVisitor {
}
}
+ /**
+ * After null/bounds checks are eliminated, some invariant array references
+ * may be exposed underneath which can be hoisted out of the loop to the
+ * preheader or, in combination with dynamic bce, the deoptimization block.
+ *
+ * for (int i = 0; i < n; i++) {
+ * <-------+
+ * for (int j = 0; j < n; j++) |
+ * a[i][j] = 0; --a[i]--+
+ * }
+ *
+ * Note: this optimization is no longer applied after deoptimization on array references
+ * with constant subscripts has occurred (see AddCompareWithDeoptimization()), since in
+ * those cases it would be unsafe to hoist array references across their deoptimization
+ * instruction inside a loop.
+ */
+ void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+ if (!has_deoptimization_on_constant_subscripts_ && array_get->IsInLoop()) {
+ HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation();
+ if (loop->IsLoopInvariant(array_get->InputAt(0), false) &&
+ loop->IsLoopInvariant(array_get->InputAt(1), false)) {
+ SideEffects loop_effects = side_effects_.GetLoopEffects(loop->GetHeader());
+ if (!array_get->GetSideEffects().MayDependOn(loop_effects)) {
+ HoistToPreheaderOrDeoptBlock(loop, array_get);
+ }
+ }
+ }
+ }
+
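A source-level sketch of the hoisting effect VisitArrayGet enables above, assuming a simple row/column loop nest; ZeroMatrix and its parameters are illustrative only (the pass itself rewrites HIR, not source code).

#include <cstdint>

void ZeroMatrix(int32_t** a, int n) {
  for (int i = 0; i < n; i++) {
    int32_t* row = a[i];  // invariant load a[i] hoisted out of the inner loop
    for (int j = 0; j < n; j++) {
      row[j] = 0;
    }
  }
}
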
void AddCompareWithDeoptimization(HInstruction* array_length,
HIntConstant* const_instr,
HBasicBlock* block) {
@@ -1801,6 +1169,9 @@ class BCEVisitor : public HGraphVisitor {
block->InsertInstructionBefore(cond, bounds_check);
block->InsertInstructionBefore(deoptimize, bounds_check);
deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
+ // Flag that this kind of deoptimization on array references with constant
+ // subscripts has occurred to prevent further hoisting of these references.
+ has_deoptimization_on_constant_subscripts_ = true;
}
void AddComparesWithDeoptimization(HBasicBlock* block) {
@@ -1844,21 +1215,425 @@ class BCEVisitor : public HGraphVisitor {
}
}
+ /**
+ * Returns true if static range analysis based on induction variables can determine the bounds
+ * check on the given array range is always satisfied with the computed index range. The output
+ * parameter try_dynamic_bce is set to false if OOB is certain.
+ */
+ bool InductionRangeFitsIn(ValueRange* array_range,
+ HInstruction* context,
+ HInstruction* index,
+ bool* try_dynamic_bce) {
+ InductionVarRange::Value v1;
+ InductionVarRange::Value v2;
+ bool needs_finite_test = false;
+ induction_range_.GetInductionRange(context, index, &v1, &v2, &needs_finite_test);
+ if (v1.is_known && (v1.a_constant == 0 || v1.a_constant == 1) &&
+ v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) {
+ DCHECK(v1.a_constant == 1 || v1.instruction == nullptr);
+ DCHECK(v2.a_constant == 1 || v2.instruction == nullptr);
+ ValueRange index_range(GetGraph()->GetArena(),
+ ValueBound(v1.instruction, v1.b_constant),
+ ValueBound(v2.instruction, v2.b_constant));
+ // If analysis reveals a certain OOB, disable dynamic BCE.
+ *try_dynamic_bce = !index_range.GetLower().LessThan(array_range->GetLower()) &&
+ !index_range.GetUpper().GreaterThan(array_range->GetUpper());
+ // Use analysis for static bce only if loop is finite.
+ return !needs_finite_test && index_range.FitsIn(array_range);
+ }
+ return false;
+ }
+
+ /**
+ * When the compiler fails to remove a bounds check statically, we try to remove the bounds
+ * check dynamically by adding runtime tests that trigger a deoptimization in case bounds
+ * will go out of range (we want to be rather certain of that given the slowdown of
+ * deoptimization). If no deoptimization occurs, the loop is executed with all corresponding
+ * bounds checks and related null checks removed.
+ */
+ void TryDynamicBCE(HBoundsCheck* instruction) {
+ HLoopInformation* loop = instruction->GetBlock()->GetLoopInformation();
+ HInstruction* index = instruction->InputAt(0);
+ HInstruction* length = instruction->InputAt(1);
+ // If dynamic bounds check elimination seems profitable and is possible, then proceed.
+ bool needs_finite_test = false;
+ bool needs_taken_test = false;
+ if (DynamicBCESeemsProfitable(loop, instruction->GetBlock()) &&
+ induction_range_.CanGenerateCode(
+ instruction, index, &needs_finite_test, &needs_taken_test) &&
+ CanHandleInfiniteLoop(loop, index, needs_finite_test) &&
+ CanHandleLength(loop, length, needs_taken_test)) { // do this test last (may code gen)
+ HInstruction* lower = nullptr;
+ HInstruction* upper = nullptr;
+ // Generate the following unsigned comparisons
+ // if (lower > upper) deoptimize;
+ // if (upper >= length) deoptimize;
+ // or, for a non-induction index, just the unsigned comparison on its 'upper' value
+ // if (upper >= length) deoptimize;
+ // as runtime test. By restricting dynamic bce to unit strides (with a maximum of 32-bit
+ // iterations) and by not combining access (e.g. a[i], a[i-3], a[i+5] etc.), these tests
+ // correctly guard against any possible OOB (including arithmetic wrap-around cases).
+ HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ induction_range_.GenerateRangeCode(instruction, index, GetGraph(), block, &lower, &upper);
+ if (lower != nullptr) {
+ InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAbove(lower, upper));
+ }
+ InsertDeopt(loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(upper, length));
+ ReplaceInstruction(instruction, index);
+ }
+ }
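A standalone sketch of the two unsigned guards described above, for an index that ranges over [min_index, max_index] on an array whose length is `length`; GuardIndexRange and deoptimize are hypothetical names, not ART code.

#include <cstdint>
#include <cstdlib>

[[noreturn]] void deoptimize() { std::abort(); }  // stands in for HDeoptimize

void GuardIndexRange(uint32_t min_index, uint32_t max_index, uint32_t length) {
  // Unsigned comparisons: a conceptually negative min_index wraps to a huge
  // value, so the first test also rejects negative subscripts.
  if (min_index > max_index) deoptimize();  // empty or wrapped range
  if (max_index >= length) deoptimize();    // last access would be out of bounds
}
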
+
+ /**
+ * Returns true if heuristics indicate that dynamic bce may be profitable.
+ */
+ bool DynamicBCESeemsProfitable(HLoopInformation* loop, HBasicBlock* block) {
+ if (loop != nullptr) {
+ // A try boundary preheader is hard to handle.
+ // TODO: remove this restriction
+ if (loop->GetPreHeader()->GetLastInstruction()->IsTryBoundary()) {
+ return false;
+ }
+ // Does loop have early-exits? If so, the full range may not be covered by the loop
+ // at runtime and testing the range may apply deoptimization unnecessarily.
+ if (IsEarlyExitLoop(loop)) {
+ return false;
+ }
+ // Does the current basic block dominate all back edges? If not,
+ // don't apply dynamic bce to something that may not be executed.
+ for (HBasicBlock* back_edge : loop->GetBackEdges()) {
+ if (!block->Dominates(back_edge)) {
+ return false;
+ }
+ }
+ // Success!
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if the loop has early exits, which implies it may not cover
+ * the full range computed by range analysis based on induction variables.
+ */
+ bool IsEarlyExitLoop(HLoopInformation* loop) {
+ const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+ // If loop has been analyzed earlier for early-exit, don't repeat the analysis.
+ auto it = early_exit_loop_.find(loop_id);
+ if (it != early_exit_loop_.end()) {
+ return it->second;
+ }
+ // First time early-exit analysis for this loop. Since analysis requires scanning
+ // the full loop-body, results of the analysis are stored for subsequent queries.
+ HBlocksInLoopReversePostOrderIterator it_loop(*loop);
+ for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+ for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+ if (!loop->Contains(*successor)) {
+ early_exit_loop_.Put(loop_id, true);
+ return true;
+ }
+ }
+ }
+ early_exit_loop_.Put(loop_id, false);
+ return false;
+ }
+
+ /**
+ * Returns true if the array length is already loop invariant, or can be made so
+ * by handling the null check under the hood of the array length operation.
+ */
+ bool CanHandleLength(HLoopInformation* loop, HInstruction* length, bool needs_taken_test) {
+ if (loop->IsLoopInvariant(length, false)) {
+ return true;
+ } else if (length->IsArrayLength() && length->GetBlock()->GetLoopInformation() == loop) {
+ if (CanHandleNullCheck(loop, length->InputAt(0), needs_taken_test)) {
+ HoistToPreheaderOrDeoptBlock(loop, length);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if the null check is already loop invariant, or can be made so
+ * by generating a deoptimization test.
+ */
+ bool CanHandleNullCheck(HLoopInformation* loop, HInstruction* check, bool needs_taken_test) {
+ if (loop->IsLoopInvariant(check, false)) {
+ return true;
+ } else if (check->IsNullCheck() && check->GetBlock()->GetLoopInformation() == loop) {
+ HInstruction* array = check->InputAt(0);
+ if (loop->IsLoopInvariant(array, false)) {
+ // Generate: if (array == null) deoptimize;
+ HBasicBlock* block = TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test);
+ HInstruction* cond =
+ new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant());
+ InsertDeopt(loop, block, cond);
+ ReplaceInstruction(check, array);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Returns true if compiler can apply dynamic bce to loops that may be infinite
+ * (e.g. for (int i = 0; i <= U; i++) with U = MAX_INT), which would invalidate
+ * the range analysis evaluation code by "overshooting" the computed range.
+ * Since deoptimization would be a bad choice, and there is no other version
+ * of the loop to use, dynamic bce in such cases is only allowed if other tests
+ * ensure the loop is finite.
+ */
+ bool CanHandleInfiniteLoop(
+ HLoopInformation* loop, HInstruction* index, bool needs_infinite_test) {
+ if (needs_infinite_test) {
+ // If we already forced the loop to be finite, allow directly.
+ const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+ if (finite_loop_.find(loop_id) != finite_loop_.end()) {
+ return true;
+ }
+ // Otherwise, allow dynamic bce if the index (which is necessarily an induction at
+ // this point) is the direct loop index (viz. a[i]), since then the runtime tests
+ // ensure upper bound cannot cause an infinite loop.
+ HInstruction* control = loop->GetHeader()->GetLastInstruction();
+ if (control->IsIf()) {
+ HInstruction* if_expr = control->AsIf()->InputAt(0);
+ if (if_expr->IsCondition()) {
+ HCondition* condition = if_expr->AsCondition();
+ if (index == condition->InputAt(0) ||
+ index == condition->InputAt(1)) {
+ finite_loop_.insert(loop_id);
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+ return true;
+ }
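A minimal sketch of the hazard described above, using an unsigned analogue since signed overflow is undefined behavior in C++ (NeverTerminates is an illustrative name): the exit test can never be false, so the loop overshoots any finite range an analysis would compute.

#include <cstdint>
#include <limits>

void NeverTerminates(int32_t* a) {
  const uint32_t upper = std::numeric_limits<uint32_t>::max();
  for (uint32_t i = 0; i <= upper; ++i) {  // i <= upper is always true
    a[i & 3] = 0;                          // masked so the sketch itself stays in bounds
  }
}
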
+
+ /** Inserts a deoptimization test. */
+ void InsertDeopt(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
+ HInstruction* suspend = loop->GetSuspendCheck();
+ block->InsertInstructionBefore(condition, block->GetLastInstruction());
+ HDeoptimize* deoptimize =
+ new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+ block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
+ if (suspend->HasEnvironment()) {
+ deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
+ suspend->GetEnvironment(), loop->GetHeader());
+ }
+ }
+
+ /** Hoists instruction out of the loop to preheader or deoptimization block. */
+ void HoistToPreheaderOrDeoptBlock(HLoopInformation* loop, HInstruction* instruction) {
+ // Use preheader unless there is an earlier generated deoptimization block since
+ // hoisted expressions may depend on and/or be used by the deoptimization tests.
+ const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+ HBasicBlock* preheader = loop->GetPreHeader();
+ HBasicBlock* block = preheader;
+ auto it = taken_test_loop_.find(loop_id);
+ if (it != taken_test_loop_.end()) {
+ block = it->second;
+ }
+ // Hoist the instruction.
+ DCHECK(!instruction->HasEnvironment());
+ instruction->MoveBefore(block->GetLastInstruction());
+ }
+
+ /**
+ * Adds a new taken-test structure to a loop if needed (and not already done).
+ * The taken-test protects range analysis evaluation code to avoid any
+ * deoptimization caused by incorrect trip-count evaluation in non-taken loops.
+ *
+ * Returns block in which deoptimizations/invariants can be put.
+ *
+ * old_preheader
+ * |
+ * if_block <- taken-test protects deoptimization block
+ * / \
+ * true_block false_block <- deoptimizations/invariants are placed in true_block
+ * \ /
+ * new_preheader <- may require phi nodes to preserve SSA structure
+ * |
+ * header
+ *
+ * For example, this loop:
+ *
+ * for (int i = lower; i < upper; i++) {
+ * array[i] = 0;
+ * }
+ *
+ * will be transformed to:
+ *
+ * if (lower < upper) {
+ * if (array == null) deoptimize;
+ * array_length = array.length;
+ * if (lower > upper) deoptimize; // unsigned
+ * if (upper >= array_length) deoptimize; // unsigned
+ * } else {
+ * array_length = 0;
+ * }
+ * for (int i = lower; i < upper; i++) {
+ * // Loop without null check and bounds check, and any array.length replaced with array_length.
+ * array[i] = 0;
+ * }
+ */
+ HBasicBlock* TransformLoopForDeoptimizationIfNeeded(HLoopInformation* loop, bool needs_taken_test) {
+ // Not needed (can use preheader), or already done (can reuse)?
+ const uint32_t loop_id = loop->GetHeader()->GetBlockId();
+ if (!needs_taken_test) {
+ return loop->GetPreHeader();
+ } else {
+ auto it = taken_test_loop_.find(loop_id);
+ if (it != taken_test_loop_.end()) {
+ return it->second;
+ }
+ }
+
+ // Generate top test structure.
+ HBasicBlock* header = loop->GetHeader();
+ GetGraph()->TransformLoopHeaderForBCE(header);
+ HBasicBlock* new_preheader = loop->GetPreHeader();
+ HBasicBlock* if_block = new_preheader->GetDominator();
+ HBasicBlock* true_block = if_block->GetSuccessors()[0]; // True successor.
+ HBasicBlock* false_block = if_block->GetSuccessors()[1]; // False successor.
+
+ // Goto instructions.
+ true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+ false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+ new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto());
+
+ // Insert the taken-test to see if the loop body is entered. If the
+ // loop isn't entered at all, it jumps around the deoptimization block.
+ if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); // placeholder
+ HInstruction* condition = nullptr;
+ induction_range_.GenerateTakenTest(header->GetLastInstruction(),
+ GetGraph(),
+ if_block,
+ &condition);
+ DCHECK(condition != nullptr);
+ if_block->RemoveInstruction(if_block->GetLastInstruction());
+ if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition));
+
+ taken_test_loop_.Put(loop_id, true_block);
+ return true_block;
+ }
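+
+  The two tests marked "unsigned" in the example above rely on unsigned comparisons so that a single test also rejects negative values; a minimal sketch of the same trick in plain C++:
+
+  #include <cstdint>
+
+  // Casting a negative int32_t to uint32_t yields a large value, so one
+  // unsigned compare covers both "value is negative" and "value is too large".
+  inline bool InBoundsUnsigned(int32_t value, int32_t length) {
+    return static_cast<uint32_t>(value) < static_cast<uint32_t>(length);
+  }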
+
+ /**
+ * Inserts phi nodes that preserve SSA structure in generated top test structures.
+ * All uses of instructions in the deoptimization block that reach the loop need
+ * a phi node in the new loop preheader to fix the dominance relation.
+ *
+ * Example:
+ * if_block
+ * / \
+ * x_0 = .. false_block
+ * \ /
+ * x_1 = phi(x_0, null) <- synthetic phi
+ * |
+ * header
+ */
+ void InsertPhiNodes() {
+ // Scan all new deoptimization blocks.
+ for (auto it1 = taken_test_loop_.begin(); it1 != taken_test_loop_.end(); ++it1) {
+ HBasicBlock* true_block = it1->second;
+ HBasicBlock* new_preheader = true_block->GetSingleSuccessor();
+ // Scan all instructions in a new deoptimization block.
+ for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ Primitive::Type type = instruction->GetType();
+ HPhi* phi = nullptr;
+ // Scan all uses of an instruction and replace each later use with a phi node.
+ for (HUseIterator<HInstruction*> it2(instruction->GetUses());
+ !it2.Done();
+ it2.Advance()) {
+ HInstruction* user = it2.Current()->GetUser();
+ if (user->GetBlock() != true_block) {
+ if (phi == nullptr) {
+ phi = NewPhi(new_preheader, instruction, type);
+ }
+ user->ReplaceInput(phi, it2.Current()->GetIndex());
+ }
+ }
+ // Scan all environment uses of an instruction and replace each later use with a phi node.
+ for (HUseIterator<HEnvironment*> it2(instruction->GetEnvUses());
+ !it2.Done();
+ it2.Advance()) {
+ HEnvironment* user = it2.Current()->GetUser();
+ if (user->GetHolder()->GetBlock() != true_block) {
+ if (phi == nullptr) {
+ phi = NewPhi(new_preheader, instruction, type);
+ }
+ user->RemoveAsUserOfInput(it2.Current()->GetIndex());
+ user->SetRawEnvAt(it2.Current()->GetIndex(), phi);
+ phi->AddEnvUseAt(user, it2.Current()->GetIndex());
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Construct a phi(instruction, 0) in the new preheader to fix the dominance relation.
+ * These are synthetic phi nodes without a virtual register.
+ */
+ HPhi* NewPhi(HBasicBlock* new_preheader,
+ HInstruction* instruction,
+ Primitive::Type type) {
+ HGraph* graph = GetGraph();
+ HInstruction* zero;
+ switch (type) {
+ case Primitive::Type::kPrimNot: zero = graph->GetNullConstant(); break;
+ case Primitive::Type::kPrimFloat: zero = graph->GetFloatConstant(0); break;
+ case Primitive::Type::kPrimDouble: zero = graph->GetDoubleConstant(0); break;
+ default: zero = graph->GetConstant(type, 0); break;
+ }
+ HPhi* phi = new (graph->GetArena())
+ HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type));
+ phi->SetRawInputAt(0, instruction);
+ phi->SetRawInputAt(1, zero);
+ new_preheader->AddPhi(phi);
+ return phi;
+ }
+
+ /** Helper method to replace an instruction with another instruction. */
+ static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+ instruction->ReplaceWith(replacement);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+
+ // A set of maps, one per basic block, from instruction to range.
ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_;
// Map an HArrayLength instruction's id to the first HBoundsCheck instruction in
// a block that checks a constant index against that HArrayLength.
ArenaSafeMap<int, HBoundsCheck*> first_constant_index_bounds_check_map_;
+ // Early-exit loop bookkeeping.
+ ArenaSafeMap<uint32_t, bool> early_exit_loop_;
+
+ // Taken-test loop bookkeeping.
+ ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_;
+
+ // Finite loop bookkeeping.
+ ArenaSet<uint32_t> finite_loop_;
+
// For the block, there is at least one HArrayLength instruction for which there
// is more than one bounds check instruction with constant indexing, and it is
// beneficial to add a compare instruction with a deoptimization fallback to
// eliminate those bounds checks.
bool need_to_revisit_block_;
+ // Flag that denotes whether deoptimization has occurred on array references
+ // with constant subscripts (see AddCompareWithDeoptimization()).
+ bool has_deoptimization_on_constant_subscripts_;
+
// Initial number of blocks.
uint32_t initial_block_size_;
+ // Side effects.
+ const SideEffectsAnalysis& side_effects_;
+
// Range analysis based on induction variables.
InductionVarRange induction_range_;
@@ -1870,14 +1645,12 @@ void BoundsCheckElimination::Run() {
return;
}
- BCEVisitor visitor(graph_, induction_analysis_);
// Reverse post order guarantees a node's dominators are visited first.
// We want to visit in the dominator-based order since if a value is known to
// be bounded by a range at one instruction, it must be true that all uses of
// that value dominated by that instruction fit in that range. The range of that
// value can be narrowed further down in the dominator tree.
- //
- // TODO: only visit blocks that dominate some array accesses.
+ BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
HBasicBlock* last_visited_block = nullptr;
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* current = it.Current();
@@ -1894,6 +1667,9 @@ void BoundsCheckElimination::Run() {
visitor.VisitBasicBlock(current);
last_visited_block = current;
}
+
+ // Perform cleanup.
+ visitor.Finish();
}
} // namespace art
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index cdff3ca0ba..b9df686ffd 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -21,12 +21,16 @@
namespace art {
+class SideEffectsAnalysis;
class HInductionVarAnalysis;
class BoundsCheckElimination : public HOptimization {
public:
- BoundsCheckElimination(HGraph* graph, HInductionVarAnalysis* induction_analysis)
+ BoundsCheckElimination(HGraph* graph,
+ const SideEffectsAnalysis& side_effects,
+ HInductionVarAnalysis* induction_analysis)
: HOptimization(graph, kBoundsCheckEliminiationPassName),
+ side_effects_(side_effects),
induction_analysis_(induction_analysis) {}
void Run() OVERRIDE;
@@ -34,6 +38,7 @@ class BoundsCheckElimination : public HOptimization {
static constexpr const char* kBoundsCheckEliminiationPassName = "BCE";
private:
+ const SideEffectsAnalysis& side_effects_;
HInductionVarAnalysis* induction_analysis_;
DISALLOW_COPY_AND_ASSIGN(BoundsCheckElimination);
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index c9afdf2147..dbeb1ccc22 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -54,7 +54,7 @@ class BoundsCheckEliminationTest : public testing::Test {
HInductionVarAnalysis induction(graph_);
induction.Run();
- BoundsCheckElimination(graph_, &induction).Run();
+ BoundsCheckElimination(graph_, side_effects, &induction).Run();
}
ArenaPool pool_;
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index ed193c7b61..8e75bdcdc9 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -359,18 +359,10 @@ void HGraphBuilder::InsertTryBoundaryBlocks(const DexFile::CodeItem& code_item)
// need a strategy for splitting exceptional edges. We split the block
// after the move-exception (if present) and mark the first part not
// throwing. The normal-flow edge between them will be split later.
- HInstruction* first_insn = block->GetFirstInstruction();
- if (first_insn->IsLoadException()) {
- // Catch block starts with a LoadException. Split the block after
- // the StoreLocal and ClearException which must come after the load.
- DCHECK(first_insn->GetNext()->IsStoreLocal());
- DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
- throwing_block = block->SplitBefore(first_insn->GetNext()->GetNext()->GetNext());
- } else {
- // Catch block does not load the exception. Split at the beginning
- // to create an empty catch block.
- throwing_block = block->SplitBefore(first_insn);
- }
+ throwing_block = block->SplitCatchBlockAfterMoveException();
+      // Move-exception does not throw and the block has throwing instructions,
+ // so it must have been possible to split it.
+ DCHECK(throwing_block != nullptr);
}
try_block_info.Put(throwing_block->GetBlockId(),
@@ -743,6 +735,79 @@ static InvokeType GetInvokeTypeFromOpCode(Instruction::Code opcode) {
}
}
+ArtMethod* HGraphBuilder::ResolveMethod(uint16_t method_idx, InvokeType invoke_type) {
+ ScopedObjectAccess soa(Thread::Current());
+ StackHandleScope<2> hs(soa.Self());
+
+ ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
+ Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+ soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
+ Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
+
+ ArtMethod* resolved_method = class_linker->ResolveMethod(
+ *dex_compilation_unit_->GetDexFile(),
+ method_idx,
+ dex_compilation_unit_->GetDexCache(),
+ class_loader,
+ /* referrer */ nullptr,
+ invoke_type);
+
+ if (UNLIKELY(resolved_method == nullptr)) {
+ // Clean up any exception left by type resolution.
+ soa.Self()->ClearException();
+ return nullptr;
+ }
+
+ // Check access. The class linker has a fast path for looking into the dex cache
+ // and does not check the access if it hits it.
+ if (compiling_class.Get() == nullptr) {
+ if (!resolved_method->IsPublic()) {
+ return nullptr;
+ }
+ } else if (!compiling_class->CanAccessResolvedMethod(resolved_method->GetDeclaringClass(),
+ resolved_method,
+ dex_compilation_unit_->GetDexCache().Get(),
+ method_idx)) {
+ return nullptr;
+ }
+
+ // We have to special case the invoke-super case, as ClassLinker::ResolveMethod does not.
+ // We need to look at the referrer's super class vtable.
+ if (invoke_type == kSuper) {
+ if (compiling_class.Get() == nullptr) {
+ // Invoking a super method requires knowing the actual super class. If we did not resolve
+      // the compiling method's declaring class (which only happens for ahead-of-time compilation),
+ // bail out.
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ return nullptr;
+ }
+ uint16_t vtable_index = resolved_method->GetMethodIndex();
+ ArtMethod* actual_method = compiling_class->GetSuperClass()->GetVTableEntry(
+ vtable_index, class_linker->GetImagePointerSize());
+ if (actual_method != resolved_method &&
+ !IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+ // TODO: The actual method could still be referenced in the current dex file, so we
+ // could try locating it.
+ // TODO: Remove the dex_file restriction.
+ return nullptr;
+ }
+ if (!actual_method->IsInvokable()) {
+ // Fail if the actual method cannot be invoked. Otherwise, the runtime resolution stub
+ // could resolve the callee to the wrong method.
+ return nullptr;
+ }
+ resolved_method = actual_method;
+ }
+
+ // Check for incompatible class changes. The class linker has a fast path for
+ // looking into the dex cache and does not check incompatible class changes if it hits it.
+ if (resolved_method->CheckIncompatibleClassChange(invoke_type)) {
+ return nullptr;
+ }
+
+ return resolved_method;
+}
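+
+The invoke-super handling above boils down to a vtable lookup in the referrer's superclass at the slot of the statically resolved method; a simplified sketch with hypothetical stand-in types (not ART's mirror classes):
+
+#include <vector>
+
+struct Method { int vtable_index; };
+struct Class {
+  const Class* super_class;
+  std::vector<Method*> vtable;
+};
+
+// The statically resolved method only pins down the vtable slot; the callee
+// actually invoked is whatever the compiling class's superclass holds there.
+Method* ResolveSuperTarget(const Class* compiling_class, const Method* resolved) {
+  return compiling_class->super_class->vtable[resolved->vtable_index];
+}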
+
bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
uint32_t dex_pc,
uint32_t method_idx,
@@ -750,22 +815,18 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
bool is_range,
uint32_t* args,
uint32_t register_index) {
- InvokeType original_invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
- InvokeType optimized_invoke_type = original_invoke_type;
+ InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
const char* descriptor = dex_file_->GetMethodShorty(method_idx);
Primitive::Type return_type = Primitive::GetType(descriptor[0]);
// Remove the return type from the 'proto'.
size_t number_of_arguments = strlen(descriptor) - 1;
- if (original_invoke_type != kStatic) { // instance call
+ if (invoke_type != kStatic) { // instance call
// One extra argument for 'this'.
number_of_arguments++;
}
MethodReference target_method(dex_file_, method_idx);
- int32_t table_index = 0;
- uintptr_t direct_code = 0;
- uintptr_t direct_method = 0;
// Special handling for string init.
int32_t string_init_offset = 0;
@@ -788,7 +849,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
method_idx,
target_method,
dispatch_info,
- original_invoke_type,
+ invoke_type,
kStatic /* optimized_invoke_type */,
HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
return HandleStringInit(invoke,
@@ -799,23 +860,16 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
descriptor);
}
- // Handle unresolved methods.
- if (!compiler_driver_->ComputeInvokeInfo(dex_compilation_unit_,
- dex_pc,
- true /* update_stats */,
- true /* enable_devirtualization */,
- &optimized_invoke_type,
- &target_method,
- &table_index,
- &direct_code,
- &direct_method)) {
+ ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type);
+
+ if (resolved_method == nullptr) {
MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod);
HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_,
number_of_arguments,
return_type,
dex_pc,
method_idx,
- original_invoke_type);
+ invoke_type);
return HandleInvoke(invoke,
number_of_vreg_arguments,
args,
@@ -825,21 +879,26 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
nullptr /* clinit_check */);
}
- // Handle resolved methods (non string init).
-
- DCHECK(optimized_invoke_type != kSuper);
-
// Potential class initialization check, in the case of a static method call.
HClinitCheck* clinit_check = nullptr;
HInvoke* invoke = nullptr;
- if (optimized_invoke_type == kDirect || optimized_invoke_type == kStatic) {
+ if (invoke_type == kDirect || invoke_type == kStatic || invoke_type == kSuper) {
// By default, consider that the called method implicitly requires
// an initialization check of its declaring method.
HInvokeStaticOrDirect::ClinitCheckRequirement clinit_check_requirement
= HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
- if (optimized_invoke_type == kStatic) {
- clinit_check = ProcessClinitCheckForInvoke(dex_pc, method_idx, &clinit_check_requirement);
+ ScopedObjectAccess soa(Thread::Current());
+ if (invoke_type == kStatic) {
+ clinit_check = ProcessClinitCheckForInvoke(
+ dex_pc, resolved_method, method_idx, &clinit_check_requirement);
+ } else if (invoke_type == kSuper) {
+ if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
+ // Update the target method to the one resolved. Note that this may be a no-op if
+ // we resolved to the method referenced by the instruction.
+ method_idx = resolved_method->GetDexMethodIndex();
+ target_method = MethodReference(dex_file_, method_idx);
+ }
}
HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
@@ -855,24 +914,26 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
method_idx,
target_method,
dispatch_info,
- original_invoke_type,
- optimized_invoke_type,
+ invoke_type,
+ invoke_type,
clinit_check_requirement);
- } else if (optimized_invoke_type == kVirtual) {
+ } else if (invoke_type == kVirtual) {
+ ScopedObjectAccess soa(Thread::Current()); // Needed for the method index
invoke = new (arena_) HInvokeVirtual(arena_,
number_of_arguments,
return_type,
dex_pc,
method_idx,
- table_index);
+ resolved_method->GetMethodIndex());
} else {
- DCHECK_EQ(optimized_invoke_type, kInterface);
+ DCHECK_EQ(invoke_type, kInterface);
+ ScopedObjectAccess soa(Thread::Current()); // Needed for the method index
invoke = new (arena_) HInvokeInterface(arena_,
number_of_arguments,
return_type,
dex_pc,
method_idx,
- table_index);
+ resolved_method->GetDexMethodIndex());
}
return HandleInvoke(invoke,
@@ -884,26 +945,106 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
clinit_check);
}
-HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
- uint32_t dex_pc,
- uint32_t method_idx,
- HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+bool HGraphBuilder::BuildNewInstance(uint16_t type_index, uint32_t dex_pc) {
+ bool finalizable;
+ bool can_throw = NeedsAccessCheck(type_index, &finalizable);
+
+ // Only the non-resolved entrypoint handles the finalizable class case. If we
+  // need access checks, then the class was not resolved at compile time and may
+  // turn out to be finalizable.
+ QuickEntrypointEnum entrypoint = (finalizable || can_throw)
+ ? kQuickAllocObject
+ : kQuickAllocObjectInitialized;
+
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<4> hs(soa.Self());
+ StackHandleScope<3> hs(soa.Self());
Handle<mirror::DexCache> dex_cache(hs.NewHandle(
dex_compilation_unit_->GetClassLinker()->FindDexCache(
soa.Self(), *dex_compilation_unit_->GetDexFile())));
- Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
- soa.Decode<mirror::ClassLoader*>(dex_compilation_unit_->GetClassLoader())));
- ArtMethod* resolved_method = compiler_driver_->ResolveMethod(
- soa, dex_cache, class_loader, dex_compilation_unit_, method_idx, InvokeType::kStatic);
+ Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
+ const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+ Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
+ outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+
+ if (outer_dex_cache.Get() != dex_cache.Get()) {
+ // We currently do not support inlining allocations across dex files.
+ return false;
+ }
+
+ HLoadClass* load_class = new (arena_) HLoadClass(
+ graph_->GetCurrentMethod(),
+ type_index,
+ outer_dex_file,
+ IsOutermostCompilingClass(type_index),
+ dex_pc,
+ /*needs_access_check*/ can_throw,
+ compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, type_index));
+
+ current_block_->AddInstruction(load_class);
+ HInstruction* cls = load_class;
+ if (!IsInitialized(resolved_class)) {
+ cls = new (arena_) HClinitCheck(load_class, dex_pc);
+ current_block_->AddInstruction(cls);
+ }
+
+ current_block_->AddInstruction(new (arena_) HNewInstance(
+ cls,
+ graph_->GetCurrentMethod(),
+ dex_pc,
+ type_index,
+ *dex_compilation_unit_->GetDexFile(),
+ can_throw,
+ finalizable,
+ entrypoint));
+ return true;
+}
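+
+The entrypoint choice at the top of BuildNewInstance falls back to the generic allocation path whenever the class might be finalizable, including the unresolved case where finalizability is simply unknown; a small sketch of that decision (enum and function names are illustrative only):
+
+enum class AllocEntrypoint { kAllocObject, kAllocObjectInitialized };
+
+// Only the generic entrypoint handles finalizable classes; when an access
+// check is needed the class was not resolved, so it is conservatively treated
+// as potentially finalizable too.
+AllocEntrypoint ChooseAllocEntrypoint(bool finalizable, bool needs_access_check) {
+  return (finalizable || needs_access_check)
+      ? AllocEntrypoint::kAllocObject
+      : AllocEntrypoint::kAllocObjectInitialized;
+}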
+
+static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class)
+ SHARED_REQUIRES(Locks::mutator_lock_) {
+ return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
+}
- DCHECK(resolved_method != nullptr);
+bool HGraphBuilder::IsInitialized(Handle<mirror::Class> cls) const {
+ if (cls.Get() == nullptr) {
+ return false;
+ }
+
+ // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
+  // check whether the class is in an image for AOT compilation.
+ if (cls->IsInitialized() &&
+ compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
+ return true;
+ }
+
+ if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
+ return true;
+ }
+
+ // TODO: We should walk over the inlined methods, but we don't pass
+ // that information to the builder.
+ if (IsSubClass(GetCompilingClass(), cls.Get())) {
+ return true;
+ }
+
+ return false;
+}
+HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
+ uint32_t dex_pc,
+ ArtMethod* resolved_method,
+ uint32_t method_idx,
+ HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
const DexFile& outer_dex_file = *outer_compilation_unit_->GetDexFile();
+ Thread* self = Thread::Current();
+ StackHandleScope<4> hs(self);
+ Handle<mirror::DexCache> dex_cache(hs.NewHandle(
+ dex_compilation_unit_->GetClassLinker()->FindDexCache(
+ self, *dex_compilation_unit_->GetDexFile())));
Handle<mirror::DexCache> outer_dex_cache(hs.NewHandle(
- outer_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), outer_dex_file)));
+ outer_compilation_unit_->GetClassLinker()->FindDexCache(
+ self, outer_dex_file)));
Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
+ Handle<mirror::Class> resolved_method_class(hs.NewHandle(resolved_method->GetDeclaringClass()));
// The index at which the method's class is stored in the DexCache's type array.
uint32_t storage_index = DexFile::kDexNoIndex;
@@ -921,41 +1062,21 @@ HClinitCheck* HGraphBuilder::ProcessClinitCheckForInvoke(
HClinitCheck* clinit_check = nullptr;
- if (!outer_class->IsInterface()
- && outer_class->IsSubClass(resolved_method->GetDeclaringClass())) {
- // If the outer class is the declaring class or a subclass
- // of the declaring class, no class initialization is needed
- // before the static method call.
- // Note that in case of inlining, we do not need to add clinit checks
- // to calls that satisfy this subclass check with any inlined methods. This
- // will be detected by the optimization passes.
+ if (IsInitialized(resolved_method_class)) {
*clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
} else if (storage_index != DexFile::kDexNoIndex) {
- // If the method's class type index is available, check
- // whether we should add an explicit class initialization
- // check for its declaring class before the static method call.
-
- // TODO: find out why this check is needed.
- bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
- *outer_compilation_unit_->GetDexFile(), storage_index);
- bool is_initialized =
- resolved_method->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
- if (is_initialized) {
- *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kNone;
- } else {
- *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
- HLoadClass* load_class = new (arena_) HLoadClass(
- graph_->GetCurrentMethod(),
- storage_index,
- *dex_compilation_unit_->GetDexFile(),
- is_outer_class,
- dex_pc,
- /*needs_access_check*/ false);
- current_block_->AddInstruction(load_class);
- clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
- current_block_->AddInstruction(clinit_check);
- }
+ *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
+ HLoadClass* load_class = new (arena_) HLoadClass(
+ graph_->GetCurrentMethod(),
+ storage_index,
+ outer_dex_file,
+ is_outer_class,
+ dex_pc,
+ /*needs_access_check*/ false,
+ compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index));
+ current_block_->AddInstruction(load_class);
+ clinit_check = new (arena_) HClinitCheck(load_class, dex_pc);
+ current_block_->AddInstruction(clinit_check);
}
return clinit_check;
}
@@ -1006,7 +1127,9 @@ bool HGraphBuilder::SetupInvokeArguments(HInvoke* invoke,
return false;
}
- if (invoke->IsInvokeStaticOrDirect()) {
+ if (invoke->IsInvokeStaticOrDirect() &&
+ HInvokeStaticOrDirect::NeedsCurrentMethodInput(
+ invoke->AsInvokeStaticOrDirect()->GetMethodLoadKind())) {
invoke->SetArgumentAt(*argument_index, graph_->GetCurrentMethod());
(*argument_index)++;
}
@@ -1278,7 +1401,7 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
uint16_t field_index = instruction.VRegB_21c();
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<4> hs(soa.Self());
+ StackHandleScope<5> hs(soa.Self());
Handle<mirror::DexCache> dex_cache(hs.NewHandle(
dex_compilation_unit_->GetClassLinker()->FindDexCache(
soa.Self(), *dex_compilation_unit_->GetDexFile())));
@@ -1324,26 +1447,26 @@ bool HGraphBuilder::BuildStaticFieldAccess(const Instruction& instruction,
}
}
- // TODO: find out why this check is needed.
- bool is_in_dex_cache = compiler_driver_->CanAssumeTypeIsPresentInDexCache(
- *outer_compilation_unit_->GetDexFile(), storage_index);
- bool is_initialized = resolved_field->GetDeclaringClass()->IsInitialized() && is_in_dex_cache;
-
+ bool is_in_cache =
+ compiler_driver_->CanAssumeTypeIsPresentInDexCache(outer_dex_file, storage_index);
HLoadClass* constant = new (arena_) HLoadClass(graph_->GetCurrentMethod(),
storage_index,
- *dex_compilation_unit_->GetDexFile(),
+ outer_dex_file,
is_outer_class,
dex_pc,
- /*needs_access_check*/ false);
+ /*needs_access_check*/ false,
+ is_in_cache);
current_block_->AddInstruction(constant);
HInstruction* cls = constant;
- if (!is_initialized && !is_outer_class) {
+
+ Handle<mirror::Class> klass(hs.NewHandle(resolved_field->GetDeclaringClass()));
+ if (!IsInitialized(klass)) {
cls = new (arena_) HClinitCheck(constant, dex_pc);
current_block_->AddInstruction(cls);
}
- uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex();
+ uint16_t class_def_index = klass->GetDexClassDefIndex();
if (is_put) {
// We need to keep the class alive before loading the value.
Temporaries temps(graph_);
@@ -1455,7 +1578,8 @@ void HGraphBuilder::BuildFilledNewArray(uint32_t dex_pc,
uint32_t* args,
uint32_t register_index) {
HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
- QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+ bool finalizable;
+ QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
? kQuickAllocArrayWithAccessCheck
: kQuickAllocArray;
HInstruction* object = new (arena_) HNewArray(length,
@@ -1606,19 +1730,20 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction,
ScopedObjectAccess soa(Thread::Current());
StackHandleScope<2> hs(soa.Self());
+ const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
Handle<mirror::DexCache> dex_cache(hs.NewHandle(
- dex_compilation_unit_->GetClassLinker()->FindDexCache(
- soa.Self(), *dex_compilation_unit_->GetDexFile())));
+ dex_compilation_unit_->GetClassLinker()->FindDexCache(soa.Self(), dex_file)));
Handle<mirror::Class> resolved_class(hs.NewHandle(dex_cache->GetResolvedType(type_index)));
HInstruction* object = LoadLocal(reference, Primitive::kPrimNot, dex_pc);
HLoadClass* cls = new (arena_) HLoadClass(
graph_->GetCurrentMethod(),
type_index,
- *dex_compilation_unit_->GetDexFile(),
+ dex_file,
IsOutermostCompilingClass(type_index),
dex_pc,
- !can_access);
+ !can_access,
+ compiler_driver_->CanAssumeTypeIsPresentInDexCache(dex_file, type_index));
current_block_->AddInstruction(cls);
// The class needs a temporary before being used by the type check.
@@ -1635,9 +1760,9 @@ void HGraphBuilder::BuildTypeCheck(const Instruction& instruction,
}
}
-bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index) const {
+bool HGraphBuilder::NeedsAccessCheck(uint32_t type_index, bool* finalizable) const {
return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
- dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index);
+ dex_compilation_unit_->GetDexMethodIndex(), *dex_file_, type_index, finalizable);
}
void HGraphBuilder::BuildSwitchJumpTable(const SwitchTable& table,
@@ -2514,16 +2639,9 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
current_block_->AddInstruction(fake_string);
UpdateLocal(register_index, fake_string, dex_pc);
} else {
- QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
- ? kQuickAllocObjectWithAccessCheck
- : kQuickAllocObject;
-
- current_block_->AddInstruction(new (arena_) HNewInstance(
- graph_->GetCurrentMethod(),
- dex_pc,
- type_index,
- *dex_compilation_unit_->GetDexFile(),
- entrypoint));
+ if (!BuildNewInstance(type_index, dex_pc)) {
+ return false;
+ }
UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction(), dex_pc);
}
break;
@@ -2532,7 +2650,8 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
case Instruction::NEW_ARRAY: {
uint16_t type_index = instruction.VRegC_22c();
HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt, dex_pc);
- QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index)
+ bool finalizable;
+ QuickEntrypointEnum entrypoint = NeedsAccessCheck(type_index, &finalizable)
? kQuickAllocArrayWithAccessCheck
: kQuickAllocArray;
current_block_->AddInstruction(new (arena_) HNewArray(length,
@@ -2750,10 +2869,11 @@ bool HGraphBuilder::AnalyzeDexInstruction(const Instruction& instruction, uint32
current_block_->AddInstruction(new (arena_) HLoadClass(
graph_->GetCurrentMethod(),
type_index,
- *dex_compilation_unit_->GetDexFile(),
+ *dex_file_,
IsOutermostCompilingClass(type_index),
dex_pc,
- !can_access));
+ !can_access,
+ compiler_driver_->CanAssumeTypeIsPresentInDexCache(*dex_file_, type_index)));
UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction(), dex_pc);
break;
}
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 9eaa4b62c5..c3979f3dd1 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -138,7 +138,10 @@ class HGraphBuilder : public ValueObject {
HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type, uint32_t dex_pc) const;
void PotentiallyAddSuspendCheck(HBasicBlock* target, uint32_t dex_pc);
void InitializeParameters(uint16_t number_of_parameters);
- bool NeedsAccessCheck(uint32_t type_index) const;
+
+  // Returns whether the current method needs an access check for the type.
+  // The output parameter `finalizable` is set to whether the type is finalizable.
+ bool NeedsAccessCheck(uint32_t type_index, /*out*/bool* finalizable) const;
template<typename T>
void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc);
@@ -302,8 +305,21 @@ class HGraphBuilder : public ValueObject {
HClinitCheck* ProcessClinitCheckForInvoke(
uint32_t dex_pc,
+ ArtMethod* method,
uint32_t method_idx,
- HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement);
+ HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement)
+ SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Build an HNewInstance instruction.
+ bool BuildNewInstance(uint16_t type_index, uint32_t dex_pc);
+
+ // Return whether the compiler can assume `cls` is initialized.
+ bool IsInitialized(Handle<mirror::Class> cls) const
+ SHARED_REQUIRES(Locks::mutator_lock_);
+
+  // Try to resolve a method using the class linker. Returns null if the method could
+ // not be resolved.
+ ArtMethod* ResolveMethod(uint16_t method_idx, InvokeType invoke_type);
ArenaAllocator* const arena_;
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a1bb5e0838..0baa0e30dc 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -42,7 +42,7 @@
#include "compiled_method.h"
#include "dex/verified_method.h"
-#include "driver/dex_compilation_unit.h"
+#include "driver/compiler_driver.h"
#include "gc_map_builder.h"
#include "graph_visualizer.h"
#include "intrinsics.h"
@@ -208,6 +208,7 @@ class DisassemblyScope {
void CodeGenerator::GenerateSlowPaths() {
size_t code_start = 0;
for (SlowPathCode* slow_path : slow_paths_) {
+ current_slow_path_ = slow_path;
if (disasm_info_ != nullptr) {
code_start = GetAssembler()->CodeSize();
}
@@ -216,6 +217,7 @@ void CodeGenerator::GenerateSlowPaths() {
disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize());
}
}
+ current_slow_path_ = nullptr;
}
void CodeGenerator::CompileInternal(CodeAllocator* allocator, bool is_baseline) {
@@ -308,7 +310,7 @@ size_t CodeGenerator::FindTwoFreeConsecutiveAlignedEntries(bool* array, size_t l
void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
size_t maximum_number_of_live_core_registers,
- size_t maximum_number_of_live_fp_registers,
+ size_t maximum_number_of_live_fpu_registers,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order) {
block_order_ = &block_order;
@@ -322,14 +324,14 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
&& IsLeafMethod()
&& !RequiresCurrentMethod()) {
DCHECK_EQ(maximum_number_of_live_core_registers, 0u);
- DCHECK_EQ(maximum_number_of_live_fp_registers, 0u);
+ DCHECK_EQ(maximum_number_of_live_fpu_registers, 0u);
SetFrameSize(CallPushesPC() ? GetWordSize() : 0);
} else {
SetFrameSize(RoundUp(
number_of_spill_slots * kVRegSize
+ number_of_out_slots * kVRegSize
+ maximum_number_of_live_core_registers * GetWordSize()
- + maximum_number_of_live_fp_registers * GetFloatingPointSpillSlotSize()
+ + maximum_number_of_live_fpu_registers * GetFloatingPointSpillSlotSize()
+ FrameEntrySpillSize(),
kStackAlignment));
}
@@ -381,11 +383,11 @@ void CodeGenerator::CreateCommonInvokeLocationSummary(
HInvokeStaticOrDirect* call = invoke->AsInvokeStaticOrDirect();
switch (call->GetMethodLoadKind()) {
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- locations->SetInAt(call->GetCurrentMethodInputIndex(), visitor->GetMethodLocation());
+ locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
locations->AddTemp(visitor->GetMethodLocation());
- locations->SetInAt(call->GetCurrentMethodInputIndex(), Location::RequiresRegister());
+ locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister());
break;
default:
locations->AddTemp(visitor->GetMethodLocation());
@@ -545,15 +547,19 @@ void CodeGenerator::GenerateUnresolvedFieldAccess(
}
}
+// TODO: Remove argument `code_generator_supports_read_barrier` when
+// all code generators have read barrier support.
void CodeGenerator::CreateLoadClassLocationSummary(HLoadClass* cls,
Location runtime_type_index_location,
- Location runtime_return_location) {
+ Location runtime_return_location,
+ bool code_generator_supports_read_barrier) {
ArenaAllocator* allocator = cls->GetBlock()->GetGraph()->GetArena();
LocationSummary::CallKind call_kind = cls->NeedsAccessCheck()
? LocationSummary::kCall
- : (cls->CanCallRuntime()
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall);
+ : (((code_generator_supports_read_barrier && kEmitCompilerReadBarrier) ||
+ cls->CanCallRuntime())
+ ? LocationSummary::kCallOnSlowPath
+ : LocationSummary::kNoCall);
LocationSummary* locations = new (allocator) LocationSummary(cls, call_kind);
if (cls->NeedsAccessCheck()) {
locations->SetInAt(0, Location::NoLocation());
@@ -787,9 +793,10 @@ CodeGenerator* CodeGenerator::Create(HGraph* graph,
}
void CodeGenerator::BuildNativeGCMap(
- ArenaVector<uint8_t>* data, const DexCompilationUnit& dex_compilation_unit) const {
+ ArenaVector<uint8_t>* data, const CompilerDriver& compiler_driver) const {
const std::vector<uint8_t>& gc_map_raw =
- dex_compilation_unit.GetVerifiedMethod()->GetDexGcMap();
+ compiler_driver.GetVerifiedMethod(&GetGraph()->GetDexFile(), GetGraph()->GetMethodIdx())
+ ->GetDexGcMap();
verifier::DexPcToReferenceMap dex_gc_map(&(gc_map_raw)[0]);
uint32_t max_native_offset = stack_map_stream_.ComputeMaxNativePcOffset();
@@ -911,19 +918,22 @@ void CodeGenerator::BuildVMapTable(ArenaVector<uint8_t>* data) const {
vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker);
}
-void CodeGenerator::BuildStackMaps(ArenaVector<uint8_t>* data) {
- uint32_t size = stack_map_stream_.PrepareForFillIn();
- data->resize(size);
- MemoryRegion region(data->data(), size);
+size_t CodeGenerator::ComputeStackMapsSize() {
+ return stack_map_stream_.PrepareForFillIn();
+}
+
+void CodeGenerator::BuildStackMaps(MemoryRegion region) {
stack_map_stream_.FillIn(region);
}
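
Splitting stack map emission into ComputeStackMapsSize() and BuildStackMaps(region) lets the caller reserve the exact amount of space, e.g. inside a larger output buffer, before a single fill pass; a minimal sketch of the pattern with a hypothetical producer standing in for StackMapStream:

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

class BlobProducer {
 public:
  std::size_t PrepareForFillIn() const { return blob_.size(); }
  void FillIn(uint8_t* region, std::size_t size) const { std::memcpy(region, blob_.data(), size); }
 private:
  std::vector<uint8_t> blob_{0xDE, 0xAD, 0xBE, 0xEF};
};

int main() {
  BlobProducer producer;
  std::vector<uint8_t> output(producer.PrepareForFillIn());  // Phase 1: query the size.
  producer.FillIn(output.data(), output.size());             // Phase 2: fill in place.
  return 0;
}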
void CodeGenerator::RecordNativeDebugInfo(uint32_t dex_pc,
uintptr_t native_pc_begin,
uintptr_t native_pc_end) {
- if (src_map_ != nullptr && dex_pc != kNoDexPc && native_pc_begin != native_pc_end) {
- src_map_->push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin),
- static_cast<int32_t>(dex_pc)}));
+ if (compiler_options_.GetGenerateDebugInfo() &&
+ dex_pc != kNoDexPc &&
+ native_pc_begin != native_pc_end) {
+ src_map_.push_back(SrcMapElem({static_cast<uint32_t>(native_pc_begin),
+ static_cast<int32_t>(dex_pc)}));
}
}
@@ -1314,21 +1324,38 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod
// coherent with the runtime call generated, and that the GC side effect is
// set when required.
if (slow_path == nullptr) {
- DCHECK(instruction->GetLocations()->WillCall()) << instruction->DebugName();
+ DCHECK(instruction->GetLocations()->WillCall())
+ << "instruction->DebugName()=" << instruction->DebugName();
DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()))
- << instruction->DebugName() << instruction->GetSideEffects().ToString();
+ << "instruction->DebugName()=" << instruction->DebugName()
+ << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString();
} else {
DCHECK(instruction->GetLocations()->OnlyCallsOnSlowPath() || slow_path->IsFatal())
- << instruction->DebugName() << slow_path->GetDescription();
+ << "instruction->DebugName()=" << instruction->DebugName()
+ << " slow_path->GetDescription()=" << slow_path->GetDescription();
DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
// Control flow would not come back into the code if a fatal slow
// path is taken, so we do not care if it triggers GC.
slow_path->IsFatal() ||
// HDeoptimize is a special case: we know we are not coming back from
// it into the code.
- instruction->IsDeoptimize())
- << instruction->DebugName() << instruction->GetSideEffects().ToString()
- << slow_path->GetDescription();
+ instruction->IsDeoptimize() ||
+ // When read barriers are enabled, some instructions use a
+ // slow path to emit a read barrier, which does not trigger
+ // GC, is not fatal, nor is emitted by HDeoptimize
+ // instructions.
+ (kEmitCompilerReadBarrier &&
+ (instruction->IsInstanceFieldGet() ||
+ instruction->IsStaticFieldGet() ||
+ instruction->IsArraySet() ||
+ instruction->IsArrayGet() ||
+ instruction->IsLoadClass() ||
+ instruction->IsLoadString() ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast())))
+ << "instruction->DebugName()=" << instruction->DebugName()
+ << " instruction->GetSideEffects().ToString()=" << instruction->GetSideEffects().ToString()
+ << " slow_path->GetDescription()=" << slow_path->GetDescription();
}
// Check the coherency of leaf information.
@@ -1340,11 +1367,12 @@ void CodeGenerator::ValidateInvokeRuntime(HInstruction* instruction, SlowPathCod
}
void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- RegisterSet* register_set = locations->GetLiveRegisters();
+ RegisterSet* live_registers = locations->GetLiveRegisters();
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+
for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
if (!codegen->IsCoreCalleeSaveRegister(i)) {
- if (register_set->ContainsCoreRegister(i)) {
+ if (live_registers->ContainsCoreRegister(i)) {
// If the register holds an object, update the stack mask.
if (locations->RegisterContainsObject(i)) {
locations->SetStackBit(stack_offset / kVRegSize);
@@ -1359,7 +1387,7 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo
for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
- if (register_set->ContainsFloatingPointRegister(i)) {
+ if (live_registers->ContainsFloatingPointRegister(i)) {
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
saved_fpu_stack_offsets_[i] = stack_offset;
@@ -1370,12 +1398,14 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo
}
void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
- RegisterSet* register_set = locations->GetLiveRegisters();
+ RegisterSet* live_registers = locations->GetLiveRegisters();
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+
for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
if (!codegen->IsCoreCalleeSaveRegister(i)) {
- if (register_set->ContainsCoreRegister(i)) {
+ if (live_registers->ContainsCoreRegister(i)) {
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
}
}
@@ -1383,8 +1413,9 @@ void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary*
for (size_t i = 0, e = codegen->GetNumberOfFloatingPointRegisters(); i < e; ++i) {
if (!codegen->IsFloatingPointCalleeSaveRegister(i)) {
- if (register_set->ContainsFloatingPointRegister(i)) {
+ if (live_registers->ContainsFloatingPointRegister(i)) {
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+ DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
stack_offset += codegen->RestoreFloatingPointRegister(stack_offset, i);
}
}
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 47b6f30450..114d97be94 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -22,6 +22,7 @@
#include "base/arena_containers.h"
#include "base/arena_object.h"
#include "base/bit_field.h"
+#include "compiled_method.h"
#include "driver/compiler_options.h"
#include "globals.h"
#include "graph_visualizer.h"
@@ -51,13 +52,9 @@ static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
class Assembler;
class CodeGenerator;
-class DexCompilationUnit;
+class CompilerDriver;
class LinkerPatch;
class ParallelMoveResolver;
-class SrcMapElem;
-template <class Alloc>
-class SrcMap;
-using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
class CodeAllocator {
public:
@@ -204,7 +201,7 @@ class CodeGenerator {
virtual uintptr_t GetAddressOf(HBasicBlock* block) const = 0;
void InitializeCodeGeneration(size_t number_of_spill_slots,
size_t maximum_number_of_live_core_registers,
- size_t maximum_number_of_live_fp_registers,
+ size_t maximum_number_of_live_fpu_registers,
size_t number_of_out_slots,
const ArenaVector<HBasicBlock*>& block_order);
int32_t GetStackSlot(HLocal* local) const;
@@ -253,6 +250,15 @@ class CodeGenerator {
// Returns whether we should split long moves in parallel moves.
virtual bool ShouldSplitLongMoves() const { return false; }
+ size_t GetNumberOfCoreCalleeSaveRegisters() const {
+ return POPCOUNT(core_callee_save_mask_);
+ }
+
+ size_t GetNumberOfCoreCallerSaveRegisters() const {
+ DCHECK_GE(GetNumberOfCoreRegisters(), GetNumberOfCoreCalleeSaveRegisters());
+ return GetNumberOfCoreRegisters() - GetNumberOfCoreCalleeSaveRegisters();
+ }
+
bool IsCoreCalleeSaveRegister(int reg) const {
return (core_callee_save_mask_ & (1 << reg)) != 0;
}
@@ -284,13 +290,12 @@ class CodeGenerator {
slow_paths_.push_back(slow_path);
}
- void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; }
-
void BuildMappingTable(ArenaVector<uint8_t>* vector) const;
void BuildVMapTable(ArenaVector<uint8_t>* vector) const;
void BuildNativeGCMap(
- ArenaVector<uint8_t>* vector, const DexCompilationUnit& dex_compilation_unit) const;
- void BuildStackMaps(ArenaVector<uint8_t>* vector);
+ ArenaVector<uint8_t>* vector, const CompilerDriver& compiler_driver) const;
+ void BuildStackMaps(MemoryRegion region);
+ size_t ComputeStackMapsSize();
bool IsBaseline() const {
return is_baseline_;
@@ -420,7 +425,8 @@ class CodeGenerator {
// TODO: This overlaps a bit with MoveFromReturnRegister. Refactor for a better design.
static void CreateLoadClassLocationSummary(HLoadClass* cls,
Location runtime_type_index_location,
- Location runtime_return_location);
+ Location runtime_return_location,
+ bool code_generator_supports_read_barrier = false);
static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
@@ -446,6 +452,10 @@ class CodeGenerator {
// Copy the result of a call into the given target.
virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
+ const ArenaVector<SrcMapElem>& GetSrcMappingTable() const {
+ return src_map_;
+ }
+
protected:
// Method patch info used for recording locations of required linker patches and
// target methods. The target method can be used for various purposes, whether for
@@ -488,8 +498,9 @@ class CodeGenerator {
stats_(stats),
graph_(graph),
compiler_options_(compiler_options),
- src_map_(nullptr),
+ src_map_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ current_slow_path_(nullptr),
current_block_index_(0),
is_leaf_(true),
requires_current_method_(false) {
@@ -557,6 +568,10 @@ class CodeGenerator {
return raw_pointer_to_labels_array + block->GetBlockId();
}
+ SlowPathCode* GetCurrentSlowPath() {
+ return current_slow_path_;
+ }
+
// Frame size required for this method.
uint32_t frame_size_;
uint32_t core_spill_mask_;
@@ -602,9 +617,12 @@ class CodeGenerator {
const CompilerOptions& compiler_options_;
// Native to dex_pc map used for native debugging/profiling tools.
- DefaultSrcMap* src_map_;
+ ArenaVector<SrcMapElem> src_map_;
ArenaVector<SlowPathCode*> slow_paths_;
+ // The current slow path that we're generating code for.
+ SlowPathCode* current_slow_path_;
+
// The current block index in `block_order_` of the block
// we are generating code for.
size_t current_block_index_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3dc3b7fba0..ac6b5e823a 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -34,6 +34,9 @@
namespace art {
+template<class MirrorType>
+class GcRoot;
+
namespace arm {
static bool ExpectedPairLayout(Location location) {
@@ -74,6 +77,7 @@ class NullCheckSlowPathARM : public SlowPathCode {
}
arm_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pThrowNullPointer), instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
bool IsFatal() const OVERRIDE { return true; }
@@ -98,6 +102,7 @@ class DivZeroCheckSlowPathARM : public SlowPathCode {
}
arm_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pThrowDivZero), instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
bool IsFatal() const OVERRIDE { return true; }
@@ -120,6 +125,7 @@ class SuspendCheckSlowPathARM : public SlowPathCode {
SaveLiveRegisters(codegen, instruction_->GetLocations());
arm_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pTestSuspend), instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickTestSuspend, void, void>();
RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ b(GetReturnLabel());
@@ -176,6 +182,7 @@ class BoundsCheckSlowPathARM : public SlowPathCode {
Primitive::kPrimInt);
arm_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pThrowArrayBounds), instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
bool IsFatal() const OVERRIDE { return true; }
@@ -211,6 +218,11 @@ class LoadClassSlowPathARM : public SlowPathCode {
? QUICK_ENTRY_POINT(pInitializeStaticStorage)
: QUICK_ENTRY_POINT(pInitializeType);
arm_codegen->InvokeRuntime(entry_point_offset, at_, dex_pc_, this);
+ if (do_clinit_) {
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ } else {
+ CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ }
// Move the class to the desired location.
Location out = locations->Out();
@@ -257,6 +269,7 @@ class LoadStringSlowPathARM : public SlowPathCode {
__ LoadImmediate(calling_convention.GetRegisterAt(0), instruction_->GetStringIndex());
arm_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pResolveString), instruction_, instruction_->GetDexPc(), this);
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
RestoreLiveRegisters(codegen, locations);
@@ -286,15 +299,6 @@ class TypeCheckSlowPathARM : public SlowPathCode {
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
__ Bind(GetEntryLabel());
- if (instruction_->IsCheckCast()) {
- // The codegen for the instruction overwrites `temp`, so put it back in place.
- Register obj = locations->InAt(0).AsRegister<Register>();
- Register temp = locations->GetTemp(0).AsRegister<Register>();
- uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
- }
-
if (!is_fatal_) {
SaveLiveRegisters(codegen, locations);
}
@@ -315,6 +319,8 @@ class TypeCheckSlowPathARM : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<
+ kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
} else {
DCHECK(instruction_->IsCheckCast());
@@ -322,6 +328,7 @@ class TypeCheckSlowPathARM : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
if (!is_fatal_) {
@@ -354,6 +361,7 @@ class DeoptimizationSlowPathARM : public SlowPathCode {
uint32_t dex_pc = deoptimize->GetDexPc();
CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM"; }
@@ -396,6 +404,7 @@ class ArraySetSlowPathARM : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
RestoreLiveRegisters(codegen, locations);
__ b(GetExitLabel());
}
@@ -408,6 +417,221 @@ class ArraySetSlowPathARM : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
};
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM : public SlowPathCode {
+ public:
+ ReadBarrierForHeapReferenceSlowPathARM(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index)
+ : instruction_(instruction),
+ out_(out),
+ ref_(ref),
+ obj_(obj),
+ offset_(offset),
+ index_(index) {
+ DCHECK(kEmitCompilerReadBarrier);
+ // If `obj` is equal to `out` or `ref`, it means the initial object
+ // has been overwritten by (or after) the heap object reference load
+ // to be instrumented, e.g.:
+ //
+ // __ LoadFromOffset(kLoadWord, out, out, offset);
+ // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ //
+ // In that case, we have lost the information about the original
+ // object, and the emitted read barrier cannot work properly.
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+ DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(!instruction_->IsInvoke() ||
+ (instruction_->IsInvokeStaticOrDirect() &&
+ instruction_->GetLocations()->Intrinsified()));
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ // We may have to change the index's value, but as `index_` is a
+ // constant member (like other "inputs" of this slow path),
+ // introduce a copy of it, `index`.
+ Location index = index_;
+ if (index_.IsValid()) {
+ // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+ if (instruction_->IsArrayGet()) {
+ // Compute the actual memory offset and store it in `index`.
+ Register index_reg = index_.AsRegister<Register>();
+ DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+ if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+ // We are about to change the value of `index_reg` (see the
+ // calls to art::arm::Thumb2Assembler::Lsl and
+ // art::arm::Thumb2Assembler::AddConstant below), but it has
+ // not been saved by the previous call to
+ // art::SlowPathCode::SaveLiveRegisters, as it is a
+ // callee-save register --
+ // art::SlowPathCode::SaveLiveRegisters does not consider
+ // callee-save registers, as it has been designed with the
+ // assumption that callee-save registers are supposed to be
+ // handled by the called function. So, as a callee-save
+ // register, `index_reg` _would_ eventually be saved onto
+ // the stack, but it would be too late: we would have
+ // changed its value earlier. Therefore, we manually save
+ // it here into another freely available register,
+ // `free_reg`, chosen of course among the caller-save
+ // registers (as a callee-save `free_reg` register would
+ // exhibit the same problem).
+ //
+ // Note we could have requested a temporary register from
+ // the register allocator instead; but we prefer not to, as
+ // this is a slow path, and we know we can find a
+ // caller-save register that is available.
+ Register free_reg = FindAvailableCallerSaveRegister(codegen);
+ __ Mov(free_reg, index_reg);
+ index_reg = free_reg;
+ index = Location::RegisterLocation(index_reg);
+ } else {
+ // The initial register stored in `index_` has already been
+ // saved in the call to art::SlowPathCode::SaveLiveRegisters
+ // (as it is not a callee-save register), so we can freely
+ // use it.
+ }
+ // Shifting the index value contained in `index_reg` by the scale
+ // factor (2) cannot overflow in practice, as the runtime is
+ // unable to allocate object arrays with a size larger than
+ // 2^26 - 1 (that is, 2^28 - 4 bytes).
+ __ Lsl(index_reg, index_reg, TIMES_4);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ __ AddConstant(index_reg, index_reg, offset_);
+ } else {
+ DCHECK(instruction_->IsInvoke());
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+ << instruction_->AsInvoke()->GetIntrinsic();
+ DCHECK_EQ(offset_, 0U);
+ DCHECK(index_.IsRegisterPair());
+ // UnsafeGet's offset location is a register pair, the low
+ // part contains the correct offset.
+ index = index_.ToLow();
+ }
+ }
+
+ // We're moving two or three locations to locations that could
+ // overlap, so we need a parallel move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(ref_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(obj_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot,
+ nullptr);
+ if (index.IsValid()) {
+ parallel_move.AddMove(index,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimInt,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ } else {
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ __ LoadImmediate(calling_convention.GetRegisterAt(2), offset_);
+ }
+ arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<
+ kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+ arm_codegen->Move32(out_, Location::RegisterLocation(R0));
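+ // The runtime call thus receives (ref, obj, offset) in R0-R2, as set up by
+ // the parallel move above, and returns the possibly updated reference in
+ // R0, which the Move32 above copies into `out_`.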
+
+ RestoreLiveRegisters(codegen, locations);
+ __ b(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM"; }
+
+ private:
+ Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+ size_t ref = static_cast<size_t>(ref_.AsRegister<Register>());
+ size_t obj = static_cast<size_t>(obj_.AsRegister<Register>());
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+ return static_cast<Register>(i);
+ }
+ }
+ // We shall never fail to find a free caller-save register, as
+ // there are more than two core caller-save registers on ARM
+ // (meaning it is possible to find one which is different from
+ // `ref` and `obj`).
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+ LOG(FATAL) << "Could not find a free caller-save register";
+ UNREACHABLE();
+ }
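+ // For instance (illustrative only), with `ref_` in R0 and `obj_` in R1 the
+ // search above returns R2, the lowest-numbered core register that is
+ // neither an input nor a callee-save register.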
+
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location ref_;
+ const Location obj_;
+ const uint32_t offset_;
+ // An additional location containing an index to an array.
+ // Only used for HArrayGet and the UnsafeGetObject &
+ // UnsafeGetObjectVolatile intrinsics.
+ const Location index_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM : public SlowPathCode {
+ public:
+ ReadBarrierForRootSlowPathARM(HInstruction* instruction, Location out, Location root)
+ : instruction_(instruction), out_(out), root_(root) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+ arm_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+ arm_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ arm_codegen->Move32(out_, Location::RegisterLocation(R0));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ b(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM"; }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM);
+};
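+// Illustrative usage note: the read-barrier paths in VisitLoadClass and
+// VisitLoadString below reach this slow path through
+// CodeGeneratorARM::GenerateReadBarrierForRoot(instruction, out, root), which
+// is expected to allocate it on the arena, register it via AddSlowPath, and
+// branch to its entry label.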
+
#undef __
#define __ down_cast<ArmAssembler*>(GetAssembler())->
@@ -500,7 +724,9 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
call_patches_(MethodReferenceComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+ relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ dex_cache_arrays_base_labels_(std::less<HArmDexCacheArraysBase*>(),
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
// Always save the LR register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(LR));
}
@@ -581,7 +807,7 @@ Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
LOG(FATAL) << "Unreachable type " << type;
}
- return Location();
+ return Location::NoLocation();
}
void CodeGeneratorARM::SetupBlockedRegisters(bool is_baseline) const {
@@ -820,7 +1046,7 @@ Location InvokeDexCallingConventionVisitorARM::GetNextLocation(Primitive::Type t
LOG(FATAL) << "Unexpected parameter type " << type;
break;
}
- return Location();
+ return Location::NoLocation();
}
Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type type) const {
@@ -847,7 +1073,7 @@ Location InvokeDexCallingConventionVisitorARM::GetReturnLocation(Primitive::Type
}
case Primitive::kPrimVoid:
- return Location();
+ return Location::NoLocation();
}
UNREACHABLE();
@@ -1240,26 +1466,19 @@ void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
__ b(true_label, final_condition);
}
-void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HIf* if_instr,
- HCondition* condition,
- Label* true_target,
- Label* false_target,
- Label* always_true_target) {
+void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
+ Label* true_target_in,
+ Label* false_target_in) {
+ // Generated branching requires both targets to be explicit. If either of the
+ // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
+ Label fallthrough_target;
+ Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+ Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
LocationSummary* locations = condition->GetLocations();
Location left = locations->InAt(0);
Location right = locations->InAt(1);
- // We don't want true_target as a nullptr.
- if (true_target == nullptr) {
- true_target = always_true_target;
- }
- bool falls_through = (false_target == nullptr);
-
- // FP compares don't like null false_targets.
- if (false_target == nullptr) {
- false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
- }
-
Primitive::Type type = condition->InputAt(0)->GetType();
switch (type) {
case Primitive::kPrimLong:
@@ -1278,103 +1497,125 @@ void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HIf* if_instr,
LOG(FATAL) << "Unexpected compare type " << type;
}
- if (!falls_through) {
+ if (false_target != &fallthrough_target) {
__ b(false_target);
}
+
+ if (fallthrough_target.IsLinked()) {
+ __ Bind(&fallthrough_target);
+ }
}
void InstructionCodeGeneratorARM::GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
Label* true_target,
- Label* false_target,
- Label* always_true_target) {
- HInstruction* cond = instruction->InputAt(0);
- if (cond->IsIntConstant()) {
+ Label* false_target) {
+ HInstruction* cond = instruction->InputAt(condition_input_index);
+
+ if (true_target == nullptr && false_target == nullptr) {
+ // Nothing to do. The code always falls through.
+ return;
+ } else if (cond->IsIntConstant()) {
// Constant condition, statically compared against 1.
- int32_t cond_value = cond->AsIntConstant()->GetValue();
- if (cond_value == 1) {
- if (always_true_target != nullptr) {
- __ b(always_true_target);
+ if (cond->AsIntConstant()->IsOne()) {
+ if (true_target != nullptr) {
+ __ b(true_target);
}
- return;
} else {
- DCHECK_EQ(cond_value, 0);
+ DCHECK(cond->AsIntConstant()->IsZero());
+ if (false_target != nullptr) {
+ __ b(false_target);
+ }
}
- } else {
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
- // Condition has been materialized, compare the output to 0
- DCHECK(instruction->GetLocations()->InAt(0).IsRegister());
- __ CompareAndBranchIfNonZero(instruction->GetLocations()->InAt(0).AsRegister<Register>(),
- true_target);
+ return;
+ }
+
+ // The following code generates these patterns:
+ // (1) true_target == nullptr && false_target != nullptr
+ // - opposite condition true => branch to false_target
+ // (2) true_target != nullptr && false_target == nullptr
+ // - condition true => branch to true_target
+ // (3) true_target != nullptr && false_target != nullptr
+ // - condition true => branch to true_target
+ // - branch to false_target
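+ // For example (illustrative): for `if (cond) { A } else { B }` where block B
+ // is emitted next, the caller passes false_target == nullptr, so pattern (2)
+ // applies: a single conditional branch to A is emitted and the code falls
+ // through to B.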
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
+ // Condition has been materialized, compare the output to 0.
+ Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
+ DCHECK(cond_val.IsRegister());
+ if (true_target == nullptr) {
+ __ CompareAndBranchIfZero(cond_val.AsRegister<Register>(), false_target);
} else {
- // Condition has not been materialized, use its inputs as the
- // comparison and its condition as the branch condition.
- Primitive::Type type =
- cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
- // Is this a long or FP comparison that has been folded into the HCondition?
- if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
- // Generate the comparison directly.
- GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
- true_target, false_target, always_true_target);
- return;
- }
+ __ CompareAndBranchIfNonZero(cond_val.AsRegister<Register>(), true_target);
+ }
+ } else {
+ // Condition has not been materialized. Use its inputs as the comparison and
+ // its condition as the branch condition.
+ HCondition* condition = cond->AsCondition();
+
+ // If this is a long or FP comparison that has been folded into
+ // the HCondition, generate the comparison directly.
+ Primitive::Type type = condition->InputAt(0)->GetType();
+ if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+ GenerateCompareTestAndBranch(condition, true_target, false_target);
+ return;
+ }
- LocationSummary* locations = cond->GetLocations();
- DCHECK(locations->InAt(0).IsRegister()) << locations->InAt(0);
- Register left = locations->InAt(0).AsRegister<Register>();
- Location right = locations->InAt(1);
- if (right.IsRegister()) {
- __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
- } else {
- DCHECK(right.IsConstant());
- GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
- }
- __ b(true_target, ARMCondition(cond->AsCondition()->GetCondition()));
+ LocationSummary* locations = cond->GetLocations();
+ DCHECK(locations->InAt(0).IsRegister());
+ Register left = locations->InAt(0).AsRegister<Register>();
+ Location right = locations->InAt(1);
+ if (right.IsRegister()) {
+ __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+ } else {
+ DCHECK(right.IsConstant());
+ GenerateCompareWithImmediate(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+ }
+ if (true_target == nullptr) {
+ __ b(false_target, ARMCondition(condition->GetOppositeCondition()));
+ } else {
+ __ b(true_target, ARMCondition(condition->GetCondition()));
}
}
- if (false_target != nullptr) {
+
+ // If neither branch falls through (case 3), the conditional branch to `true_target`
+ // was already emitted (case 2) and we need to emit a jump to `false_target`.
+ if (true_target != nullptr && false_target != nullptr) {
__ b(false_target);
}
}
void LocationsBuilderARM::VisitIf(HIf* if_instr) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM::VisitIf(HIf* if_instr) {
- Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
- Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
- Label* always_true_target = true_target;
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- always_true_target = nullptr;
- }
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- false_target = nullptr;
- }
- GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+ HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+ HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+ Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+ nullptr : codegen_->GetLabelOf(true_successor);
+ Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+ nullptr : codegen_->GetLabelOf(false_successor);
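+ // A successor that is also the next block in emission order needs no label:
+ // passing nullptr lets GenerateTestAndBranch emit a fallthrough instead of
+ // a branch.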
+ GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
}
void LocationsBuilderARM::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- HInstruction* cond = deoptimize->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCode* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathARM(deoptimize);
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathARM(deoptimize);
codegen_->AddSlowPath(slow_path);
- Label* slow_path_entry = slow_path->GetEntryLabel();
- GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+ GenerateTestAndBranch(deoptimize,
+ /* condition_input_index */ 0,
+ slow_path->GetEntryLabel(),
+ /* false_target */ nullptr);
}
void LocationsBuilderARM::VisitCondition(HCondition* cond) {
@@ -1683,10 +1924,18 @@ void LocationsBuilderARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
codegen_->GetAssembler(),
codegen_->GetInstructionSetFeatures());
if (intrinsic.TryDispatch(invoke)) {
+ if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+ invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+ }
return;
}
HandleInvoke(invoke);
+
+ // For PC-relative dex cache arrays, the invoke has an extra input: the PC-relative address base.
+ if (invoke->HasPcRelativeDexCache()) {
+ invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+ }
}
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -1747,29 +1996,39 @@ void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorARM::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
- Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+ LocationSummary* locations = invoke->GetLocations();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ Register hidden_reg = locations->GetTemp(1).AsRegister<Register>();
uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
invoke->GetImtIndex() % mirror::Class::kImtSize, kArmPointerSize).Uint32Value();
- LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- // Set the hidden argument.
- __ LoadImmediate(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(),
- invoke->GetDexMethodIndex());
+ // Set the hidden argument. It is safe to do this here, as R12
+ // won't be modified between this point and the `blx` (call) instruction.
+ DCHECK_EQ(R12, hidden_reg);
+ __ LoadImmediate(hidden_reg, invoke->GetDexMethodIndex());
- // temp = object->GetClass();
if (receiver.IsStackSlot()) {
__ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex());
+ // /* HeapReference<Class> */ temp = temp->klass_
__ LoadFromOffset(kLoadWord, temp, temp, class_offset);
} else {
+ // /* HeapReference<Class> */ temp = receiver->klass_
__ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not do so in the future).
__ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetImtEntryAt(method_offset);
- uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArmWordSize).Int32Value();
+ uint32_t entry_point =
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmWordSize).Int32Value();
__ LoadFromOffset(kLoadWord, temp, temp, method_offset);
// LR = temp->GetEntryPoint();
__ LoadFromOffset(kLoadWord, LR, temp, entry_point);
@@ -2173,6 +2432,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
conversion,
conversion->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickF2l, int64_t, float>();
break;
case Primitive::kPrimDouble:
@@ -2181,6 +2441,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
conversion,
conversion->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickD2l, int64_t, double>();
break;
default:
@@ -2226,6 +2487,7 @@ void InstructionCodeGeneratorARM::VisitTypeConversion(HTypeConversion* conversio
conversion,
conversion->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickL2f, float, int64_t>();
break;
case Primitive::kPrimDouble:
@@ -2680,7 +2942,7 @@ void LocationsBuilderARM::VisitDiv(HDiv* div) {
case Primitive::kPrimInt: {
if (div->InputAt(1)->IsConstant()) {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
+ locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant()));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
int32_t abs_imm = std::abs(div->InputAt(1)->AsIntConstant()->GetValue());
if (abs_imm <= 1) {
@@ -2748,6 +3010,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) {
DCHECK_EQ(R0, out.AsRegister<Register>());
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), div, div->GetDexPc(), nullptr);
+ CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
}
break;
}
@@ -2762,6 +3025,7 @@ void InstructionCodeGeneratorARM::VisitDiv(HDiv* div) {
DCHECK_EQ(R1, out.AsRegisterPairHigh<Register>());
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLdiv), div, div->GetDexPc(), nullptr);
+ CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
break;
}
@@ -2804,7 +3068,7 @@ void LocationsBuilderARM::VisitRem(HRem* rem) {
case Primitive::kPrimInt: {
if (rem->InputAt(1)->IsConstant()) {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
+ locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
int32_t abs_imm = std::abs(rem->InputAt(1)->AsIntConstant()->GetValue());
if (abs_imm <= 1) {
@@ -2890,22 +3154,26 @@ void InstructionCodeGeneratorARM::VisitRem(HRem* rem) {
DCHECK_EQ(R1, out.AsRegister<Register>());
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pIdivmod), rem, rem->GetDexPc(), nullptr);
+ CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
}
break;
}
case Primitive::kPrimLong: {
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod), rem, rem->GetDexPc(), nullptr);
+ CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
break;
}
case Primitive::kPrimFloat: {
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmodf), rem, rem->GetDexPc(), nullptr);
+ CheckEntrypointTypes<kQuickFmodf, float, float, float>();
break;
}
case Primitive::kPrimDouble: {
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pFmod), rem, rem->GetDexPc(), nullptr);
+ CheckEntrypointTypes<kQuickFmod, double, double, double>();
break;
}
@@ -2975,17 +3243,29 @@ void LocationsBuilderARM::HandleShift(HBinaryOperation* op) {
switch (op->GetResultType()) {
case Primitive::kPrimInt: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrConstant(op->InputAt(1)));
- // Make the output overlap, as it will be used to hold the masked
- // second input.
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ if (op->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Make the output overlap, as it will be used to hold the masked
+ // second input.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
break;
}
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->AddTemp(Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister());
+ if (op->InputAt(1)->IsConstant()) {
+ locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+ // For simplicity, use kOutputOverlap even though we only require that low registers
+ // don't clash with high registers, which the register allocator currently guarantees.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ }
break;
}
default:
@@ -3006,9 +3286,9 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
case Primitive::kPrimInt: {
Register out_reg = out.AsRegister<Register>();
Register first_reg = first.AsRegister<Register>();
- // Arm doesn't mask the shift count so we need to do it ourselves.
if (second.IsRegister()) {
Register second_reg = second.AsRegister<Register>();
+ // Arm doesn't mask the shift count so we need to do it ourselves.
__ and_(out_reg, second_reg, ShifterOperand(kMaxIntShiftValue));
if (op->IsShl()) {
__ Lsl(out_reg, first_reg, out_reg);
@@ -3036,57 +3316,115 @@ void InstructionCodeGeneratorARM::HandleShift(HBinaryOperation* op) {
Register o_h = out.AsRegisterPairHigh<Register>();
Register o_l = out.AsRegisterPairLow<Register>();
- Register temp = locations->GetTemp(0).AsRegister<Register>();
-
Register high = first.AsRegisterPairHigh<Register>();
Register low = first.AsRegisterPairLow<Register>();
- Register second_reg = second.AsRegister<Register>();
-
- if (op->IsShl()) {
- __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue));
- // Shift the high part
- __ Lsl(o_h, high, o_l);
- // Shift the low part and `or` what overflew on the high part
- __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
- __ Lsr(temp, low, temp);
- __ orr(o_h, o_h, ShifterOperand(temp));
- // If the shift is > 32 bits, override the high part
- __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
- __ it(PL);
- __ Lsl(o_h, low, temp, PL);
- // Shift the low part
- __ Lsl(o_l, low, o_l);
- } else if (op->IsShr()) {
- __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
- // Shift the low part
- __ Lsr(o_l, low, o_h);
- // Shift the high part and `or` what underflew on the low part
- __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
- __ Lsl(temp, high, temp);
- __ orr(o_l, o_l, ShifterOperand(temp));
- // If the shift is > 32 bits, override the low part
- __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
- __ it(PL);
- __ Asr(o_l, high, temp, PL);
- // Shift the high part
- __ Asr(o_h, high, o_h);
+ if (second.IsRegister()) {
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+
+ Register second_reg = second.AsRegister<Register>();
+
+ if (op->IsShl()) {
+ __ and_(o_l, second_reg, ShifterOperand(kMaxLongShiftValue));
+ // Shift the high part
+ __ Lsl(o_h, high, o_l);
+ // Shift the low part and `or` what overflowed into the high part
+ __ rsb(temp, o_l, ShifterOperand(kArmBitsPerWord));
+ __ Lsr(temp, low, temp);
+ __ orr(o_h, o_h, ShifterOperand(temp));
+ // If the shift is > 32 bits, override the high part
+ __ subs(temp, o_l, ShifterOperand(kArmBitsPerWord));
+ __ it(PL);
+ __ Lsl(o_h, low, temp, PL);
+ // Shift the low part
+ __ Lsl(o_l, low, o_l);
+ } else if (op->IsShr()) {
+ __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
+ // Shift the low part
+ __ Lsr(o_l, low, o_h);
+ // Shift the high part and `or` what underflowed into the low part
+ __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+ __ Lsl(temp, high, temp);
+ __ orr(o_l, o_l, ShifterOperand(temp));
+ // If the shift is > 32 bits, override the low part
+ __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+ __ it(PL);
+ __ Asr(o_l, high, temp, PL);
+ // Shift the high part
+ __ Asr(o_h, high, o_h);
+ } else {
+ __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
+ // same as Shr except we use `Lsr`s and not `Asr`s
+ __ Lsr(o_l, low, o_h);
+ __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
+ __ Lsl(temp, high, temp);
+ __ orr(o_l, o_l, ShifterOperand(temp));
+ __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
+ __ it(PL);
+ __ Lsr(o_l, high, temp, PL);
+ __ Lsr(o_h, high, o_h);
+ }
} else {
- __ and_(o_h, second_reg, ShifterOperand(kMaxLongShiftValue));
- // same as Shr except we use `Lsr`s and not `Asr`s
- __ Lsr(o_l, low, o_h);
- __ rsb(temp, o_h, ShifterOperand(kArmBitsPerWord));
- __ Lsl(temp, high, temp);
- __ orr(o_l, o_l, ShifterOperand(temp));
- __ subs(temp, o_h, ShifterOperand(kArmBitsPerWord));
- __ it(PL);
- __ Lsr(o_l, high, temp, PL);
- __ Lsr(o_h, high, o_h);
+ // Register allocator doesn't create partial overlap.
+ DCHECK_NE(o_l, high);
+ DCHECK_NE(o_h, low);
+ int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+ uint32_t shift_value = static_cast<uint32_t>(cst & kMaxLongShiftValue);
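+ // Worked examples (illustrative): a SHL by 40 falls into the first case
+ // below and emits o_h = low << 8; o_l = 0, while a SHR by 4 falls into the
+ // last case and emits o_l = (low >> 4) | (high << 28); o_h = high >> 4
+ // (arithmetic shift).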
+ if (shift_value > 32) {
+ if (op->IsShl()) {
+ __ Lsl(o_h, low, shift_value - 32);
+ __ LoadImmediate(o_l, 0);
+ } else if (op->IsShr()) {
+ __ Asr(o_l, high, shift_value - 32);
+ __ Asr(o_h, high, 31);
+ } else {
+ __ Lsr(o_l, high, shift_value - 32);
+ __ LoadImmediate(o_h, 0);
+ }
+ } else if (shift_value == 32) {
+ if (op->IsShl()) {
+ __ mov(o_h, ShifterOperand(low));
+ __ LoadImmediate(o_l, 0);
+ } else if (op->IsShr()) {
+ __ mov(o_l, ShifterOperand(high));
+ __ Asr(o_h, high, 31);
+ } else {
+ __ mov(o_l, ShifterOperand(high));
+ __ LoadImmediate(o_h, 0);
+ }
+ } else if (shift_value == 1) {
+ if (op->IsShl()) {
+ __ Lsls(o_l, low, 1);
+ __ adc(o_h, high, ShifterOperand(high));
+ } else if (op->IsShr()) {
+ __ Asrs(o_h, high, 1);
+ __ Rrx(o_l, low);
+ } else {
+ __ Lsrs(o_h, high, 1);
+ __ Rrx(o_l, low);
+ }
+ } else {
+ DCHECK(2 <= shift_value && shift_value < 32) << shift_value;
+ if (op->IsShl()) {
+ __ Lsl(o_h, high, shift_value);
+ __ orr(o_h, o_h, ShifterOperand(low, LSR, 32 - shift_value));
+ __ Lsl(o_l, low, shift_value);
+ } else if (op->IsShr()) {
+ __ Lsr(o_l, low, shift_value);
+ __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+ __ Asr(o_h, high, shift_value);
+ } else {
+ __ Lsr(o_l, low, shift_value);
+ __ orr(o_l, o_l, ShifterOperand(high, LSL, 32 - shift_value));
+ __ Lsr(o_h, high, shift_value);
+ }
+ }
}
break;
}
default:
LOG(FATAL) << "Unexpected operation type " << type;
+ UNREACHABLE();
}
}
@@ -3118,20 +3456,19 @@ void LocationsBuilderARM::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
InvokeRuntimeCallingConvention calling_convention;
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
locations->SetOut(Location::RegisterLocation(R0));
}
void InstructionCodeGeneratorARM::VisitNewInstance(HNewInstance* instruction) {
- InvokeRuntimeCallingConvention calling_convention;
- __ LoadImmediate(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
// Note: if heap poisoning is enabled, the entry point takes cares
// of poisoning the reference.
codegen_->InvokeRuntime(instruction->GetEntrypoint(),
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
}
void LocationsBuilderARM::VisitNewArray(HNewArray* instruction) {
@@ -3153,6 +3490,7 @@ void InstructionCodeGeneratorARM::VisitNewArray(HNewArray* instruction) {
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
}
void LocationsBuilderARM::VisitParameterValue(HParameterValue* instruction) {
@@ -3334,6 +3672,9 @@ void InstructionCodeGeneratorARM::GenerateWideAtomicLoad(Register addr,
Register out_lo,
Register out_hi) {
if (offset != 0) {
+ // Ensure `out_lo` is different from `addr`, so that loading
+ // `offset` into `out_lo` does not clobber `addr`.
+ DCHECK_NE(out_lo, addr);
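+ // (If they were equal, the LoadImmediate below would overwrite the base
+ // address before it could be added into IP.)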
__ LoadImmediate(out_lo, offset);
__ add(IP, addr, ShifterOperand(out_lo));
addr = IP;
@@ -3521,14 +3862,26 @@ void InstructionCodeGeneratorARM::HandleFieldSet(HInstruction* instruction,
void LocationsBuilderARM::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_field_get_with_read_barrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
bool volatile_for_double = field_info.IsVolatile()
&& (field_info.GetFieldType() == Primitive::kPrimDouble)
&& !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
- bool overlap = field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong);
+ // The output overlaps in case of volatile long: we don't want the
+ // code generated by GenerateWideAtomicLoad to overwrite the
+ // object's location. Likewise, in the case of an object field get
+ // with read barriers enabled, we do not want the load to overwrite
+ // the object's location, as we need it to emit the read barrier.
+ bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) ||
+ object_field_get_with_read_barrier;
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
@@ -3594,7 +3947,8 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
LocationSummary* locations = instruction->GetLocations();
- Register base = locations->InAt(0).AsRegister<Register>();
+ Location base_loc = locations->InAt(0);
+ Register base = base_loc.AsRegister<Register>();
Location out = locations->Out();
bool is_volatile = field_info.IsVolatile();
bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
@@ -3674,7 +4028,7 @@ void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
}
if (field_type == Primitive::kPrimNot) {
- __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
}
}
@@ -3818,20 +4172,31 @@ void InstructionCodeGeneratorARM::VisitNullCheck(HNullCheck* instruction) {
}
void LocationsBuilderARM::VisitArrayGet(HArrayGet* instruction) {
+ bool object_array_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_array_get_with_read_barrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object array get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // array's location, as we need it to emit the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
}
void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Location index = locations->InAt(1);
Primitive::Type type = instruction->GetType();
@@ -3894,8 +4259,9 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
Register out = locations->Out().AsRegister<Register>();
if (index.IsConstant()) {
@@ -3958,8 +4324,17 @@ void InstructionCodeGeneratorARM::VisitArrayGet(HArrayGet* instruction) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
if (type == Primitive::kPrimNot) {
- Register out = locations->Out().AsRegister<Register>();
- __ MaybeUnpoisonHeapReference(out);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ Location out = locations->Out();
+ if (index.IsConstant()) {
+ uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+ } else {
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
+ }
}
}
@@ -3968,11 +4343,16 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) {
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
- bool may_need_runtime_call = instruction->NeedsTypeCheck();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+ bool object_array_set_with_read_barrier =
+ kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+ (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
+
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(value_type)) {
@@ -3980,7 +4360,6 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) {
} else {
locations->SetInAt(2, Location::RequiresRegister());
}
-
if (needs_write_barrier) {
// Temporary registers for the write barrier.
locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
@@ -3990,10 +4369,11 @@ void LocationsBuilderARM::VisitArraySet(HArraySet* instruction) {
void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register array = locations->InAt(0).AsRegister<Register>();
+ Location array_loc = locations->InAt(0);
+ Register array = array_loc.AsRegister<Register>();
Location index = locations->InAt(1);
Primitive::Type value_type = instruction->GetComponentType();
- bool may_need_runtime_call = locations->CanCall();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
@@ -4030,7 +4410,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
case Primitive::kPrimNot: {
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
- Register value = locations->InAt(2).AsRegister<Register>();
+ Location value_loc = locations->InAt(2);
+ Register value = value_loc.AsRegister<Register>();
Register source = value;
if (instruction->InputAt(2)->IsNullConstant()) {
@@ -4044,6 +4425,8 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ add(IP, array, ShifterOperand(index.AsRegister<Register>(), LSL, TIMES_4));
__ StoreToOffset(kStoreWord, source, IP, data_offset);
}
+ DCHECK(!needs_write_barrier);
+ DCHECK(!may_need_runtime_call_for_type_check);
break;
}
@@ -4056,7 +4439,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
Label done;
SlowPathCode* slow_path = nullptr;
- if (may_need_runtime_call) {
+ if (may_need_runtime_call_for_type_check) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM(instruction);
codegen_->AddSlowPath(slow_path);
if (instruction->GetValueCanBeNull()) {
@@ -4076,23 +4459,63 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ Bind(&non_zero);
}
- __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ MaybeUnpoisonHeapReference(temp1);
- __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
- __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
- // No need to poison/unpoison, we're comparing two poisoined references.
- __ cmp(temp1, ShifterOperand(temp2));
- if (instruction->StaticTypeOfArrayIsObjectArray()) {
- Label do_put;
- __ b(&do_put, EQ);
- __ MaybeUnpoisonHeapReference(temp1);
- __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
- // No need to poison/unpoison, we're comparing against null.
- __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
- __ Bind(&do_put);
+ if (kEmitCompilerReadBarrier) {
+ // When read barriers are enabled, the type checking
+ // instrumentation requires two read barriers:
+ //
+ // __ Mov(temp2, temp1);
+ // // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ // __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+ // codegen_->GenerateReadBarrier(
+ // instruction, temp1_loc, temp1_loc, temp2_loc, component_offset);
+ //
+ // // /* HeapReference<Class> */ temp2 = value->klass_
+ // __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+ // codegen_->GenerateReadBarrier(
+ // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp1_loc);
+ //
+ // __ cmp(temp1, ShifterOperand(temp2));
+ //
+ // However, the second read barrier may trash `temp`, as it
+ // is a temporary register, and as such would not be saved
+ // along with live registers before calling the runtime (nor
+ // restored afterwards). So in this case, we bail out and
+ // delegate the work to the array set slow path.
+ //
+ // TODO: Extend the register allocator to support a new
+ // "(locally) live temp" location so as to avoid always
+ // going into the slow path when read barriers are enabled.
+ __ b(slow_path->GetEntryLabel());
} else {
- __ b(slow_path->GetEntryLabel(), NE);
+ // /* HeapReference<Class> */ temp1 = array->klass_
+ __ LoadFromOffset(kLoadWord, temp1, array, class_offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->component_type_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+ // /* HeapReference<Class> */ temp2 = value->klass_
+ __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+ // If heap poisoning is enabled, no need to unpoison `temp1`
+ // nor `temp2`, as we are comparing two poisoned references.
+ __ cmp(temp1, ShifterOperand(temp2));
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ Label do_put;
+ __ b(&do_put, EQ);
+ // If heap poisoning is enabled, the `temp1` reference has
+ // not been unpoisoned yet; unpoison it now.
+ __ MaybeUnpoisonHeapReference(temp1);
+
+ // /* HeapReference<Class> */ temp1 = temp1->super_class_
+ __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp1`, as we are comparing against null below.
+ __ CompareAndBranchIfNonZero(temp1, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ b(slow_path->GetEntryLabel(), NE);
+ }
}
}
@@ -4116,7 +4539,7 @@ void InstructionCodeGeneratorARM::VisitArraySet(HArraySet* instruction) {
__ StoreToOffset(kStoreWord, source, IP, data_offset);
}
- if (!may_need_runtime_call) {
+ if (!may_need_runtime_call_for_type_check) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
@@ -4545,7 +4968,8 @@ void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
CodeGenerator::CreateLoadClassLocationSummary(
cls,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- Location::RegisterLocation(R0));
+ Location::RegisterLocation(R0),
+ /* code_generator_supports_read_barrier */ true);
}
void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
@@ -4556,33 +4980,59 @@ void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) {
cls,
cls->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
- Register out = locations->Out().AsRegister<Register>();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
Register current_method = locations->InAt(0).AsRegister<Register>();
+
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- __ LoadFromOffset(
- kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+ uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+ __ AddConstant(out, current_method, declaring_class_offset);
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
+ }
} else {
- DCHECK(cls->CanCallRuntime());
+ // /* GcRoot<mirror::Class>[] */ out =
+ // current_method.ptr_sized_fields_->dex_cache_resolved_types_
__ LoadFromOffset(kLoadWord,
out,
current_method,
ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
- __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
- // TODO: We will need a read barrier here.
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
- codegen_->AddSlowPath(slow_path);
- __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
- if (cls->MustGenerateClinitCheck()) {
- GenerateClassInitializationCheck(slow_path, out);
+ size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+ __ AddConstant(out, out, cache_offset);
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
} else {
- __ Bind(slow_path->GetExitLabel());
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ __ LoadFromOffset(kLoadWord, out, out, cache_offset);
+ }
+
+ if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+ DCHECK(cls->CanCallRuntime());
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM(
+ cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ codegen_->AddSlowPath(slow_path);
+ if (!cls->IsInDexCache()) {
+ __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
+ }
+ if (cls->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
}
}
@@ -4628,13 +5078,35 @@ void InstructionCodeGeneratorARM::VisitLoadString(HLoadString* load) {
codegen_->AddSlowPath(slow_path);
LocationSummary* locations = load->GetLocations();
- Register out = locations->Out().AsRegister<Register>();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
Register current_method = locations->InAt(0).AsRegister<Register>();
- __ LoadFromOffset(
- kLoadWord, out, current_method, ArtMethod::DeclaringClassOffset().Int32Value());
+
+ uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+ __ AddConstant(out, current_method, declaring_class_offset);
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ __ LoadFromOffset(kLoadWord, out, current_method, declaring_class_offset);
+ }
+
+ // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
__ LoadFromOffset(kLoadWord, out, out, mirror::Class::DexCacheStringsOffset().Int32Value());
- __ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(load->GetStringIndex()));
- // TODO: We will need a read barrier here.
+
+ size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::String>* */ out = &out[string_index]
+ __ AddConstant(out, out, cache_offset);
+ // /* mirror::String* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ __ LoadFromOffset(kLoadWord, out, out, cache_offset);
+ }
+
__ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -4673,45 +5145,50 @@ void LocationsBuilderARM::VisitThrow(HThrow* instruction) {
void InstructionCodeGeneratorARM::VisitThrow(HThrow* instruction) {
codegen_->InvokeRuntime(
QUICK_ENTRY_POINT(pDeliverException), instruction, instruction->GetDexPc(), nullptr);
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- switch (instruction->GetTypeCheckKind()) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = LocationSummary::kNoCall;
+ call_kind =
+ kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
break;
+ case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- call_kind = LocationSummary::kCall;
- break;
- case TypeCheckKind::kArrayCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
- if (call_kind != LocationSummary::kCall) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- // The out register is used as a temporary, so it overlaps with the inputs.
- // Note that TypeCheckSlowPathARM uses this register too.
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->SetOut(Location::RegisterLocation(R0));
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ // The "out" register is used as a temporary, so it overlaps with the inputs.
+ // Note that TypeCheckSlowPathARM uses this register too.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ // When read barriers are enabled, we need a temporary register for
+ // some cases.
+ if (kEmitCompilerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ locations->AddTemp(Location::RequiresRegister());
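+ // This temp is used below to keep a copy of `out` alive across the
+ // read-barrier-instrumented super_class_ / component_type_ loads.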
}
}
void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -4725,15 +5202,9 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ CompareAndBranchIfZero(obj, &zero);
}
- // In case of an interface/unresolved check, we put the object class into the object register.
- // This is safe, as the register is caller-save, and the object must be in another
- // register if it survives the runtime call.
- Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
- (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
- ? obj
- : out;
- __ LoadFromOffset(kLoadWord, target, obj, class_offset);
- __ MaybeUnpoisonHeapReference(target);
+ // /* HeapReference<Class> */ out = obj->klass_
+ __ LoadFromOffset(kLoadWord, out, obj, class_offset);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
switch (instruction->GetTypeCheckKind()) {
case TypeCheckKind::kExactCheck: {
@@ -4744,13 +5215,23 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ b(&done);
break;
}
+
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
Label loop;
__ Bind(&loop);
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp = temp_loc.AsRegister<Register>();
+ __ Mov(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->super_class_
__ LoadFromOffset(kLoadWord, out, out, super_offset);
- __ MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
// If `out` is null, we use it for the result, and jump to `done`.
__ CompareAndBranchIfZero(out, &done);
__ cmp(out, ShifterOperand(cls));
@@ -4761,14 +5242,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
Label loop, success;
__ Bind(&loop);
__ cmp(out, ShifterOperand(cls));
__ b(&success, EQ);
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp = temp_loc.AsRegister<Register>();
+ __ Mov(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->super_class_
__ LoadFromOffset(kLoadWord, out, out, super_offset);
- __ MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
__ CompareAndBranchIfNonZero(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
__ b(&done);
@@ -4779,14 +5270,24 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
Label exact_check;
__ cmp(out, ShifterOperand(cls));
__ b(&exact_check, EQ);
- // Otherwise, we need to check that the object's class is a non primitive array.
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp = temp_loc.AsRegister<Register>();
+ __ Mov(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->component_type_
__ LoadFromOffset(kLoadWord, out, out, component_offset);
- __ MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
// If `out` is null, we use it for the result, and jump to `done`.
__ CompareAndBranchIfZero(out, &done);
__ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -4797,11 +5298,12 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
__ b(&done);
break;
}
+
case TypeCheckKind::kArrayCheck: {
__ cmp(out, ShifterOperand(cls));
DCHECK(locations->OnlyCallsOnSlowPath());
- slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
- instruction, /* is_fatal */ false);
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+ /* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
__ b(slow_path->GetEntryLabel(), NE);
__ LoadImmediate(out, 1);
@@ -4810,13 +5312,25 @@ void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kUnresolvedCheck:
- case TypeCheckKind::kInterfaceCheck:
- default: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ case TypeCheckKind::kInterfaceCheck: {
+ // Note that we indeed only call on slow path, but we always go
+ // into the slow path for the unresolved & interface check
+ // cases.
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+ // require assigning fixed registers for the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+ /* is_fatal */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ b(slow_path->GetEntryLabel());
if (zero.IsLinked()) {
__ b(&done);
}
@@ -4842,57 +5356,61 @@ void LocationsBuilderARM::VisitCheckCast(HCheckCast* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
- switch (instruction->GetTypeCheckKind()) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = throws_into_catch
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
+ call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
break;
+ case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- call_kind = LocationSummary::kCall;
- break;
- case TypeCheckKind::kArrayCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction, call_kind);
- if (call_kind != LocationSummary::kCall) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- // Note that TypeCheckSlowPathARM uses this register too.
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Note that TypeCheckSlowPathARM uses this "temp" register too.
+ locations->AddTemp(Location::RequiresRegister());
+ // When read barriers are enabled, we need an additional temporary
+ // register for some cases.
+ if (kEmitCompilerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
locations->AddTemp(Location::RequiresRegister());
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
}
}
void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Register cls = locations->InAt(1).AsRegister<Register>();
- Register temp = locations->WillCall()
- ? Register(kNoRegister)
- : locations->GetTemp(0).AsRegister<Register>();
-
+ Location temp_loc = locations->GetTemp(0);
+ Register temp = temp_loc.AsRegister<Register>();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- SlowPathCode* slow_path = nullptr;
- if (!locations->WillCall()) {
- slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(
- instruction, !locations->CanCall());
- codegen_->AddSlowPath(slow_path);
- }
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool is_type_check_slow_path_fatal =
+ (type_check_kind == TypeCheckKind::kExactCheck ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+ !instruction->CanThrowIntoCatchBlock();
+ SlowPathCode* type_check_slow_path =
+ new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
+ is_type_check_slow_path_fatal);
+ codegen_->AddSlowPath(type_check_slow_path);
Label done;
// Avoid null check if we know obj is not null.
@@ -4900,76 +5418,159 @@ void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
__ CompareAndBranchIfZero(obj, &done);
}
- if (locations->WillCall()) {
- __ LoadFromOffset(kLoadWord, obj, obj, class_offset);
- __ MaybeUnpoisonHeapReference(obj);
- } else {
- __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
- __ MaybeUnpoisonHeapReference(temp);
- }
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kArrayCheck: {
__ cmp(temp, ShifterOperand(cls));
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ b(slow_path->GetEntryLabel(), NE);
+ __ b(type_check_slow_path->GetEntryLabel(), NE);
break;
}
+
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
- Label loop;
+ Label loop, compare_classes;
__ Bind(&loop);
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ __ Mov(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->super_class_
__ LoadFromOffset(kLoadWord, temp, temp, super_offset);
- __ MaybeUnpoisonHeapReference(temp);
- // Jump to the slow path to throw the exception.
- __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+ // If the class reference currently in `temp` is not null, jump
+ // to the `compare_classes` label to compare it with the checked
+ // class.
+ __ CompareAndBranchIfNonZero(temp, &compare_classes);
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+ // But before, move back the object's class into `temp` before
+ // going into the slow path, as it has been overwritten in the
+ // meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ b(type_check_slow_path->GetEntryLabel());
+
+ __ Bind(&compare_classes);
__ cmp(temp, ShifterOperand(cls));
__ b(&loop, NE);
break;
}
+
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
Label loop;
__ Bind(&loop);
__ cmp(temp, ShifterOperand(cls));
__ b(&done, EQ);
+
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ __ Mov(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->super_class_
__ LoadFromOffset(kLoadWord, temp, temp, super_offset);
- __ MaybeUnpoisonHeapReference(temp);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+ // If the class reference currently in `temp` is not null, jump
+ // back to the beginning of the loop.
__ CompareAndBranchIfNonZero(temp, &loop);
- // Jump to the slow path to throw the exception.
- __ b(slow_path->GetEntryLabel());
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+ // But before, move back the object's class into `temp` before
+ // going into the slow path, as it has been overwritten in the
+ // meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ b(type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
+ Label check_non_primitive_component_type;
__ cmp(temp, ShifterOperand(cls));
__ b(&done, EQ);
- // Otherwise, we need to check that the object's class is a non primitive array.
+
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ __ Mov(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->component_type_
__ LoadFromOffset(kLoadWord, temp, temp, component_offset);
- __ MaybeUnpoisonHeapReference(temp);
- __ CompareAndBranchIfZero(temp, slow_path->GetEntryLabel());
+ codegen_->MaybeGenerateReadBarrier(
+ instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+ // If the component type is not null (i.e. the object is indeed
+ // an array), jump to label `check_non_primitive_component_type`
+ // to further check that this component type is not a primitive
+ // type.
+ __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type);
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+ // But before, move back the object's class into `temp` before
+ // going into the slow path, as it has been overwritten in the
+ // meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ b(type_check_slow_path->GetEntryLabel());
+
+ __ Bind(&check_non_primitive_component_type);
__ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
- static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ CompareAndBranchIfNonZero(temp, slow_path->GetEntryLabel());
+ static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
+ __ CompareAndBranchIfZero(temp, &done);
+ // Same comment as above regarding `temp` and the slow path.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ LoadFromOffset(kLoadWord, temp, obj, class_offset);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ b(type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- default:
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ // We always go into the type check slow path for the unresolved &
+ // interface check cases.
+ //
+ // We cannot directly call the CheckCast runtime entry point
+ // without resorting to a type checking slow path here (i.e. by
+ // calling InvokeRuntime directly), as it would require
+ // assigning fixed registers to the inputs of this HCheckCast
+ // instruction (following the runtime calling convention), which
+ // might be cluttered by the potential first read barrier
+ // emission at the beginning of this method.
+ __ b(type_check_slow_path->GetEntryLabel());
break;
}
__ Bind(&done);
- if (slow_path != nullptr) {
- __ Bind(slow_path->GetExitLabel());
- }
+ __ Bind(type_check_slow_path->GetExitLabel());
}
void LocationsBuilderARM::VisitMonitorOperation(HMonitorOperation* instruction) {
@@ -4985,6 +5586,11 @@ void InstructionCodeGeneratorARM::VisitMonitorOperation(HMonitorOperation* instr
instruction,
instruction->GetDexPc(),
nullptr);
+ if (instruction->IsEnter()) {
+ CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+ } else {
+ CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+ }
}
void LocationsBuilderARM::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction, AND); }
@@ -5143,19 +5749,85 @@ void InstructionCodeGeneratorARM::HandleBitwiseOperation(HBinaryOperation* instr
}
}
+void CodeGeneratorARM::GenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // If heap poisoning is enabled, the unpoisoning of the loaded
+ // reference will be carried out by the runtime within the slow
+ // path.
+ //
+ // Note that `ref` currently does not get unpoisoned (when heap
+ // poisoning is enabled), which is alright as the `ref` argument is
+ // not used by the artReadBarrierSlow entry point.
+ //
+ // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena())
+ ReadBarrierForHeapReferenceSlowPathARM(instruction, out, ref, obj, offset, index);
+ AddSlowPath(slow_path);
+
+ // TODO: When read barrier has a fast path, add it here.
+ /* Currently the read barrier call is inserted after the original load.
+ * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+ * original load. This load-load ordering is required by the read barrier.
+ * The fast path/slow path (for Baker's algorithm) should look like:
+ *
+ * bool isGray = obj.LockWord & kReadBarrierMask;
+ * lfence; // load fence or artificial data dependence to prevent load-load reordering
+ * ref = obj.field; // this is the original load
+ * if (isGray) {
+ * ref = Mark(ref); // ideally the slow path just does Mark(ref)
+ * }
+ */
+
+ __ b(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorARM::MaybeGenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (kEmitCompilerReadBarrier) {
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(out.AsRegister<Register>());
+ }
+}
+
+void CodeGeneratorARM::GenerateReadBarrierForRoot(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Note that GC roots are not affected by heap poisoning, so we do
+ // not need to do anything special for this here.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ // TODO: Implement a fast path for ReadBarrierForRoot, performing
+ // the following operation (for Baker's algorithm):
+ //
+ // if (thread.tls32_.is_gc_marking) {
+ // root = Mark(root);
+ // }
+
+ __ b(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
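
For readers unfamiliar with the Baker-style check the TODO above refers to, the fast path it describes amounts to a thread-local flag test around the marking call. The following is an illustrative C++ sketch only; `is_gc_marking` and `Mark` are stand-ins for the thread-local flag and the marking routine, not the actual ART symbols emitted by this code generator.

    struct Object {};

    // Placeholder for the runtime's marking/forwarding routine (assumption).
    static Object* Mark(Object* ref) { return ref; }

    // Placeholder for Thread::tls32_.is_gc_marking (assumption).
    static thread_local bool is_gc_marking = false;

    static Object* ReadBarrierForRootSketch(Object** root_slot) {
      Object* root = *root_slot;   // the original GC root load
      if (is_gc_marking) {         // cheap TLS check: only call out while the GC is marking
        root = Mark(root);         // slow path: mark/forward the reference
      }
      return root;
    }
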
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
MethodReference target_method) {
- if (desired_dispatch_info.method_load_kind ==
- HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
- // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
- return HInvokeStaticOrDirect::DispatchInfo {
- HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
- HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
- 0u,
- 0u
- };
- }
if (desired_dispatch_info.code_ptr_location ==
HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative) {
const DexFile& outer_dex_file = GetGraph()->GetDexFile();
@@ -5178,6 +5850,32 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM::GetSupportedInvokeStaticOr
return desired_dispatch_info;
}
+Register CodeGeneratorARM::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+ Register temp) {
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+ Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ if (!invoke->GetLocations()->Intrinsified()) {
+ return location.AsRegister<Register>();
+ }
+ // For intrinsics we allow any location, so it may be on the stack.
+ if (!location.IsRegister()) {
+ __ LoadFromOffset(kLoadWord, temp, SP, location.GetStackIndex());
+ return temp;
+ }
+ // For register locations, check if the register was saved. If so, get it from the stack.
+ // Note: There is a chance that the register was saved but not overwritten, so we could
+ // save one load. However, since this is just an intrinsic slow path, we prefer this
+ // simpler and more robust approach rather than trying to determine if that's the case.
+ SlowPathCode* slow_path = GetCurrentSlowPath();
+ DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
+ if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+ int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+ __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
+ return temp;
+ }
+ return location.AsRegister<Register>();
+}
+
void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
// For better instruction scheduling we load the direct code pointer before the method pointer.
switch (invoke->GetCodePtrLocation()) {
@@ -5200,7 +5898,7 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
__ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, invoke->GetStringInitOffset());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
@@ -5209,13 +5907,17 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
__ LoadLiteral(temp.AsRegister<Register>(),
DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
break;
- case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
- // TODO: Implement this type.
- // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
- LOG(FATAL) << "Unsupported";
- UNREACHABLE();
+ case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+ HArmDexCacheArraysBase* base =
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
+ Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+ temp.AsRegister<Register>());
+ int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
+ __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
- Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
Register method_reg;
Register reg = temp.AsRegister<Register>();
if (current_method.IsRegister()) {
@@ -5226,10 +5928,11 @@ void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
method_reg = reg;
__ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
}
- // temp = current_method->dex_cache_resolved_methods_;
- __ LoadFromOffset(
- kLoadWord, reg, method_reg, ArtMethod::DexCacheResolvedMethodsOffset(
- kArmPointerSize).Int32Value());
+ // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
+ __ LoadFromOffset(kLoadWord,
+ reg,
+ method_reg,
+ ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
// temp = temp[index_in_cache]
uint32_t index_in_cache = invoke->GetTargetMethod().dex_method_index;
__ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
@@ -5270,13 +5973,24 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
Register temp = temp_location.AsRegister<Register>();
uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
- LocationSummary* locations = invoke->GetLocations();
- Location receiver = locations->InAt(0);
+
+ // Use the calling convention instead of the location of the receiver, as
+ // intrinsics may have put the receiver in a different register. In the intrinsics
+ // slow path, the arguments have been moved to the right place, so here we are
+ // guaranteed that the receiver is the first register of the calling convention.
+ InvokeDexCallingConvention calling_convention;
+ Register receiver = calling_convention.GetRegisterAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- // temp = object->GetClass();
- DCHECK(receiver.IsRegister());
- __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
+ // /* HeapReference<Class> */ temp = receiver->klass_
+ __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However, this is not required in practice, as this is an
+ // intermediate/temporary reference, and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (future
+ // collectors may not preserve this property).
__ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetMethodAt(method_offset);
uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
@@ -5290,7 +6004,11 @@ void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
- size_t size = method_patches_.size() + call_patches_.size() + relative_call_patches_.size();
+ size_t size =
+ method_patches_.size() +
+ call_patches_.size() +
+ relative_call_patches_.size() +
+ /* MOVW+MOVT for each base */ 2u * dex_cache_arrays_base_labels_.size();
linker_patches->reserve(size);
for (const auto& entry : method_patches_) {
const MethodReference& target_method = entry.first;
@@ -5316,6 +6034,28 @@ void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patche
info.target_method.dex_file,
info.target_method.dex_method_index));
}
+ for (const auto& pair : dex_cache_arrays_base_labels_) {
+ HArmDexCacheArraysBase* base = pair.first;
+ const DexCacheArraysBaseLabels* labels = &pair.second;
+ const DexFile& dex_file = base->GetDexFile();
+ size_t base_element_offset = base->GetElementOffset();
+ DCHECK(labels->add_pc_label.IsBound());
+ uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(labels->add_pc_label.Position());
+ // Add MOVW patch.
+ DCHECK(labels->movw_label.IsBound());
+ uint32_t movw_offset = dchecked_integral_cast<uint32_t>(labels->movw_label.Position());
+ linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movw_offset,
+ &dex_file,
+ add_pc_offset,
+ base_element_offset));
+ // Add MOVT patch.
+ DCHECK(labels->movt_label.IsBound());
+ uint32_t movt_offset = dchecked_integral_cast<uint32_t>(labels->movt_label.Position());
+ linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(movt_offset,
+ &dex_file,
+ add_pc_offset,
+ base_element_offset));
+ }
}
Literal* CodeGeneratorARM::DeduplicateMethodLiteral(MethodReference target_method,
@@ -5427,6 +6167,23 @@ void InstructionCodeGeneratorARM::VisitPackedSwitch(HPackedSwitch* switch_instr)
}
}
+void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
+ locations->SetOut(Location::RequiresRegister());
+ codegen_->AddDexCacheArraysBase(base);
+}
+
+void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+ Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
+ CodeGeneratorARM::DexCacheArraysBaseLabels* labels = codegen_->GetDexCacheArraysBaseLabels(base);
+ __ BindTrackedLabel(&labels->movw_label);
+ __ movw(base_reg, 0u);
+ __ BindTrackedLabel(&labels->movt_label);
+ __ movt(base_reg, 0u);
+ __ BindTrackedLabel(&labels->add_pc_label);
+ __ add(base_reg, base_reg, ShifterOperand(PC));
+}
+
void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
if (!trg.IsValid()) {
DCHECK(type == Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index cef1095c5d..193add2541 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -228,15 +228,13 @@ class InstructionCodeGeneratorARM : public HGraphVisitor {
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
void GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
Label* true_target,
- Label* false_target,
- Label* always_true_target);
+ Label* false_target);
void GenerateCompareWithImmediate(Register left, int32_t right);
- void GenerateCompareTestAndBranch(HIf* if_instr,
- HCondition* condition,
+ void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
- Label* false_target,
- Label* always_true_target);
+ Label* false_target);
void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
@@ -375,8 +373,83 @@ class CodeGeneratorARM : public CodeGenerator {
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ // The PC-relative base address is loaded with three instructions, MOVW+MOVT
+ // to load the offset to base_reg and then ADD base_reg, PC. The offset is
+ // calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we
+ // currently emit these 3 instructions together, instruction scheduling could
+ // split this sequence apart, so we keep separate labels for each of them.
+ struct DexCacheArraysBaseLabels {
+ DexCacheArraysBaseLabels() = default;
+ DexCacheArraysBaseLabels(DexCacheArraysBaseLabels&& other) = default;
+
+ Label movw_label;
+ Label movt_label;
+ Label add_pc_label;
+ };
+
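
As a rough illustration of the comment above, the 32-bit value that ends up split across the MOVW/MOVT pair is the displacement from the ADD's effective PC to the patched dex cache arrays element. This is a sketch under the stated Thumb2 assumption (PC reads as the ADD's address plus 4); the function and parameter names are invented for the example and are not the patcher's actual API.

    #include <cstdint>

    // target: the address the base register should hold after the ADD, i.e. the
    // start of the dex cache arrays data plus the chosen element offset.
    static uint32_t DexCacheArraysBaseImmediate(uint32_t dex_cache_arrays_start,
                                                uint32_t base_element_offset,
                                                uint32_t add_pc_insn_address) {
      uint32_t target = dex_cache_arrays_start + base_element_offset;
      uint32_t effective_pc = add_pc_insn_address + 4u;  // Thumb2: PC == ADD address + 4
      // Low 16 bits go into the MOVW, high 16 bits into the MOVT
      // (treated as a 32-bit two's-complement displacement).
      return target - effective_pc;
    }
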
+ void AddDexCacheArraysBase(HArmDexCacheArraysBase* base) {
+ DexCacheArraysBaseLabels labels;
+ dex_cache_arrays_base_labels_.Put(base, std::move(labels));
+ }
+
+ DexCacheArraysBaseLabels* GetDexCacheArraysBaseLabels(HArmDexCacheArraysBase* base) {
+ auto it = dex_cache_arrays_base_labels_.find(base);
+ DCHECK(it != dex_cache_arrays_base_labels_.end());
+ return &it->second;
+ }
+
+ // Generate a read barrier for a heap reference within `instruction`.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+ // When `index` is provided (i.e. for array accesses), the offset
+ // value passed to artReadBarrierSlow is adjusted to take `index`
+ // into account.
+ void GenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap reference.
+ // If heap poisoning is enabled, also unpoison the reference in `out`.
+ void MaybeGenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction`.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
+ void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
private:
+ Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
using MethodToLiteralMap = ArenaSafeMap<MethodReference, Literal*, MethodReferenceComparator>;
+ using DexCacheArraysBaseToLabelsMap = ArenaSafeMap<HArmDexCacheArraysBase*,
+ DexCacheArraysBaseLabels,
+ std::less<HArmDexCacheArraysBase*>>;
Literal* DeduplicateMethodLiteral(MethodReference target_method, MethodToLiteralMap* map);
Literal* DeduplicateMethodAddressLiteral(MethodReference target_method);
@@ -398,6 +471,8 @@ class CodeGeneratorARM : public CodeGenerator {
// Using ArenaDeque<> which retains element addresses on push/emplace_back().
ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+ DexCacheArraysBaseToLabelsMap dex_cache_arrays_base_labels_;
+
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM);
};
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b0be446174..04acd9d32c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -42,6 +42,9 @@ using namespace vixl; // NOLINT(build/namespaces)
namespace art {
+template<class MirrorType>
+class GcRoot;
+
namespace arm64 {
using helpers::CPURegisterFrom;
@@ -68,6 +71,10 @@ using helpers::ARM64EncodableConstantOrRegister;
using helpers::ArtVixlRegCodeCoherentForRegSet;
static constexpr int kCurrentMethodStackOffset = 0;
+// The compare/jump sequence will generate about (2 * num_entries + 1) instructions, while the
+// jump table version generates 7 instructions and num_entries literals. The compare/jump
+// sequence therefore generates less code/data when num_entries is small.
+static constexpr uint32_t kPackedSwitchJumpTableThreshold = 6;
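
As a sanity check on the threshold above (an illustrative computation only, assuming 4-byte A64 instructions and 4-byte int32_t jump-table literals), num_entries == 6 is about where the two encodings reach the same size.

    #include <cstdint>

    static uint32_t CompareJumpBytes(uint32_t num_entries) {
      return (2u * num_entries + 1u) * 4u;   // ~(2 * num_entries + 1) instructions
    }

    static uint32_t JumpTableBytes(uint32_t num_entries) {
      return 7u * 4u + num_entries * 4u;     // 7 instructions + one int32_t literal per entry
    }

    // CompareJumpBytes(6) == 52 and JumpTableBytes(6) == 52; beyond 6 entries
    // the jump table is the smaller of the two.
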
inline Condition ARM64Condition(IfCondition cond) {
switch (cond) {
@@ -427,15 +434,6 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
__ Bind(GetEntryLabel());
- if (instruction_->IsCheckCast()) {
- // The codegen for the instruction overwrites `temp`, so put it back in place.
- Register obj = InputRegisterAt(instruction_, 0);
- Register temp = WRegisterFrom(locations->GetTemp(0));
- uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- __ Ldr(temp, HeapOperand(obj, class_offset));
- arm64_codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
- }
-
if (!is_fatal_) {
SaveLiveRegisters(codegen, locations);
}
@@ -450,11 +448,11 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
if (instruction_->IsInstanceOf()) {
arm64_codegen->InvokeRuntime(
QUICK_ENTRY_POINT(pInstanceofNonTrivial), instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
+ const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
- CheckEntrypointTypes<kQuickInstanceofNonTrivial, uint32_t,
- const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -490,6 +488,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
uint32_t dex_pc = deoptimize->GetDexPc();
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
@@ -545,6 +544,293 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
};
+void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
+ uint32_t num_entries = switch_instr_->GetNumEntries();
+ DCHECK_GE(num_entries, kPackedSwitchJumpTableThreshold);
+
+ // We are about to use the assembler to place literals directly. Make sure we have enough
+ // underlying code buffer and we have generated the jump table with right size.
+ CodeBufferCheckScope scope(codegen->GetVIXLAssembler(), num_entries * sizeof(int32_t),
+ CodeBufferCheckScope::kCheck, CodeBufferCheckScope::kExactSize);
+
+ __ Bind(&table_start_);
+ const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
+ for (uint32_t i = 0; i < num_entries; i++) {
+ vixl::Label* target_label = codegen->GetLabelOf(successors[i]);
+ DCHECK(target_label->IsBound());
+ ptrdiff_t jump_offset = target_label->location() - table_start_.location();
+ DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
+ DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
+ Literal<int32_t> literal(jump_offset);
+ __ place(&literal);
+ }
+}
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierForHeapReferenceSlowPathARM64(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index)
+ : instruction_(instruction),
+ out_(out),
+ ref_(ref),
+ obj_(obj),
+ offset_(offset),
+ index_(index) {
+ DCHECK(kEmitCompilerReadBarrier);
+ // If `obj` is equal to `out` or `ref`, it means the initial object
+ // has been overwritten by (or after) the heap object reference load
+ // to be instrumented, e.g.:
+ //
+ // __ Ldr(out, HeapOperand(out, class_offset);
+ // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ //
+ // In that case, we have lost the information about the original
+ // object, and the emitted read barrier cannot work properly.
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+ DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ Primitive::Type type = Primitive::kPrimNot;
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+ DCHECK(!instruction_->IsInvoke() ||
+ (instruction_->IsInvokeStaticOrDirect() &&
+ instruction_->GetLocations()->Intrinsified()));
+
+ __ Bind(GetEntryLabel());
+
+ // Note: In the case of a HArrayGet instruction, when the base
+ // address is a HArm64IntermediateAddress instruction, it does not
+ // point to the array object itself, but to an offset within this
+ // object. However, the read barrier entry point needs the array
+ // object address to be passed as first argument. So we
+ // temporarily set back `obj_` to that address, and restore its
+ // initial value later.
+ if (instruction_->IsArrayGet() &&
+ instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
+ if (kIsDebugBuild) {
+ HArm64IntermediateAddress* intermediate_address =
+ instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
+ uint32_t intermediate_address_offset =
+ intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
+ DCHECK_EQ(intermediate_address_offset, offset_);
+ DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
+ }
+ Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
+ __ Sub(obj_reg, obj_reg, offset_);
+ }
+
+ SaveLiveRegisters(codegen, locations);
+
+ // We may have to change the index's value, but as `index_` is a
+ // constant member (like other "inputs" of this slow path),
+ // we introduce a copy of it, `index`.
+ Location index = index_;
+ if (index_.IsValid()) {
+ // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+ if (instruction_->IsArrayGet()) {
+ // Compute the actual memory offset and store it in `index`.
+ Register index_reg = RegisterFrom(index_, Primitive::kPrimInt);
+ DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg()));
+ if (codegen->IsCoreCalleeSaveRegister(index_.reg())) {
+ // We are about to change the value of `index_reg` (see the
+ // calls to vixl::MacroAssembler::Lsl and
+ // vixl::MacroAssembler::Mov below), but it has
+ // not been saved by the previous call to
+ // art::SlowPathCode::SaveLiveRegisters, as it is a
+ // callee-save register --
+ // art::SlowPathCode::SaveLiveRegisters does not consider
+ // callee-save registers, as it has been designed with the
+ // assumption that callee-save registers are supposed to be
+ // handled by the called function. So, as a callee-save
+ // register, `index_reg` _would_ eventually be saved onto
+ // the stack, but it would be too late: we would have
+ // changed its value earlier. Therefore, we manually save
+ // it here into another freely available register,
+ // `free_reg`, chosen of course among the caller-save
+ // registers (as a callee-save `free_reg` register would
+ // exhibit the same problem).
+ //
+ // Note we could have requested a temporary register from
+ // the register allocator instead; but we prefer not to, as
+ // this is a slow path, and we know we can find a
+ // caller-save register that is available.
+ Register free_reg = FindAvailableCallerSaveRegister(codegen);
+ __ Mov(free_reg.W(), index_reg);
+ index_reg = free_reg;
+ index = LocationFrom(index_reg);
+ } else {
+ // The initial register stored in `index_` has already been
+ // saved in the call to art::SlowPathCode::SaveLiveRegisters
+ // (as it is not a callee-save register), so we can freely
+ // use it.
+ }
+ // Shifting the index value contained in `index_reg` by the scale
+ // factor (2) cannot overflow in practice, as the runtime is
+ // unable to allocate object arrays with a size larger than
+ // 2^26 - 1 (that is, 2^28 - 4 bytes).
+ __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type));
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ __ Add(index_reg, index_reg, Operand(offset_));
+ } else {
+ DCHECK(instruction_->IsInvoke());
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+ << instruction_->AsInvoke()->GetIntrinsic();
+ DCHECK_EQ(offset_, 0U);
+ DCHECK(index_.IsRegisterPair());
+ // UnsafeGet's offset location is a register pair, the low
+ // part contains the correct offset.
+ index = index_.ToLow();
+ }
+ }
+
+ // We're moving two or three locations to locations that could
+ // overlap, so we need a parallel move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(ref_,
+ LocationFrom(calling_convention.GetRegisterAt(0)),
+ type,
+ nullptr);
+ parallel_move.AddMove(obj_,
+ LocationFrom(calling_convention.GetRegisterAt(1)),
+ type,
+ nullptr);
+ if (index.IsValid()) {
+ parallel_move.AddMove(index,
+ LocationFrom(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimInt,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ } else {
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ arm64_codegen->MoveConstant(LocationFrom(calling_convention.GetRegisterAt(2)), offset_);
+ }
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<
+ kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+ arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+ RestoreLiveRegisters(codegen, locations);
+
+ // Restore the value of `obj_` when it corresponds to a
+ // HArm64IntermediateAddress instruction.
+ if (instruction_->IsArrayGet() &&
+ instruction_->AsArrayGet()->GetArray()->IsArm64IntermediateAddress()) {
+ if (kIsDebugBuild) {
+ HArm64IntermediateAddress* intermediate_address =
+ instruction_->AsArrayGet()->GetArray()->AsArm64IntermediateAddress();
+ uint32_t intermediate_address_offset =
+ intermediate_address->GetOffset()->AsIntConstant()->GetValueAsUint64();
+ DCHECK_EQ(intermediate_address_offset, offset_);
+ DCHECK_EQ(mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value(), offset_);
+ }
+ Register obj_reg = RegisterFrom(obj_, Primitive::kPrimInt);
+ __ Add(obj_reg, obj_reg, offset_);
+ }
+
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
+
+ private:
+ Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+ size_t ref = static_cast<int>(XRegisterFrom(ref_).code());
+ size_t obj = static_cast<int>(XRegisterFrom(obj_).code());
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+ return Register(VIXLRegCodeFromART(i), kXRegSize);
+ }
+ }
+ // We shall never fail to find a free caller-save register, as
+ // there are more than two core caller-save registers on ARM64
+ // (meaning it is possible to find one which is different from
+ // `ref` and `obj`).
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+ LOG(FATAL) << "Could not find a free register";
+ UNREACHABLE();
+ }
+
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location ref_;
+ const Location obj_;
+ const uint32_t offset_;
+ // An additional location containing an index to an array.
+ // Only used for HArrayGet and the UnsafeGetObject &
+ // UnsafeGetObjectVolatile intrinsics.
+ const Location index_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathARM64);
+};
+
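
The offset adjustment performed by the slow path above (shifting the array index by the reference scale and adding the data offset) can be summarized with the following illustrative computation; this is not the emitted code, and the function name is made up for the example.

    #include <cstdint>

    // For object arrays, references are 4 bytes, so the scale shift is 2.
    static uint32_t ReadBarrierElementOffset(uint32_t data_offset, uint32_t index) {
      // With object arrays capped at 2^26 - 1 elements, (index << 2) stays below
      // 2^28, so adding the data offset cannot overflow 32 bits -- the same
      // argument made in the slow path's comment.
      return data_offset + (index << 2);
    }
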
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ ReadBarrierForRootSlowPathARM64(HInstruction* instruction, Location out, Location root)
+ : instruction_(instruction), out_(out), root_(root) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Primitive::Type type = Primitive::kPrimNot;
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ // The argument of the ReadBarrierForRootSlow is not a managed
+ // reference (`mirror::Object*`), but a `GcRoot<mirror::Object>*`;
+ // thus we need a 64-bit move here, and we cannot use
+ //
+ // arm64_codegen->MoveLocation(
+ // LocationFrom(calling_convention.GetRegisterAt(0)),
+ // root_,
+ // type);
+ //
+ // which would emit a 32-bit move, as `type` is a (32-bit wide)
+ // reference type (`Primitive::kPrimNot`).
+ __ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_));
+ arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ arm64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+ RestoreLiveRegisters(codegen, locations);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathARM64);
+};
+
#undef __
Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) {
@@ -587,6 +873,7 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
compiler_options,
stats),
block_labels_(nullptr),
+ jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetArena(), this),
@@ -598,15 +885,21 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
call_patches_(MethodReferenceComparator(),
graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+ pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
// Save the link register (containing the return address) to mimic Quick.
AddAllocatedRegister(LocationFrom(lr));
}
-#undef __
#define __ GetVIXLAssembler()->
+void CodeGeneratorARM64::EmitJumpTables() {
+ for (auto jump_table : jump_tables_) {
+ jump_table->EmitTable(this);
+ }
+}
+
void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
+ EmitJumpTables();
// Ensure we emit the literal pool.
__ FinalizeCode();
@@ -1368,13 +1661,25 @@ void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) {
}
void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction) {
+ DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_field_get_with_read_barrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps for an object field get when read barriers
+ // are enabled: we do not want the load to overwrite the object's
+ // location, as we need it to emit the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
}
@@ -1403,7 +1708,11 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
}
if (field_type == Primitive::kPrimNot) {
- GetAssembler()->MaybeUnpoisonHeapReference(OutputCPURegister(instruction).W());
+ LocationSummary* locations = instruction->GetLocations();
+ Location base = locations->InAt(0);
+ Location out = locations->Out();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, base, offset);
}
}
@@ -1580,6 +1889,82 @@ void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) {
HandleBinaryOp(instruction);
}
+void LocationsBuilderARM64::VisitArm64DataProcWithShifterOp(
+ HArm64DataProcWithShifterOp* instruction) {
+ DCHECK(instruction->GetType() == Primitive::kPrimInt ||
+ instruction->GetType() == Primitive::kPrimLong);
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ if (instruction->GetInstrKind() == HInstruction::kNeg) {
+ locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant()));
+ } else {
+ locations->SetInAt(0, Location::RequiresRegister());
+ }
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64DataProcWithShifterOp(
+ HArm64DataProcWithShifterOp* instruction) {
+ Primitive::Type type = instruction->GetType();
+ HInstruction::InstructionKind kind = instruction->GetInstrKind();
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
+ Register out = OutputRegister(instruction);
+ Register left;
+ if (kind != HInstruction::kNeg) {
+ left = InputRegisterAt(instruction, 0);
+ }
+ // If this `HArm64DataProcWithShifterOp` was created by merging a type conversion into the
+ // shifter operand operation, the instruction generating `right_reg` (the input to the type
+ // conversion) can have a different type from the current instruction's type,
+ // so we manually indicate the type.
+ Register right_reg = RegisterFrom(instruction->GetLocations()->InAt(1), type);
+ int64_t shift_amount = (type == Primitive::kPrimInt)
+ ? static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxIntShiftValue)
+ : static_cast<uint32_t>(instruction->GetShiftAmount() & kMaxLongShiftValue);
+
+ Operand right_operand(0);
+
+ HArm64DataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind();
+ if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind)) {
+ right_operand = Operand(right_reg, helpers::ExtendFromOpKind(op_kind));
+ } else {
+ right_operand = Operand(right_reg, helpers::ShiftFromOpKind(op_kind), shift_amount);
+ }
+
+ // Logical binary operations do not support extension operations in the
+ // operand. Note that VIXL would still manage if one were passed, by generating
+ // the extension as a separate instruction.
+ // `HNeg` also does not support extension. See comments in `ShifterOperandSupportsExtension()`.
+ DCHECK(!right_operand.IsExtendedRegister() ||
+ (kind != HInstruction::kAnd && kind != HInstruction::kOr && kind != HInstruction::kXor &&
+ kind != HInstruction::kNeg));
+ switch (kind) {
+ case HInstruction::kAdd:
+ __ Add(out, left, right_operand);
+ break;
+ case HInstruction::kAnd:
+ __ And(out, left, right_operand);
+ break;
+ case HInstruction::kNeg:
+ DCHECK(instruction->InputAt(0)->AsConstant()->IsZero());
+ __ Neg(out, right_operand);
+ break;
+ case HInstruction::kOr:
+ __ Orr(out, left, right_operand);
+ break;
+ case HInstruction::kSub:
+ __ Sub(out, left, right_operand);
+ break;
+ case HInstruction::kXor:
+ __ Eor(out, left, right_operand);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected operation kind: " << kind;
+ UNREACHABLE();
+ }
+}
+
void LocationsBuilderARM64::VisitArm64IntermediateAddress(HArm64IntermediateAddress* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
@@ -1595,23 +1980,75 @@ void InstructionCodeGeneratorARM64::VisitArm64IntermediateAddress(
Operand(InputOperandAt(instruction, 1)));
}
+void LocationsBuilderARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+ LocationSummary* locations =
+ new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+ locations->SetInAt(HArm64MultiplyAccumulate::kInputAccumulatorIndex,
+ Location::RequiresRegister());
+ locations->SetInAt(HArm64MultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+ locations->SetInAt(HArm64MultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARM64::VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instr) {
+ Register res = OutputRegister(instr);
+ Register accumulator = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputAccumulatorIndex);
+ Register mul_left = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulLeftIndex);
+ Register mul_right = InputRegisterAt(instr, HArm64MultiplyAccumulate::kInputMulRightIndex);
+
+ // Avoid emitting code that could trigger Cortex A53's erratum 835769.
+ // This fixup should be carried out for all multiply-accumulate instructions:
+ // madd, msub, smaddl, smsubl, umaddl and umsubl.
+ if (instr->GetType() == Primitive::kPrimLong &&
+ codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) {
+ MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler();
+ vixl::Instruction* prev = masm->GetCursorAddress<vixl::Instruction*>() - vixl::kInstructionSize;
+ if (prev->IsLoadOrStore()) {
+ // Make sure we emit only exactly one nop.
+ vixl::CodeBufferCheckScope scope(masm,
+ vixl::kInstructionSize,
+ vixl::CodeBufferCheckScope::kCheck,
+ vixl::CodeBufferCheckScope::kExactSize);
+ __ nop();
+ }
+ }
+
+ if (instr->GetOpKind() == HInstruction::kAdd) {
+ __ Madd(res, mul_left, mul_right, accumulator);
+ } else {
+ DCHECK(instr->GetOpKind() == HInstruction::kSub);
+ __ Msub(res, mul_left, mul_right, accumulator);
+ }
+}
+
void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
+ bool object_array_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_array_get_with_read_barrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps in the case of an object array get with
+ // read barriers enabled: we do not want the move to overwrite the
+ // array's location, as we need it to emit the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
}
void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
Primitive::Type type = instruction->GetType();
Register obj = InputRegisterAt(instruction, 0);
- Location index = instruction->GetLocations()->InAt(1);
- size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
+ LocationSummary* locations = instruction->GetLocations();
+ Location index = locations->InAt(1);
+ uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(type)).Uint32Value();
MemOperand source = HeapOperand(obj);
CPURegister dest = OutputCPURegister(instruction);
@@ -1643,8 +2080,22 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
codegen_->Load(type, dest, source);
codegen_->MaybeRecordImplicitNullCheck(instruction);
- if (instruction->GetType() == Primitive::kPrimNot) {
- GetAssembler()->MaybeUnpoisonHeapReference(dest.W());
+ if (type == Primitive::kPrimNot) {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ Location obj_loc = locations->InAt(0);
+ Location out = locations->Out();
+ if (index.IsConstant()) {
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+ } else {
+ // Note: when the base in `obj_loc` comes from a HArm64IntermediateAddress,
+ // it does not contain the base address of the array object, which is
+ // needed by the read barrier entry point. So the read barrier
+ // slow path will temporarily set back `obj_loc` to the right
+ // address (see ReadBarrierForHeapReferenceSlowPathARM64::EmitNativeCode).
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset, index);
+ }
}
}
@@ -1662,12 +2113,19 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction)
}
void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
+ Primitive::Type value_type = instruction->GetComponentType();
+
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+ bool object_array_set_with_read_barrier =
+ kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- instruction->NeedsTypeCheck() ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+ (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+ if (Primitive::IsFloatingPointType(value_type)) {
locations->SetInAt(2, Location::RequiresFpuRegister());
} else {
locations->SetInAt(2, Location::RequiresRegister());
@@ -1677,7 +2135,7 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) {
void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
Primitive::Type value_type = instruction->GetComponentType();
LocationSummary* locations = instruction->GetLocations();
- bool may_need_runtime_call = locations->CanCall();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
@@ -1691,7 +2149,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
BlockPoolsScope block_pools(masm);
if (!needs_write_barrier) {
- DCHECK(!may_need_runtime_call);
+ DCHECK(!may_need_runtime_call_for_type_check);
if (index.IsConstant()) {
offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type);
destination = HeapOperand(array, offset);
@@ -1741,7 +2199,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
- if (may_need_runtime_call) {
+ if (may_need_runtime_call_for_type_check) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction);
codegen_->AddSlowPath(slow_path);
if (instruction->GetValueCanBeNull()) {
@@ -1756,26 +2214,66 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
__ Bind(&non_zero);
}
- Register temp2 = temps.AcquireSameSizeAs(array);
- __ Ldr(temp, HeapOperand(array, class_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- GetAssembler()->MaybeUnpoisonHeapReference(temp);
- __ Ldr(temp, HeapOperand(temp, component_offset));
- __ Ldr(temp2, HeapOperand(Register(value), class_offset));
- // No need to poison/unpoison, we're comparing two poisoned references.
- __ Cmp(temp, temp2);
- if (instruction->StaticTypeOfArrayIsObjectArray()) {
- vixl::Label do_put;
- __ B(eq, &do_put);
- GetAssembler()->MaybeUnpoisonHeapReference(temp);
- __ Ldr(temp, HeapOperand(temp, super_offset));
- // No need to unpoison, we're comparing against null.
- __ Cbnz(temp, slow_path->GetEntryLabel());
- __ Bind(&do_put);
+ if (kEmitCompilerReadBarrier) {
+ // When read barriers are enabled, the type checking
+ // instrumentation requires two read barriers:
+ //
+ // __ Mov(temp2, temp);
+ // // /* HeapReference<Class> */ temp = temp->component_type_
+ // __ Ldr(temp, HeapOperand(temp, component_offset));
+ // codegen_->GenerateReadBarrier(
+ // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ //
+ // // /* HeapReference<Class> */ temp2 = value->klass_
+ // __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+ // codegen_->GenerateReadBarrier(
+ // instruction, temp2_loc, temp2_loc, value_loc, class_offset, temp_loc);
+ //
+ // __ Cmp(temp, temp2);
+ //
+ // However, the second read barrier may trash `temp`, as it
+ // is a temporary register, and as such would not be saved
+ // along with live registers before calling the runtime (nor
+ // restored afterwards). So in this case, we bail out and
+ // delegate the work to the array set slow path.
+ //
+ // TODO: Extend the register allocator to support a new
+ // "(locally) live temp" location so as to avoid always
+ // going into the slow path when read barriers are enabled.
+ __ B(slow_path->GetEntryLabel());
} else {
- __ B(ne, slow_path->GetEntryLabel());
+ Register temp2 = temps.AcquireSameSizeAs(array);
+ // /* HeapReference<Class> */ temp = array->klass_
+ __ Ldr(temp, HeapOperand(array, class_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ __ Ldr(temp, HeapOperand(temp, component_offset));
+ // /* HeapReference<Class> */ temp2 = value->klass_
+ __ Ldr(temp2, HeapOperand(Register(value), class_offset));
+ // If heap poisoning is enabled, no need to unpoison `temp`
+ // nor `temp2`, as we are comparing two poisoned references.
+ __ Cmp(temp, temp2);
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ vixl::Label do_put;
+ __ B(eq, &do_put);
+ // If heap poisoning is enabled, the `temp` reference has
+ // not been unpoisoned yet; unpoison it now.
+ GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ __ Ldr(temp, HeapOperand(temp, super_offset));
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp`, as we are comparing against null below.
+ __ Cbnz(temp, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ B(ne, slow_path->GetEntryLabel());
+ }
+ temps.Release(temp2);
}
- temps.Release(temp2);
}
if (kPoisonHeapReferences) {
@@ -1791,7 +2289,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
}
__ Str(source, destination);
- if (!may_need_runtime_call) {
+ if (!may_need_runtime_call_for_type_check) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
}
@@ -2283,38 +2781,56 @@ void InstructionCodeGeneratorARM64::VisitTryBoundary(HTryBoundary* try_boundary)
}
void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
vixl::Label* true_target,
- vixl::Label* false_target,
- vixl::Label* always_true_target) {
- HInstruction* cond = instruction->InputAt(0);
- HCondition* condition = cond->AsCondition();
-
- if (cond->IsIntConstant()) {
- int32_t cond_value = cond->AsIntConstant()->GetValue();
- if (cond_value == 1) {
- if (always_true_target != nullptr) {
- __ B(always_true_target);
+ vixl::Label* false_target) {
+ // FP branching requires both targets to be explicit. If either of the targets
+  // is nullptr (fallthrough), use and bind `fallthrough_target` instead.
+ vixl::Label fallthrough_target;
+ HInstruction* cond = instruction->InputAt(condition_input_index);
+
+ if (true_target == nullptr && false_target == nullptr) {
+ // Nothing to do. The code always falls through.
+ return;
+ } else if (cond->IsIntConstant()) {
+ // Constant condition, statically compared against 1.
+ if (cond->AsIntConstant()->IsOne()) {
+ if (true_target != nullptr) {
+ __ B(true_target);
}
- return;
} else {
- DCHECK_EQ(cond_value, 0);
+ DCHECK(cond->AsIntConstant()->IsZero());
+ if (false_target != nullptr) {
+ __ B(false_target);
+ }
}
- } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+ return;
+ }
+
+ // The following code generates these patterns:
+ // (1) true_target == nullptr && false_target != nullptr
+ // - opposite condition true => branch to false_target
+ // (2) true_target != nullptr && false_target == nullptr
+ // - condition true => branch to true_target
+ // (3) true_target != nullptr && false_target != nullptr
+ // - condition true => branch to true_target
+ // - branch to false_target
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
// The condition instruction has been materialized, compare the output to 0.
- Location cond_val = instruction->GetLocations()->InAt(0);
+ Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
DCHECK(cond_val.IsRegister());
- __ Cbnz(InputRegisterAt(instruction, 0), true_target);
+ if (true_target == nullptr) {
+ __ Cbz(InputRegisterAt(instruction, condition_input_index), false_target);
+ } else {
+ __ Cbnz(InputRegisterAt(instruction, condition_input_index), true_target);
+ }
} else {
// The condition instruction has not been materialized, use its inputs as
// the comparison and its condition as the branch condition.
- Primitive::Type type =
- cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
+ HCondition* condition = cond->AsCondition();
+ Primitive::Type type = condition->InputAt(0)->GetType();
if (Primitive::IsFloatingPointType(type)) {
- // FP compares don't like null false_targets.
- if (false_target == nullptr) {
- false_target = codegen_->GetLabelOf(instruction->AsIf()->IfFalseSuccessor());
- }
FPRegister lhs = InputFPRegisterAt(condition, 0);
if (condition->GetLocations()->InAt(1).IsConstant()) {
DCHECK(IsFloatingPointZeroConstant(condition->GetLocations()->InAt(1).GetConstant()));
@@ -2324,31 +2840,45 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
__ Fcmp(lhs, InputFPRegisterAt(condition, 1));
}
if (condition->IsFPConditionTrueIfNaN()) {
- __ B(vs, true_target); // VS for unordered.
+ __ B(vs, true_target == nullptr ? &fallthrough_target : true_target);
} else if (condition->IsFPConditionFalseIfNaN()) {
- __ B(vs, false_target); // VS for unordered.
+ __ B(vs, false_target == nullptr ? &fallthrough_target : false_target);
+ }
+ if (true_target == nullptr) {
+ __ B(ARM64Condition(condition->GetOppositeCondition()), false_target);
+ } else {
+ __ B(ARM64Condition(condition->GetCondition()), true_target);
}
- __ B(ARM64Condition(condition->GetCondition()), true_target);
} else {
// Integer cases.
Register lhs = InputRegisterAt(condition, 0);
Operand rhs = InputOperandAt(condition, 1);
- Condition arm64_cond = ARM64Condition(condition->GetCondition());
+
+ Condition arm64_cond;
+ vixl::Label* non_fallthrough_target;
+ if (true_target == nullptr) {
+ arm64_cond = ARM64Condition(condition->GetOppositeCondition());
+ non_fallthrough_target = false_target;
+ } else {
+ arm64_cond = ARM64Condition(condition->GetCondition());
+ non_fallthrough_target = true_target;
+ }
+
if ((arm64_cond != gt && arm64_cond != le) && rhs.IsImmediate() && (rhs.immediate() == 0)) {
switch (arm64_cond) {
case eq:
- __ Cbz(lhs, true_target);
+ __ Cbz(lhs, non_fallthrough_target);
break;
case ne:
- __ Cbnz(lhs, true_target);
+ __ Cbnz(lhs, non_fallthrough_target);
break;
case lt:
// Test the sign bit and branch accordingly.
- __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ __ Tbnz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
break;
case ge:
// Test the sign bit and branch accordingly.
- __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, true_target);
+ __ Tbz(lhs, (lhs.IsX() ? kXRegSize : kWRegSize) - 1, non_fallthrough_target);
break;
default:
// Without the `static_cast` the compiler throws an error for
@@ -2357,43 +2887,43 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct
}
} else {
__ Cmp(lhs, rhs);
- __ B(arm64_cond, true_target);
+ __ B(arm64_cond, non_fallthrough_target);
}
}
}
- if (false_target != nullptr) {
+
+ // If neither branch falls through (case 3), the conditional branch to `true_target`
+ // was already emitted (case 2) and we need to emit a jump to `false_target`.
+ if (true_target != nullptr && false_target != nullptr) {
__ B(false_target);
}
+
+ if (fallthrough_target.IsLinked()) {
+ __ Bind(&fallthrough_target);
+ }
}
void LocationsBuilderARM64::VisitIf(HIf* if_instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
- vixl::Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
- vixl::Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
- vixl::Label* always_true_target = true_target;
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- always_true_target = nullptr;
- }
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- false_target = nullptr;
- }
- GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+ HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+ HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+ vixl::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+ nullptr : codegen_->GetLabelOf(true_successor);
+ vixl::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+ nullptr : codegen_->GetLabelOf(false_successor);
+ GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
}
void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- HInstruction* cond = deoptimize->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
}
@@ -2402,8 +2932,10 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
DeoptimizationSlowPathARM64(deoptimize);
codegen_->AddSlowPath(slow_path);
- vixl::Label* slow_path_entry = slow_path->GetEntryLabel();
- GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+ GenerateTestAndBranch(deoptimize,
+ /* condition_input_index */ 0,
+ slow_path->GetEntryLabel(),
+ /* false_target */ nullptr);
}
void LocationsBuilderARM64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -2424,40 +2956,44 @@ void InstructionCodeGeneratorARM64::VisitInstanceFieldSet(HInstanceFieldSet* ins
void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- switch (instruction->GetTypeCheckKind()) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = LocationSummary::kNoCall;
+ call_kind =
+ kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
break;
+ case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- call_kind = LocationSummary::kCall;
- break;
- case TypeCheckKind::kArrayCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
- if (call_kind != LocationSummary::kCall) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- // The out register is used as a temporary, so it overlaps with the inputs.
- // Note that TypeCheckSlowPathARM64 uses this register too.
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
- locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt));
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ // The "out" register is used as a temporary, so it overlaps with the inputs.
+ // Note that TypeCheckSlowPathARM64 uses this register too.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ // When read barriers are enabled, we need a temporary register for
+ // some cases.
+ if (kEmitCompilerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
Register cls = InputRegisterAt(instruction, 1);
+ Location out_loc = locations->Out();
Register out = OutputRegister(instruction);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
@@ -2473,15 +3009,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
__ Cbz(obj, &zero);
}
- // In case of an interface/unresolved check, we put the object class into the object register.
- // This is safe, as the register is caller-save, and the object must be in another
- // register if it survives the runtime call.
- Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
- (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
- ? obj
- : out;
- __ Ldr(target, HeapOperand(obj.W(), class_offset));
- GetAssembler()->MaybeUnpoisonHeapReference(target);
+ // /* HeapReference<Class> */ out = obj->klass_
+ __ Ldr(out, HeapOperand(obj.W(), class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
switch (instruction->GetTypeCheckKind()) {
case TypeCheckKind::kExactCheck: {
@@ -2492,13 +3022,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
vixl::Label loop, success;
__ Bind(&loop);
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp = WRegisterFrom(temp_loc);
+ __ Mov(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->super_class_
__ Ldr(out, HeapOperand(out, super_offset));
- GetAssembler()->MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
// If `out` is null, we use it for the result, and jump to `done`.
__ Cbz(out, &done);
__ Cmp(out, cls);
@@ -2509,14 +3049,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
vixl::Label loop, success;
__ Bind(&loop);
__ Cmp(out, cls);
__ B(eq, &success);
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp = WRegisterFrom(temp_loc);
+ __ Mov(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->super_class_
__ Ldr(out, HeapOperand(out, super_offset));
- GetAssembler()->MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
__ Cbnz(out, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
__ B(&done);
@@ -2527,14 +3077,24 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
vixl::Label exact_check;
__ Cmp(out, cls);
__ B(eq, &exact_check);
- // Otherwise, we need to check that the object's class is a non primitive array.
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp = WRegisterFrom(temp_loc);
+ __ Mov(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->component_type_
__ Ldr(out, HeapOperand(out, component_offset));
- GetAssembler()->MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
// If `out` is null, we use it for the result, and jump to `done`.
__ Cbz(out, &done);
__ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -2545,11 +3105,12 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
__ B(&done);
break;
}
+
case TypeCheckKind::kArrayCheck: {
__ Cmp(out, cls);
DCHECK(locations->OnlyCallsOnSlowPath());
- slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
- instruction, /* is_fatal */ false);
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+ /* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
__ B(ne, slow_path->GetEntryLabel());
__ Mov(out, 1);
@@ -2558,13 +3119,25 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kUnresolvedCheck:
- case TypeCheckKind::kInterfaceCheck:
- default: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ case TypeCheckKind::kInterfaceCheck: {
+ // Note that we indeed only call on slow path, but we always go
+ // into the slow path for the unresolved and interface check
+ // cases.
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+ /* is_fatal */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ B(slow_path->GetEntryLabel());
if (zero.IsLinked()) {
__ B(&done);
}
@@ -2590,58 +3163,62 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
- switch (instruction->GetTypeCheckKind()) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = throws_into_catch
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
+ call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
break;
+ case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- call_kind = LocationSummary::kCall;
- break;
- case TypeCheckKind::kArrayCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction, call_kind);
- if (call_kind != LocationSummary::kCall) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- // Note that TypeCheckSlowPathARM64 uses this register too.
- locations->AddTemp(Location::RequiresRegister());
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ // When read barriers are enabled, we need an additional temporary
+ // register for some cases.
+ if (kEmitCompilerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
+ Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
Register cls = InputRegisterAt(instruction, 1);
- Register temp;
- if (!locations->WillCall()) {
- temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
- }
-
+ Location temp_loc = locations->GetTemp(0);
+ Register temp = WRegisterFrom(temp_loc);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- SlowPathCodeARM64* slow_path = nullptr;
- if (!locations->WillCall()) {
- slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(
- instruction, !locations->CanCall());
- codegen_->AddSlowPath(slow_path);
- }
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool is_type_check_slow_path_fatal =
+ (type_check_kind == TypeCheckKind::kExactCheck ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+ !instruction->CanThrowIntoCatchBlock();
+ SlowPathCodeARM64* type_check_slow_path =
+ new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
+ is_type_check_slow_path_fatal);
+ codegen_->AddSlowPath(type_check_slow_path);
vixl::Label done;
// Avoid null check if we know obj is not null.
@@ -2649,76 +3226,159 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
__ Cbz(obj, &done);
}
- if (locations->WillCall()) {
- __ Ldr(obj, HeapOperand(obj, class_offset));
- GetAssembler()->MaybeUnpoisonHeapReference(obj);
- } else {
- __ Ldr(temp, HeapOperand(obj, class_offset));
- GetAssembler()->MaybeUnpoisonHeapReference(temp);
- }
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ Ldr(temp, HeapOperand(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kArrayCheck: {
__ Cmp(temp, cls);
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ B(ne, slow_path->GetEntryLabel());
+ __ B(ne, type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
- vixl::Label loop;
+ vixl::Label loop, compare_classes;
__ Bind(&loop);
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp2 = WRegisterFrom(temp2_loc);
+ __ Mov(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->super_class_
__ Ldr(temp, HeapOperand(temp, super_offset));
- GetAssembler()->MaybeUnpoisonHeapReference(temp);
- // Jump to the slow path to throw the exception.
- __ Cbz(temp, slow_path->GetEntryLabel());
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+ // If the class reference currently in `temp` is not null, jump
+ // to the `compare_classes` label to compare it with the checked
+ // class.
+ __ Cbnz(temp, &compare_classes);
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+ // But before, move back the object's class into `temp` before
+ // going into the slow path, as it has been overwritten in the
+ // meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ Ldr(temp, HeapOperand(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ B(type_check_slow_path->GetEntryLabel());
+
+ __ Bind(&compare_classes);
__ Cmp(temp, cls);
__ B(ne, &loop);
break;
}
+
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
vixl::Label loop;
__ Bind(&loop);
__ Cmp(temp, cls);
__ B(eq, &done);
+
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp2 = WRegisterFrom(temp2_loc);
+ __ Mov(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->super_class_
__ Ldr(temp, HeapOperand(temp, super_offset));
- GetAssembler()->MaybeUnpoisonHeapReference(temp);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+ // If the class reference currently in `temp` is not null, jump
+ // back at the beginning of the loop.
__ Cbnz(temp, &loop);
- // Jump to the slow path to throw the exception.
- __ B(slow_path->GetEntryLabel());
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+ // But before, move back the object's class into `temp` before
+ // going into the slow path, as it has been overwritten in the
+ // meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ Ldr(temp, HeapOperand(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ B(type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
+ vixl::Label check_non_primitive_component_type;
__ Cmp(temp, cls);
__ B(eq, &done);
- // Otherwise, we need to check that the object's class is a non primitive array.
+
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp2 = WRegisterFrom(temp2_loc);
+ __ Mov(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->component_type_
__ Ldr(temp, HeapOperand(temp, component_offset));
- GetAssembler()->MaybeUnpoisonHeapReference(temp);
- __ Cbz(temp, slow_path->GetEntryLabel());
+ codegen_->MaybeGenerateReadBarrier(
+ instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+ // If the component type is not null (i.e. the object is indeed
+ // an array), jump to label `check_non_primitive_component_type`
+ // to further check that this component type is not a primitive
+ // type.
+ __ Cbnz(temp, &check_non_primitive_component_type);
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+ // But before, move back the object's class into `temp` before
+ // going into the slow path, as it has been overwritten in the
+ // meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ Ldr(temp, HeapOperand(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ B(type_check_slow_path->GetEntryLabel());
+
+ __ Bind(&check_non_primitive_component_type);
__ Ldrh(temp, HeapOperand(temp, primitive_offset));
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
- __ Cbnz(temp, slow_path->GetEntryLabel());
+ __ Cbz(temp, &done);
+ // Same comment as above regarding `temp` and the slow path.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ Ldr(temp, HeapOperand(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ B(type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- default:
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ // We always go into the type check slow path for the unresolved
+ // and interface check cases.
+ //
+ // We cannot directly call the CheckCast runtime entry point
+ // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers for the inputs of this HCheckCast
+ // instruction (following the runtime calling convention), which
+ // might be cluttered by the potential first read barrier
+ // emission at the beginning of this method.
+ __ B(type_check_slow_path->GetEntryLabel());
break;
}
__ Bind(&done);
- if (slow_path != nullptr) {
- __ Bind(slow_path->GetExitLabel());
- }
+ __ Bind(type_check_slow_path->GetExitLabel());
}
void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) {
@@ -2761,10 +3421,11 @@ void LocationsBuilderARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
- Register temp = XRegisterFrom(invoke->GetLocations()->GetTemp(0));
+ LocationSummary* locations = invoke->GetLocations();
+ Register temp = XRegisterFrom(locations->GetTemp(0));
uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
invoke->GetImtIndex() % mirror::Class::kImtSize, kArm64PointerSize).Uint32Value();
- Location receiver = invoke->GetLocations()->InAt(0);
+ Location receiver = locations->InAt(0);
Offset class_offset = mirror::Object::ClassOffset();
Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize);
@@ -2776,14 +3437,22 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
scratch_scope.Exclude(ip1);
__ Mov(ip1, invoke->GetDexMethodIndex());
- // temp = object->GetClass();
if (receiver.IsStackSlot()) {
__ Ldr(temp.W(), StackOperandFrom(receiver));
+ // /* HeapReference<Class> */ temp = temp->klass_
__ Ldr(temp.W(), HeapOperand(temp.W(), class_offset));
} else {
+ // /* HeapReference<Class> */ temp = receiver->klass_
__ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not in the future).
GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
// temp = temp->GetImtEntryAt(method_offset);
__ Ldr(temp, MemOperand(temp, method_offset));
@@ -2856,41 +3525,44 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
switch (invoke->GetMethodLoadKind()) {
case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
// temp = thread->string_init_entrypoint
- __ Ldr(XRegisterFrom(temp).X(), MemOperand(tr, invoke->GetStringInitOffset()));
+ __ Ldr(XRegisterFrom(temp), MemOperand(tr, invoke->GetStringInitOffset()));
break;
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
// Load method address from literal pool.
- __ Ldr(XRegisterFrom(temp).X(), DeduplicateUint64Literal(invoke->GetMethodAddress()));
+ __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
// Load method address from literal pool with a link-time patch.
- __ Ldr(XRegisterFrom(temp).X(),
+ __ Ldr(XRegisterFrom(temp),
DeduplicateMethodAddressLiteral(invoke->GetTargetMethod()));
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
// Add ADRP with its PC-relative DexCache access patch.
- pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
- invoke->GetDexCacheArrayOffset());
- vixl::Label* pc_insn_label = &pc_rel_dex_cache_patches_.back().label;
+ pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetDexCacheArrayOffset());
+ vixl::Label* pc_insn_label = &pc_relative_dex_cache_patches_.back().label;
{
vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
- __ adrp(XRegisterFrom(temp).X(), 0);
+ __ Bind(pc_insn_label);
+ __ adrp(XRegisterFrom(temp), 0);
}
- __ Bind(pc_insn_label); // Bind after ADRP.
- pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+ pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
// Add LDR with its PC-relative DexCache access patch.
- pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
- invoke->GetDexCacheArrayOffset());
- __ Ldr(XRegisterFrom(temp).X(), MemOperand(XRegisterFrom(temp).X(), 0));
- __ Bind(&pc_rel_dex_cache_patches_.back().label); // Bind after LDR.
- pc_rel_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+ pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetDexCacheArrayOffset());
+ {
+ vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ __ Bind(&pc_relative_dex_cache_patches_.back().label);
+ __ ldr(XRegisterFrom(temp), MemOperand(XRegisterFrom(temp), 0));
+ pc_relative_dex_cache_patches_.back().pc_insn_label = pc_insn_label;
+ }
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
- Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
Register reg = XRegisterFrom(temp);
Register method_reg;
if (current_method.IsRegister()) {
@@ -2902,7 +3574,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
__ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
}
- // temp = current_method->dex_cache_resolved_methods_;
+ // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
__ Ldr(reg.X(),
MemOperand(method_reg.X(),
ArtMethod::DexCacheResolvedMethodsOffset(kArm64WordSize).Int32Value()));
@@ -2920,8 +3592,9 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative: {
relative_call_patches_.emplace_back(invoke->GetTargetMethod());
vixl::Label* label = &relative_call_patches_.back().label;
- __ Bl(label); // Arbitrarily branch to the instruction after BL, override at link time.
- __ Bind(label); // Bind after BL.
+ vixl::SingleEmissionCheckScope guard(GetVIXLAssembler());
+ __ Bind(label);
+      __ bl(0);  // Branch and link to itself. This will be overridden at link time.
break;
}
case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
@@ -2934,7 +3607,7 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
// LR = callee_method->entry_point_from_quick_compiled_code_;
__ Ldr(lr, MemOperand(
- XRegisterFrom(callee_method).X(),
+ XRegisterFrom(callee_method),
ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64WordSize).Int32Value()));
// lr()
__ Blr(lr);
@@ -2945,8 +3618,12 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invok
}
void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
- LocationSummary* locations = invoke->GetLocations();
- Location receiver = locations->InAt(0);
+ // Use the calling convention instead of the location of the receiver, as
+ // intrinsics may have put the receiver in a different register. In the intrinsics
+ // slow path, the arguments have been moved to the right place, so here we are
+ // guaranteed that the receiver is the first register of the calling convention.
+ InvokeDexCallingConvention calling_convention;
+ Register receiver = calling_convention.GetRegisterAt(0);
Register temp = XRegisterFrom(temp_in);
size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
invoke->GetVTableIndex(), kArm64PointerSize).SizeValue();
@@ -2956,8 +3633,15 @@ void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location te
BlockPoolsScope block_pools(GetVIXLAssembler());
DCHECK(receiver.IsRegister());
- __ Ldr(temp.W(), HeapOperandFrom(receiver, class_offset));
+ // /* HeapReference<Class> */ temp = receiver->klass_
+ __ Ldr(temp.W(), HeapOperandFrom(LocationFrom(receiver), class_offset));
MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not in the future).
GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
// temp = temp->GetMethodAt(method_offset);
__ Ldr(temp, MemOperand(temp, method_offset));
@@ -2973,7 +3657,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
method_patches_.size() +
call_patches_.size() +
relative_call_patches_.size() +
- pc_rel_dex_cache_patches_.size();
+ pc_relative_dex_cache_patches_.size();
linker_patches->reserve(size);
for (const auto& entry : method_patches_) {
const MethodReference& target_method = entry.first;
@@ -2990,14 +3674,14 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patc
target_method.dex_method_index));
}
for (const MethodPatchInfo<vixl::Label>& info : relative_call_patches_) {
- linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location() - 4u,
+ linker_patches->push_back(LinkerPatch::RelativeCodePatch(info.label.location(),
info.target_method.dex_file,
info.target_method.dex_method_index));
}
- for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) {
- linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location() - 4u,
+ for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+ linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.location(),
&info.target_dex_file,
- info.pc_insn_label->location() - 4u,
+ info.pc_insn_label->location(),
info.element_offset));
}
}
@@ -3070,7 +3754,8 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
CodeGenerator::CreateLoadClassLocationSummary(
cls,
LocationFrom(calling_convention.GetRegisterAt(0)),
- LocationFrom(vixl::x0));
+ LocationFrom(vixl::x0),
+ /* code_generator_supports_read_barrier */ true);
}
void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
@@ -3080,30 +3765,56 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) {
cls,
cls->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
+ Location out_loc = cls->GetLocations()->Out();
Register out = OutputRegister(cls);
Register current_method = InputRegisterAt(cls, 0);
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+ uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+ __ Add(out.X(), current_method.X(), declaring_class_offset);
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ __ Ldr(out, MemOperand(current_method, declaring_class_offset));
+ }
} else {
- DCHECK(cls->CanCallRuntime());
MemberOffset resolved_types_offset = ArtMethod::DexCacheResolvedTypesOffset(kArm64PointerSize);
+ // /* GcRoot<mirror::Class>[] */ out =
+ // current_method.ptr_sized_fields_->dex_cache_resolved_types_
__ Ldr(out.X(), MemOperand(current_method, resolved_types_offset.Int32Value()));
- __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
- // TODO: We will need a read barrier here.
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
- codegen_->AddSlowPath(slow_path);
- __ Cbz(out, slow_path->GetEntryLabel());
- if (cls->MustGenerateClinitCheck()) {
- GenerateClassInitializationCheck(slow_path, out);
+ size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+ __ Add(out.X(), out.X(), cache_offset);
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
} else {
- __ Bind(slow_path->GetExitLabel());
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ __ Ldr(out, MemOperand(out.X(), cache_offset));
+ }
+
+ if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+ DCHECK(cls->CanCallRuntime());
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64(
+ cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ codegen_->AddSlowPath(slow_path);
+ if (!cls->IsInDexCache()) {
+ __ Cbz(out, slow_path->GetEntryLabel());
+ }
+ if (cls->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
}
}
@@ -3149,12 +3860,35 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) {
SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
codegen_->AddSlowPath(slow_path);
+ Location out_loc = load->GetLocations()->Out();
Register out = OutputRegister(load);
Register current_method = InputRegisterAt(load, 0);
- __ Ldr(out, MemOperand(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
- __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset()));
- __ Ldr(out, MemOperand(out.X(), CodeGenerator::GetCacheOffset(load->GetStringIndex())));
- // TODO: We will need a read barrier here.
+
+ uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+ __ Add(out.X(), current_method.X(), declaring_class_offset);
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ __ Ldr(out, MemOperand(current_method, declaring_class_offset));
+ }
+
+ // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+ __ Ldr(out.X(), HeapOperand(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+
+ size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::String>* */ out = &out[string_index]
+ __ Add(out.X(), out.X(), cache_offset);
+ // /* mirror::String* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ __ Ldr(out, MemOperand(out.X(), cache_offset));
+ }
+
__ Cbz(out, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
}
@@ -3189,7 +3923,11 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins
instruction,
instruction->GetDexPc(),
nullptr);
- CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+ if (instruction->IsEnter()) {
+ CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+ } else {
+ CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+ }
}
void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -3278,8 +4016,6 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
locations->SetOut(LocationFrom(x0));
locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2)));
- CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck,
- void*, uint32_t, int32_t, ArtMethod*>();
}
void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
@@ -3301,17 +4037,12 @@ void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
InvokeRuntimeCallingConvention calling_convention;
- locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
- CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
}
void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- Register type_index = RegisterFrom(locations->GetTemp(0), Primitive::kPrimInt);
- DCHECK(type_index.Is(w0));
- __ Mov(type_index, instruction->GetTypeIndex());
// Note: if heap poisoning is enabled, the entry point takes cares
// of poisoning the reference.
codegen_->InvokeRuntime(instruction->GetEntrypoint(),
@@ -3488,6 +4219,11 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
: QUICK_ENTRY_POINT(pFmod);
codegen_->InvokeRuntime(entry_offset, rem, rem->GetDexPc(), nullptr);
+ if (type == Primitive::kPrimFloat) {
+ CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+ } else {
+ CheckEntrypointTypes<kQuickFmod, double, double, double>();
+ }
break;
}
@@ -3732,9 +4468,7 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers
int min_size = std::min(result_size, input_size);
Register output = OutputRegister(conversion);
Register source = InputRegisterAt(conversion, 0);
- if ((result_type == Primitive::kPrimChar) && (input_size < result_size)) {
- __ Ubfx(output, source, 0, result_size * kBitsPerByte);
- } else if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+ if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
// 'int' values are used directly as W registers, discarding the top
// bits, so we don't need to sign-extend and can just perform a move.
// We do not pass the `kDiscardForSameWReg` argument to force clearing the
@@ -3743,9 +4477,11 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers
// 32bit input value as a 64bit value assuming that the top 32 bits are
// zero.
__ Mov(output.W(), source.W());
- } else if ((result_type == Primitive::kPrimChar) ||
- ((input_type == Primitive::kPrimChar) && (result_size > input_size))) {
- __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
+ } else if (result_type == Primitive::kPrimChar ||
+ (input_type == Primitive::kPrimChar && input_size < result_size)) {
+ __ Ubfx(output,
+ output.IsX() ? source.X() : source.W(),
+ 0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte);
} else {
__ Sbfx(output, output.IsX() ? source.X() : source.W(), 0, min_size * kBitsPerByte);
}
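For reference, the char path above zero-extends the low 16 bits of the source (Ubfx with a width of ComponentSize(kPrimChar) * kBitsPerByte), while the final else branch sign-extends (Sbfx). A minimal scalar sketch of the unsigned bit-field extract; the helper name is illustrative only and not part of the patch:

#include <cstdint>

// Sketch only: the value Ubfx(dst, src, 0, 16) produces for the char case,
// i.e. bits [0, 16) of `src`, zero-extended.
inline uint64_t UnsignedBitfieldExtractSketch(uint64_t src, unsigned lsb, unsigned width) {
  return (src >> lsb) & ((uint64_t{1} << width) - 1u);
}
// Example: UnsignedBitfieldExtractSketch(0xFFFFFFFFFFFF1234u, 0, 16) == 0x1234.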
@@ -3810,29 +4546,152 @@ void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
void InstructionCodeGeneratorARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
int32_t lower_bound = switch_instr->GetStartValue();
- int32_t num_entries = switch_instr->GetNumEntries();
+ uint32_t num_entries = switch_instr->GetNumEntries();
Register value_reg = InputRegisterAt(switch_instr, 0);
HBasicBlock* default_block = switch_instr->GetDefaultBlock();
- // Create a series of compare/jumps.
- const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
- for (int32_t i = 0; i < num_entries; i++) {
- int32_t case_value = lower_bound + i;
- vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
- if (case_value == 0) {
- __ Cbz(value_reg, succ);
+  // Roughly assume an average of at most 16 assembly instructions generated per HIR in a graph.
+ static constexpr int32_t kMaxExpectedSizePerHInstruction = 16 * vixl::kInstructionSize;
+  // ADR has a limited range (+/- 1 MB), so we set a threshold for the number of HIRs in the graph to
+ // make sure we don't emit it if the target may run out of range.
+ // TODO: Instead of emitting all jump tables at the end of the code, we could keep track of ADR
+ // ranges and emit the tables only as required.
+  static constexpr int32_t kJumpTableInstructionThreshold = 1 * MB / kMaxExpectedSizePerHInstruction;
+
+ if (num_entries < kPackedSwitchJumpTableThreshold ||
+ // Current instruction id is an upper bound of the number of HIRs in the graph.
+ GetGraph()->GetCurrentInstructionId() > kJumpTableInstructionThreshold) {
+ // Create a series of compare/jumps.
+ const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+ for (uint32_t i = 0; i < num_entries; i++) {
+ int32_t case_value = lower_bound + i;
+ vixl::Label* succ = codegen_->GetLabelOf(successors[i]);
+ if (case_value == 0) {
+ __ Cbz(value_reg, succ);
+ } else {
+ __ Cmp(value_reg, Operand(case_value));
+ __ B(eq, succ);
+ }
+ }
+
+ // And the default for any other value.
+ if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+ __ B(codegen_->GetLabelOf(default_block));
+ }
+ } else {
+ JumpTableARM64* jump_table = new (GetGraph()->GetArena()) JumpTableARM64(switch_instr);
+ codegen_->AddJumpTable(jump_table);
+
+ UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+
+  // The instructions below should use at most one blocked register. Since there are two blocked
+  // registers, we are free to block one.
+ Register temp_w = temps.AcquireW();
+ Register index;
+ // Remove the bias.
+ if (lower_bound != 0) {
+ index = temp_w;
+ __ Sub(index, value_reg, Operand(lower_bound));
} else {
- __ Cmp(value_reg, vixl::Operand(case_value));
- __ B(eq, succ);
+ index = value_reg;
}
- }
- // And the default for any other value.
- if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
- __ B(codegen_->GetLabelOf(default_block));
+  // Jump to the default block if the index is out of range.
+ __ Cmp(index, Operand(num_entries));
+ __ B(hs, codegen_->GetLabelOf(default_block));
+
+  // In the current VIXL implementation, encoding the immediate value for Adr does not require
+  // any blocked registers, so we are free to use both VIXL blocked registers to reduce
+  // register pressure.
+ Register table_base = temps.AcquireX();
+ // Load jump offset from the table.
+ __ Adr(table_base, jump_table->GetTableStartLabel());
+ Register jump_offset = temp_w;
+ __ Ldr(jump_offset, MemOperand(table_base, index, UXTW, 2));
+
+  // Jump to the target block by branching to table_base (PC-relative) + offset.
+ Register target_address = table_base;
+ __ Add(target_address, table_base, Operand(jump_offset, SXTW));
+ __ Br(target_address);
+ }
+}
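To put the jump-table threshold above in concrete numbers, here is a sketch of the arithmetic under the assumption that vixl::kInstructionSize is 4 bytes (one AArch64 instruction); the *_Sketch constants are local stand-ins, not the actual compiler constants:

#include <cstdint>

constexpr int32_t kInstructionSizeSketch = 4;  // bytes per AArch64 instruction (assumed)
constexpr int32_t kMaxExpectedSizePerHInstructionSketch = 16 * kInstructionSizeSketch;  // 64 bytes
constexpr int32_t kMBSketch = 1024 * 1024;
constexpr int32_t kJumpTableInstructionThresholdSketch =
    kMBSketch / kMaxExpectedSizePerHInstructionSketch;
// Graphs with more than roughly 16K HIRs are assumed to risk exceeding the
// +/- 1 MB ADR range, so they fall back to the compare/branch sequence.
static_assert(kJumpTableInstructionThresholdSketch == 16384, "1 MB / 64 B == 16384");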
+
+void CodeGeneratorARM64::GenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // If heap poisoning is enabled, the unpoisoning of the loaded
+ // reference will be carried out by the runtime within the slow
+ // path.
+ //
+ // Note that `ref` currently does not get unpoisoned (when heap
+ // poisoning is enabled), which is alright as the `ref` argument is
+ // not used by the artReadBarrierSlow entry point.
+ //
+ // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+ SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena())
+ ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index);
+ AddSlowPath(slow_path);
+
+ // TODO: When read barrier has a fast path, add it here.
+ /* Currently the read barrier call is inserted after the original load.
+ * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+ * original load. This load-load ordering is required by the read barrier.
+ * The fast path/slow path (for Baker's algorithm) should look like:
+ *
+ * bool isGray = obj.LockWord & kReadBarrierMask;
+ * lfence; // load fence or artificial data dependence to prevent load-load reordering
+ * ref = obj.field; // this is the original load
+ * if (isGray) {
+ * ref = Mark(ref); // ideally the slow path just does Mark(ref)
+ * }
+ */
+
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
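The fast path sketched in the comment above can be modelled in plain C++ as follows (a minimal sketch under assumed names: the lock-word bit position, Mark() and the object layout are placeholders, not the ART runtime API).

#include <atomic>

struct Obj {
  std::atomic<unsigned> lock_word;  // Placeholder for the ART lock word.
  Obj* field;
};

constexpr unsigned kReadBarrierMask = 1u << 28;  // Hypothetical gray-bit position.

Obj* Mark(Obj* ref) { return ref; }  // Placeholder for the slow-path mark routine.

Obj* ReadWithBarrier(Obj* obj) {
  bool is_gray = (obj->lock_word.load(std::memory_order_relaxed) & kReadBarrierMask) != 0;
  std::atomic_thread_fence(std::memory_order_acquire);  // Keep the lock-word load before the field load.
  Obj* ref = obj->field;                                // The original load.
  if (is_gray) {
    ref = Mark(ref);                                    // Slow path only for gray objects.
  }
  return ref;
}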
+
+void CodeGeneratorARM64::MaybeGenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (kEmitCompilerReadBarrier) {
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ GetAssembler()->UnpoisonHeapReference(WRegisterFrom(out));
}
}
+void CodeGeneratorARM64::GenerateReadBarrierForRoot(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Note that GC roots are not affected by heap poisoning, so we do
+ // not need to do anything special here.
+ SlowPathCodeARM64* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ // TODO: Implement a fast path for ReadBarrierForRoot, performing
+ // the following operation (for Baker's algorithm):
+ //
+ // if (thread.tls32_.is_gc_marking) {
+ // root = Mark(root);
+ // }
+
+ __ B(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
#undef __
#undef QUICK_ENTRY_POINT
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index ab684ea538..7950f078ad 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -81,6 +81,22 @@ class SlowPathCodeARM64 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(SlowPathCodeARM64);
};
+class JumpTableARM64 : public ArenaObject<kArenaAllocSwitchTable> {
+ public:
+ explicit JumpTableARM64(HPackedSwitch* switch_instr)
+ : switch_instr_(switch_instr), table_start_() {}
+
+ vixl::Label* GetTableStartLabel() { return &table_start_; }
+
+ void EmitTable(CodeGeneratorARM64* codegen);
+
+ private:
+ HPackedSwitch* const switch_instr_;
+ vixl::Label table_start_;
+
+ DISALLOW_COPY_AND_ASSIGN(JumpTableARM64);
+};
+
static const vixl::Register kRuntimeParameterCoreRegisters[] =
{ vixl::x0, vixl::x1, vixl::x2, vixl::x3, vixl::x4, vixl::x5, vixl::x6, vixl::x7 };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
@@ -203,9 +219,9 @@ class InstructionCodeGeneratorARM64 : public HGraphVisitor {
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
void GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
vixl::Label* true_target,
- vixl::Label* false_target,
- vixl::Label* always_true_target);
+ vixl::Label* false_target);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -358,6 +374,10 @@ class CodeGeneratorARM64 : public CodeGenerator {
block_labels_ = CommonInitializeLabels<vixl::Label>();
}
+ void AddJumpTable(JumpTableARM64* jump_table) {
+ jump_tables_.push_back(jump_table);
+ }
+
void Finalize(CodeAllocator* allocator) OVERRIDE;
// Code generation helpers.
@@ -404,6 +424,51 @@ class CodeGeneratorARM64 : public CodeGenerator {
void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
+ // Generate a read barrier for a heap reference within `instruction`.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+ // When `index` is provided (i.e. for array accesses), the offset
+ // value passed to artReadBarrierSlow is adjusted to take `index`
+ // into account.
+ void GenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap reference.
+ // If heap poisoning is enabled, also unpoison the reference in `out`.
+ void MaybeGenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction`.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
+ void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
private:
using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::Literal<uint64_t>*>;
using MethodToLiteralMap = ArenaSafeMap<MethodReference,
@@ -422,15 +487,16 @@ class CodeGeneratorARM64 : public CodeGenerator {
const DexFile& target_dex_file;
uint32_t element_offset;
- // NOTE: Labels are bound to the end of the patched instruction because
- // we don't know if there will be a veneer or how big it will be.
vixl::Label label;
vixl::Label* pc_insn_label;
};
+ void EmitJumpTables();
+
// Labels for each block that will be compiled.
vixl::Label* block_labels_; // Indexed by block id.
vixl::Label frame_entry_label_;
+ ArenaVector<JumpTableARM64*> jump_tables_;
LocationsBuilderARM64 location_builder_;
InstructionCodeGeneratorARM64 instruction_visitor_;
@@ -447,7 +513,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Using ArenaDeque<> which retains element addresses on push/emplace_back().
ArenaDeque<MethodPatchInfo<vixl::Label>> relative_call_patches_;
// PC-relative DexCache access info.
- ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_;
+ ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
};
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 29d08beb97..9dc9167824 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -40,12 +40,8 @@ static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = A0;
// We need extra temporary/scratch registers (in addition to AT) in some cases.
-static constexpr Register TMP = T8;
static constexpr FRegister FTMP = F8;
-// ART Thread Register.
-static constexpr Register TR = S1;
-
Location MipsReturnLocation(Primitive::Type return_type) {
switch (return_type) {
case Primitive::kPrimBoolean:
@@ -419,13 +415,11 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
dex_pc,
this,
IsDirectEntrypoint(kQuickInstanceofNonTrivial));
+ CheckEntrypointTypes<
+ kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
- CheckEntrypointTypes<kQuickInstanceofNonTrivial,
- uint32_t,
- const mirror::Class*,
- const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
mips_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
@@ -465,6 +459,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
dex_pc,
this,
IsDirectEntrypoint(kQuickDeoptimize));
+ CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
@@ -1732,12 +1727,11 @@ void InstructionCodeGeneratorMIPS::VisitArrayLength(HArrayLength* instruction) {
}
void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
- Primitive::Type value_type = instruction->GetComponentType();
- bool is_object = value_type == Primitive::kPrimNot;
+ bool needs_runtime_call = instruction->NeedsTypeCheck();
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- is_object ? LocationSummary::kCall : LocationSummary::kNoCall);
- if (is_object) {
+ needs_runtime_call ? LocationSummary::kCall : LocationSummary::kNoCall);
+ if (needs_runtime_call) {
InvokeRuntimeCallingConvention calling_convention;
locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
@@ -2425,30 +2419,51 @@ void InstructionCodeGeneratorMIPS::VisitTryBoundary(HTryBoundary* try_boundary)
}
void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
MipsLabel* true_target,
- MipsLabel* false_target,
- MipsLabel* always_true_target) {
- HInstruction* cond = instruction->InputAt(0);
- HCondition* condition = cond->AsCondition();
-
- if (cond->IsIntConstant()) {
- int32_t cond_value = cond->AsIntConstant()->GetValue();
- if (cond_value == 1) {
- if (always_true_target != nullptr) {
- __ B(always_true_target);
+ MipsLabel* false_target) {
+ HInstruction* cond = instruction->InputAt(condition_input_index);
+
+ if (true_target == nullptr && false_target == nullptr) {
+ // Nothing to do. The code always falls through.
+ return;
+ } else if (cond->IsIntConstant()) {
+ // Constant condition, statically compared against 1.
+ if (cond->AsIntConstant()->IsOne()) {
+ if (true_target != nullptr) {
+ __ B(true_target);
}
- return;
} else {
- DCHECK_EQ(cond_value, 0);
+ DCHECK(cond->AsIntConstant()->IsZero());
+ if (false_target != nullptr) {
+ __ B(false_target);
+ }
}
- } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+ return;
+ }
+
+ // The following code generates these patterns:
+ // (1) true_target == nullptr && false_target != nullptr
+ // - opposite condition true => branch to false_target
+ // (2) true_target != nullptr && false_target == nullptr
+ // - condition true => branch to true_target
+ // (3) true_target != nullptr && false_target != nullptr
+ // - condition true => branch to true_target
+ // - branch to false_target
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
// The condition instruction has been materialized, compare the output to 0.
- Location cond_val = instruction->GetLocations()->InAt(0);
+ Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
DCHECK(cond_val.IsRegister());
- __ Bnez(cond_val.AsRegister<Register>(), true_target);
+ if (true_target == nullptr) {
+ __ Beqz(cond_val.AsRegister<Register>(), false_target);
+ } else {
+ __ Bnez(cond_val.AsRegister<Register>(), true_target);
+ }
} else {
// The condition instruction has not been materialized, use its inputs as
// the comparison and its condition as the branch condition.
+ HCondition* condition = cond->AsCondition();
+
Register lhs = condition->GetLocations()->InAt(0).AsRegister<Register>();
Location rhs_location = condition->GetLocations()->InAt(1);
Register rhs_reg = ZERO;
@@ -2460,37 +2475,46 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi
rhs_reg = rhs_location.AsRegister<Register>();
}
- IfCondition if_cond = condition->GetCondition();
+ IfCondition if_cond;
+ MipsLabel* non_fallthrough_target;
+ if (true_target == nullptr) {
+ if_cond = condition->GetOppositeCondition();
+ non_fallthrough_target = false_target;
+ } else {
+ if_cond = condition->GetCondition();
+ non_fallthrough_target = true_target;
+ }
+
if (use_imm && rhs_imm == 0) {
switch (if_cond) {
case kCondEQ:
- __ Beqz(lhs, true_target);
+ __ Beqz(lhs, non_fallthrough_target);
break;
case kCondNE:
- __ Bnez(lhs, true_target);
+ __ Bnez(lhs, non_fallthrough_target);
break;
case kCondLT:
- __ Bltz(lhs, true_target);
+ __ Bltz(lhs, non_fallthrough_target);
break;
case kCondGE:
- __ Bgez(lhs, true_target);
+ __ Bgez(lhs, non_fallthrough_target);
break;
case kCondLE:
- __ Blez(lhs, true_target);
+ __ Blez(lhs, non_fallthrough_target);
break;
case kCondGT:
- __ Bgtz(lhs, true_target);
+ __ Bgtz(lhs, non_fallthrough_target);
break;
case kCondB:
break; // always false
case kCondBE:
- __ Beqz(lhs, true_target); // <= 0 if zero
+ __ Beqz(lhs, non_fallthrough_target); // <= 0 if zero
break;
case kCondA:
- __ Bnez(lhs, true_target); // > 0 if non-zero
+ __ Bnez(lhs, non_fallthrough_target); // > 0 if non-zero
break;
case kCondAE:
- __ B(true_target); // always true
+ __ B(non_fallthrough_target); // always true
break;
}
} else {
@@ -2501,81 +2525,78 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi
}
switch (if_cond) {
case kCondEQ:
- __ Beq(lhs, rhs_reg, true_target);
+ __ Beq(lhs, rhs_reg, non_fallthrough_target);
break;
case kCondNE:
- __ Bne(lhs, rhs_reg, true_target);
+ __ Bne(lhs, rhs_reg, non_fallthrough_target);
break;
case kCondLT:
- __ Blt(lhs, rhs_reg, true_target);
+ __ Blt(lhs, rhs_reg, non_fallthrough_target);
break;
case kCondGE:
- __ Bge(lhs, rhs_reg, true_target);
+ __ Bge(lhs, rhs_reg, non_fallthrough_target);
break;
case kCondLE:
- __ Bge(rhs_reg, lhs, true_target);
+ __ Bge(rhs_reg, lhs, non_fallthrough_target);
break;
case kCondGT:
- __ Blt(rhs_reg, lhs, true_target);
+ __ Blt(rhs_reg, lhs, non_fallthrough_target);
break;
case kCondB:
- __ Bltu(lhs, rhs_reg, true_target);
+ __ Bltu(lhs, rhs_reg, non_fallthrough_target);
break;
case kCondAE:
- __ Bgeu(lhs, rhs_reg, true_target);
+ __ Bgeu(lhs, rhs_reg, non_fallthrough_target);
break;
case kCondBE:
- __ Bgeu(rhs_reg, lhs, true_target);
+ __ Bgeu(rhs_reg, lhs, non_fallthrough_target);
break;
case kCondA:
- __ Bltu(rhs_reg, lhs, true_target);
+ __ Bltu(rhs_reg, lhs, non_fallthrough_target);
break;
}
}
}
- if (false_target != nullptr) {
+
+ // If neither branch falls through (case 3), the conditional branch to `true_target`
+ // has already been emitted (as in case 2) and we still need to emit a jump to `false_target`.
+ if (true_target != nullptr && false_target != nullptr) {
__ B(false_target);
}
}
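The three patterns listed in the comment inside GenerateTestAndBranch boil down to a small decision: which condition to branch on and whether a trailing unconditional jump is needed. A minimal standalone C++ sketch of that decision (names are illustrative, not ART API):

// Decide how to emit the branch given which targets are fall-throughs (nullptr).
// The both-nullptr case is handled earlier by simply falling through.
struct BranchPlan {
  bool branch_on_opposite;   // Use GetOppositeCondition() instead of GetCondition().
  bool needs_trailing_jump;  // Emit an unconditional jump to false_target afterwards.
};

BranchPlan Plan(bool has_true_target, bool has_false_target) {
  if (!has_true_target) {
    // Case (1): only the false target exists; branch there when the opposite condition holds.
    return {true, false};
  }
  // Cases (2) and (3): branch to the true target on the condition itself; a trailing jump
  // is only required when the false target is not the fall-through block.
  return {false, has_false_target};
}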
void LocationsBuilderMIPS::VisitIf(HIf* if_instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
}
void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) {
- MipsLabel* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
- MipsLabel* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
- MipsLabel* always_true_target = true_target;
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- always_true_target = nullptr;
- }
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- false_target = nullptr;
- }
- GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+ HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+ HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+ MipsLabel* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+ nullptr : codegen_->GetLabelOf(true_successor);
+ MipsLabel* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+ nullptr : codegen_->GetLabelOf(false_successor);
+ GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
}
void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- HInstruction* cond = deoptimize->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
}
void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena())
- DeoptimizationSlowPathMIPS(deoptimize);
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DeoptimizationSlowPathMIPS(deoptimize);
codegen_->AddSlowPath(slow_path);
- MipsLabel* slow_path_entry = slow_path->GetEntryLabel();
- GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+ GenerateTestAndBranch(deoptimize,
+ /* condition_input_index */ 0,
+ slow_path->GetEntryLabel(),
+ /* false_target */ nullptr);
}
void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
@@ -2616,6 +2637,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
Register obj = locations->InAt(0).AsRegister<Register>();
LoadOperandType load_type = kLoadUnsignedByte;
bool is_volatile = field_info.IsVolatile();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
switch (type) {
case Primitive::kPrimBoolean:
@@ -2646,8 +2668,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
if (is_volatile && load_type == kLoadDoubleword) {
InvokeRuntimeCallingConvention calling_convention;
- __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
- obj, field_info.GetFieldOffset().Uint32Value());
+ __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
// Do implicit null check.
__ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2670,21 +2691,34 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
if (type == Primitive::kPrimLong) {
DCHECK(locations->Out().IsRegisterPair());
dst = locations->Out().AsRegisterPairLow<Register>();
+ Register dst_high = locations->Out().AsRegisterPairHigh<Register>();
+ if (obj == dst) {
+ __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ LoadFromOffset(kLoadWord, dst, obj, offset);
+ } else {
+ __ LoadFromOffset(kLoadWord, dst, obj, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ LoadFromOffset(kLoadWord, dst_high, obj, offset + kMipsWordSize);
+ }
} else {
DCHECK(locations->Out().IsRegister());
dst = locations->Out().AsRegister<Register>();
+ __ LoadFromOffset(load_type, dst, obj, offset);
}
- __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
} else {
DCHECK(locations->Out().IsFpuRegister());
FRegister dst = locations->Out().AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
- __ LoadSFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+ __ LoadSFromOffset(dst, obj, offset);
} else {
- __ LoadDFromOffset(dst, obj, field_info.GetFieldOffset().Uint32Value());
+ __ LoadDFromOffset(dst, obj, offset);
}
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Longs are handled earlier.
+ if (type != Primitive::kPrimLong) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
}
if (is_volatile) {
@@ -2730,6 +2764,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
Register obj = locations->InAt(0).AsRegister<Register>();
StoreOperandType store_type = kStoreByte;
bool is_volatile = field_info.IsVolatile();
+ uint32_t offset = field_info.GetFieldOffset().Uint32Value();
switch (type) {
case Primitive::kPrimBoolean:
@@ -2760,8 +2795,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
if (is_volatile && store_type == kStoreDoubleword) {
InvokeRuntimeCallingConvention calling_convention;
- __ Addiu32(locations->GetTemp(0).AsRegister<Register>(),
- obj, field_info.GetFieldOffset().Uint32Value());
+ __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset);
// Do implicit null check.
__ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0);
codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
@@ -2784,21 +2818,28 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction,
if (type == Primitive::kPrimLong) {
DCHECK(locations->InAt(1).IsRegisterPair());
src = locations->InAt(1).AsRegisterPairLow<Register>();
+ Register src_high = locations->InAt(1).AsRegisterPairHigh<Register>();
+ __ StoreToOffset(kStoreWord, src, obj, offset);
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ StoreToOffset(kStoreWord, src_high, obj, offset + kMipsWordSize);
} else {
DCHECK(locations->InAt(1).IsRegister());
src = locations->InAt(1).AsRegister<Register>();
+ __ StoreToOffset(store_type, src, obj, offset);
}
- __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
} else {
DCHECK(locations->InAt(1).IsFpuRegister());
FRegister src = locations->InAt(1).AsFpuRegister<FRegister>();
if (type == Primitive::kPrimFloat) {
- __ StoreSToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+ __ StoreSToOffset(src, obj, offset);
} else {
- __ StoreDToOffset(src, obj, field_info.GetFieldOffset().Uint32Value());
+ __ StoreDToOffset(src, obj, offset);
}
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ // Longs are handled earlier.
+ if (type != Primitive::kPrimLong) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
}
// TODO: memory barriers?
@@ -3009,7 +3050,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
invoke->GetStringInitOffset());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
@@ -3021,7 +3062,7 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke
LOG(FATAL) << "Unsupported";
UNREACHABLE();
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
- Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
Register reg = temp.AsRegister<Register>();
Register method_reg;
if (current_method.IsRegister()) {
@@ -3148,6 +3189,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
cls->GetDexPc(),
nullptr,
IsDirectEntrypoint(kQuickInitializeTypeAndVerifyAccess));
+ CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
@@ -3159,21 +3201,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) {
__ LoadFromOffset(kLoadWord, out, current_method,
ArtMethod::DeclaringClassOffset().Int32Value());
} else {
- DCHECK(cls->CanCallRuntime());
__ LoadFromOffset(kLoadWord, out, current_method,
ArtMethod::DexCacheResolvedTypesOffset(kMipsPointerSize).Int32Value());
__ LoadFromOffset(kLoadWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
- SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
- cls,
- cls,
- cls->GetDexPc(),
- cls->MustGenerateClinitCheck());
- codegen_->AddSlowPath(slow_path);
- __ Beqz(out, slow_path->GetEntryLabel());
- if (cls->MustGenerateClinitCheck()) {
- GenerateClassInitializationCheck(slow_path, out);
- } else {
- __ Bind(slow_path->GetExitLabel());
+
+ if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+ DCHECK(cls->CanCallRuntime());
+ SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS(
+ cls,
+ cls,
+ cls->GetDexPc(),
+ cls->MustGenerateClinitCheck());
+ codegen_->AddSlowPath(slow_path);
+ if (!cls->IsInDexCache()) {
+ __ Beqz(out, slow_path->GetEntryLabel());
+ }
+ if (cls->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
}
}
@@ -3456,17 +3503,12 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
InvokeRuntimeCallingConvention calling_convention;
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
}
void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
- InvokeRuntimeCallingConvention calling_convention;
- Register current_method_register = calling_convention.GetRegisterAt(1);
- __ Lw(current_method_register, SP, kCurrentMethodStackOffset);
- // Move an uint16_t value to a register.
- __ LoadConst32(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
codegen_->InvokeRuntime(
GetThreadOffset<kMipsWordSize>(instruction->GetEntrypoint()).Int32Value(),
instruction,
@@ -3683,7 +3725,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
instruction, instruction->GetDexPc(),
nullptr,
IsDirectEntrypoint(kQuickFmodf));
- CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+ CheckEntrypointTypes<kQuickFmodf, float, float, float>();
break;
}
case Primitive::kPrimDouble: {
@@ -3691,7 +3733,7 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
instruction, instruction->GetDexPc(),
nullptr,
IsDirectEntrypoint(kQuickFmod));
- CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+ CheckEntrypointTypes<kQuickFmod, double, double, double>();
break;
}
default:
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 059131dcfc..e3a2cb40ef 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -226,9 +226,9 @@ class InstructionCodeGeneratorMIPS : public HGraphVisitor {
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
void GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
MipsLabel* true_target,
- MipsLabel* false_target,
- MipsLabel* always_true_target);
+ MipsLabel* false_target);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
MipsAssembler* const assembler_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 55efd5f9de..bc5eb31405 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -16,19 +16,19 @@
#include "code_generator_mips64.h"
+#include "art_method.h"
+#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
#include "intrinsics.h"
#include "intrinsics_mips64.h"
-#include "art_method.h"
-#include "code_generator_utils.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "offsets.h"
#include "thread.h"
-#include "utils/mips64/assembler_mips64.h"
#include "utils/assembler.h"
+#include "utils/mips64/assembler_mips64.h"
#include "utils/stack_checks.h"
namespace art {
@@ -210,7 +210,7 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 {
}
RestoreLiveRegisters(codegen, locations);
- __ B(GetExitLabel());
+ __ Bc(GetExitLabel());
}
const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; }
@@ -257,7 +257,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
type);
RestoreLiveRegisters(codegen, locations);
- __ B(GetExitLabel());
+ __ Bc(GetExitLabel());
}
const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; }
@@ -312,13 +312,13 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
CheckEntrypointTypes<kQuickTestSuspend, void, void>();
RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
- __ B(GetReturnLabel());
+ __ Bc(GetReturnLabel());
} else {
- __ B(mips64_codegen->GetLabelOf(successor_));
+ __ Bc(mips64_codegen->GetLabelOf(successor_));
}
}
- Label* GetReturnLabel() {
+ Mips64Label* GetReturnLabel() {
DCHECK(successor_ == nullptr);
return &return_label_;
}
@@ -331,7 +331,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
HBasicBlock* const successor_;
// If `successor_` is null, the label to branch to after the suspend check.
- Label return_label_;
+ Mips64Label return_label_;
DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathMIPS64);
};
@@ -366,13 +366,11 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
instruction_,
dex_pc,
this);
+ CheckEntrypointTypes<
+ kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
Primitive::Type ret_type = instruction_->GetType();
Location ret_loc = calling_convention.GetReturnLocation(ret_type);
mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
- CheckEntrypointTypes<kQuickInstanceofNonTrivial,
- uint32_t,
- const mirror::Class*,
- const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast), instruction_, dex_pc, this);
@@ -380,7 +378,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
}
RestoreLiveRegisters(codegen, locations);
- __ B(GetExitLabel());
+ __ Bc(GetExitLabel());
}
const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; }
@@ -404,6 +402,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
uint32_t dex_pc = deoptimize->GetDexPc();
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
mips64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize), instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
@@ -420,7 +419,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
: CodeGenerator(graph,
kNumberOfGpuRegisters,
kNumberOfFpuRegisters,
- 0, // kNumberOfRegisterPairs
+ /* number_of_register_pairs */ 0,
ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
arraysize(kCoreCalleeSaves)),
ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
@@ -441,6 +440,32 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64WordSize, x).Int32Value()
void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
+ // Ensure that we fix up branches.
+ __ FinalizeCode();
+
+ // Adjust native pc offsets in stack maps.
+ for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) {
+ uint32_t old_position = stack_map_stream_.GetStackMap(i).native_pc_offset;
+ uint32_t new_position = __ GetAdjustedPosition(old_position);
+ DCHECK_GE(new_position, old_position);
+ stack_map_stream_.SetStackMapNativePcOffset(i, new_position);
+ }
+
+ // Adjust pc offsets for the disassembly information.
+ if (disasm_info_ != nullptr) {
+ GeneratedCodeInterval* frame_entry_interval = disasm_info_->GetFrameEntryInterval();
+ frame_entry_interval->start = __ GetAdjustedPosition(frame_entry_interval->start);
+ frame_entry_interval->end = __ GetAdjustedPosition(frame_entry_interval->end);
+ for (auto& it : *disasm_info_->GetInstructionIntervals()) {
+ it.second.start = __ GetAdjustedPosition(it.second.start);
+ it.second.end = __ GetAdjustedPosition(it.second.end);
+ }
+ for (auto& it : *disasm_info_->GetSlowPathIntervals()) {
+ it.code_interval.start = __ GetAdjustedPosition(it.code_interval.start);
+ it.code_interval.end = __ GetAdjustedPosition(it.code_interval.end);
+ }
+ }
+
CodeGenerator::Finalize(allocator);
}
@@ -603,6 +628,7 @@ void CodeGeneratorMIPS64::GenerateFrameExit() {
}
__ Jr(RA);
+ __ Nop();
__ cfi().RestoreState();
__ cfi().DefCFAOffset(GetFrameSize());
@@ -666,9 +692,19 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination,
gpr = destination.AsRegister<GpuRegister>();
}
if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) {
- __ LoadConst32(gpr, GetInt32ValueOf(source.GetConstant()->AsConstant()));
+ int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant());
+ if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+ gpr = ZERO;
+ } else {
+ __ LoadConst32(gpr, value);
+ }
} else {
- __ LoadConst64(gpr, GetInt64ValueOf(source.GetConstant()->AsConstant()));
+ int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant());
+ if (Primitive::IsFloatingPointType(dst_type) && value == 0) {
+ gpr = ZERO;
+ } else {
+ __ LoadConst64(gpr, value);
+ }
}
if (dst_type == Primitive::kPrimFloat) {
__ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>());
@@ -734,12 +770,22 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination,
// Move to stack from constant
HConstant* src_cst = source.GetConstant();
StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword;
+ GpuRegister gpr = ZERO;
if (destination.IsStackSlot()) {
- __ LoadConst32(TMP, GetInt32ValueOf(src_cst->AsConstant()));
+ int32_t value = GetInt32ValueOf(src_cst->AsConstant());
+ if (value != 0) {
+ gpr = TMP;
+ __ LoadConst32(gpr, value);
+ }
} else {
- __ LoadConst64(TMP, GetInt64ValueOf(src_cst->AsConstant()));
+ DCHECK(destination.IsDoubleStackSlot());
+ int64_t value = GetInt64ValueOf(src_cst->AsConstant());
+ if (value != 0) {
+ gpr = TMP;
+ __ LoadConst64(gpr, value);
+ }
}
- __ StoreToOffset(store_type, TMP, SP, destination.GetStackIndex());
+ __ StoreToOffset(store_type, gpr, SP, destination.GetStackIndex());
} else {
DCHECK(source.IsStackSlot() || source.IsDoubleStackSlot());
DCHECK_EQ(source.IsDoubleStackSlot(), destination.IsDoubleStackSlot());
@@ -755,9 +801,7 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination,
}
}
-void CodeGeneratorMIPS64::SwapLocations(Location loc1,
- Location loc2,
- Primitive::Type type ATTRIBUTE_UNUSED) {
+void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) {
DCHECK(!loc1.IsConstant());
DCHECK(!loc2.IsConstant());
@@ -781,12 +825,16 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1,
// Swap 2 FPRs
FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>();
FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>();
- // TODO: Can MOV.S/MOV.D be used here to save one instruction?
- // Need to distinguish float from double, right?
- __ Dmfc1(TMP, r2);
- __ Dmfc1(AT, r1);
- __ Dmtc1(TMP, r1);
- __ Dmtc1(AT, r2);
+ if (type == Primitive::kPrimFloat) {
+ __ MovS(FTMP, r1);
+ __ MovS(r1, r2);
+ __ MovS(r2, FTMP);
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimDouble);
+ __ MovD(FTMP, r1);
+ __ MovD(r1, r2);
+ __ MovD(r2, FTMP);
+ }
} else if (is_slot1 != is_slot2) {
// Swap GPR/FPR and stack slot
Location reg_loc = is_slot1 ? loc2 : loc1;
@@ -800,7 +848,6 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1,
reg_loc.AsFpuRegister<FpuRegister>(),
SP,
mem_loc.GetStackIndex());
- // TODO: review this MTC1/DMTC1 move
if (mem_loc.IsStackSlot()) {
__ Mtc1(TMP, reg_loc.AsFpuRegister<FpuRegister>());
} else {
@@ -845,12 +892,22 @@ void CodeGeneratorMIPS64::Move(HInstruction* instruction,
} else {
DCHECK(location.IsStackSlot() || location.IsDoubleStackSlot());
// Move to stack from constant
+ GpuRegister gpr = ZERO;
if (location.IsStackSlot()) {
- __ LoadConst32(TMP, GetInt32ValueOf(instruction->AsConstant()));
- __ StoreToOffset(kStoreWord, TMP, SP, location.GetStackIndex());
+ int32_t value = GetInt32ValueOf(instruction->AsConstant());
+ if (value != 0) {
+ gpr = TMP;
+ __ LoadConst32(gpr, value);
+ }
+ __ StoreToOffset(kStoreWord, gpr, SP, location.GetStackIndex());
} else {
- __ LoadConst64(TMP, instruction->AsLongConstant()->GetValue());
- __ StoreToOffset(kStoreDoubleword, TMP, SP, location.GetStackIndex());
+ DCHECK(location.IsDoubleStackSlot());
+ int64_t value = instruction->AsLongConstant()->GetValue();
+ if (value != 0) {
+ gpr = TMP;
+ __ LoadConst64(gpr, value);
+ }
+ __ StoreToOffset(kStoreDoubleword, gpr, SP, location.GetStackIndex());
}
}
} else if (instruction->IsTemporary()) {
@@ -908,7 +965,7 @@ Location CodeGeneratorMIPS64::GetStackLocation(HLoadLocal* load) const {
}
void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, GpuRegister value) {
- Label done;
+ Mips64Label done;
GpuRegister card = AT;
GpuRegister temp = TMP;
__ Beqzc(value, &done);
@@ -1017,6 +1074,7 @@ void CodeGeneratorMIPS64::InvokeRuntime(int32_t entry_point_offset,
// TODO: anything related to T9/GP/GOT/PIC/.so's?
__ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
__ Jalr(T9);
+ __ Nop();
RecordPcInfo(instruction, dex_pc, slow_path);
}
@@ -1048,7 +1106,7 @@ void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruc
__ Bind(slow_path->GetReturnLabel());
} else {
__ Beqzc(TMP, codegen_->GetLabelOf(successor));
- __ B(slow_path->GetEntryLabel());
+ __ Bc(slow_path->GetEntryLabel());
// slow_path will return to GetLabelOf(successor).
}
}
@@ -1198,7 +1256,7 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
- locations->SetOut(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
default:
@@ -1552,6 +1610,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) {
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
}
break;
}
@@ -1638,12 +1697,7 @@ void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction)
// length is limited by the maximum positive signed 32-bit integer.
// Unsigned comparison of length and index checks for index < 0
// and for length <= index simultaneously.
- // Mips R6 requires lhs != rhs for compact branches.
- if (index == length) {
- __ B(slow_path->GetEntryLabel());
- } else {
- __ Bgeuc(index, length, slow_path->GetEntryLabel());
- }
+ __ Bgeuc(index, length, slow_path->GetEntryLabel());
}
void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
@@ -1707,7 +1761,7 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) {
switch (in_type) {
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
@@ -1736,8 +1790,18 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
case Primitive::kPrimLong: {
GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
- // TODO: more efficient (direct) comparison with a constant
+ Location rhs_location = locations->InAt(1);
+ bool use_imm = rhs_location.IsConstant();
+ GpuRegister rhs = ZERO;
+ if (use_imm) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant());
+ if (value != 0) {
+ rhs = AT;
+ __ LoadConst64(rhs, value);
+ }
+ } else {
+ rhs = rhs_location.AsRegister<GpuRegister>();
+ }
__ Slt(TMP, lhs, rhs);
__ Slt(dst, rhs, lhs);
__ Subu(dst, dst, TMP);
@@ -1755,6 +1819,19 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) {
: QUICK_ENTRY_POINT(pCmplDouble);
}
codegen_->InvokeRuntime(entry_point_offset, instruction, instruction->GetDexPc(), nullptr);
+ if (in_type == Primitive::kPrimFloat) {
+ if (instruction->IsGtBias()) {
+ CheckEntrypointTypes<kQuickCmpgFloat, int32_t, float, float>();
+ } else {
+ CheckEntrypointTypes<kQuickCmplFloat, int32_t, float, float>();
+ }
+ } else {
+ if (instruction->IsGtBias()) {
+ CheckEntrypointTypes<kQuickCmpgDouble, int32_t, double, double>();
+ } else {
+ CheckEntrypointTypes<kQuickCmplDouble, int32_t, double, double>();
+ }
+ }
break;
}
@@ -1902,6 +1979,252 @@ void InstructionCodeGeneratorMIPS64::VisitCondition(HCondition* instruction) {
}
}
+void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ Primitive::Type type = instruction->GetResultType();
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(imm == 1 || imm == -1);
+
+ if (instruction->IsRem()) {
+ __ Move(out, ZERO);
+ } else {
+ if (imm == -1) {
+ if (type == Primitive::kPrimInt) {
+ __ Subu(out, ZERO, dividend);
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimLong);
+ __ Dsubu(out, ZERO, dividend);
+ }
+ } else if (out != dividend) {
+ __ Move(out, dividend);
+ }
+ }
+}
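The special cases above rely on nothing more than the identities x / 1 == x, x / -1 == -x and x % ±1 == 0; a quick standalone check (avoiding the INT_MIN / -1 overflow case):

#include <cassert>

int main() {
  for (int x : {5, -5, 0, 123456, -7}) {
    assert(x / 1 == x);
    assert(x / -1 == -x);
    assert(x % 1 == 0);
    assert(x % -1 == 0);
  }
  return 0;
}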
+
+void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ Primitive::Type type = instruction->GetResultType();
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ uint64_t abs_imm = static_cast<uint64_t>(std::abs(imm));
+ DCHECK(IsPowerOfTwo(abs_imm));
+ int ctz_imm = CTZ(abs_imm);
+
+ if (instruction->IsDiv()) {
+ if (type == Primitive::kPrimInt) {
+ if (ctz_imm == 1) {
+ // Fast path for division by +/-2, which is very common.
+ __ Srl(TMP, dividend, 31);
+ } else {
+ __ Sra(TMP, dividend, 31);
+ __ Srl(TMP, TMP, 32 - ctz_imm);
+ }
+ __ Addu(out, dividend, TMP);
+ __ Sra(out, out, ctz_imm);
+ if (imm < 0) {
+ __ Subu(out, ZERO, out);
+ }
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimLong);
+ if (ctz_imm == 1) {
+ // Fast path for division by +/-2, which is very common.
+ __ Dsrl32(TMP, dividend, 31);
+ } else {
+ __ Dsra32(TMP, dividend, 31);
+ if (ctz_imm > 32) {
+ __ Dsrl(TMP, TMP, 64 - ctz_imm);
+ } else {
+ __ Dsrl32(TMP, TMP, 32 - ctz_imm);
+ }
+ }
+ __ Daddu(out, dividend, TMP);
+ if (ctz_imm < 32) {
+ __ Dsra(out, out, ctz_imm);
+ } else {
+ __ Dsra32(out, out, ctz_imm - 32);
+ }
+ if (imm < 0) {
+ __ Dsubu(out, ZERO, out);
+ }
+ }
+ } else {
+ if (type == Primitive::kPrimInt) {
+ if (ctz_imm == 1) {
+ // Fast path for modulo +/-2, which is very common.
+ __ Sra(TMP, dividend, 31);
+ __ Subu(out, dividend, TMP);
+ __ Andi(out, out, 1);
+ __ Addu(out, out, TMP);
+ } else {
+ __ Sra(TMP, dividend, 31);
+ __ Srl(TMP, TMP, 32 - ctz_imm);
+ __ Addu(out, dividend, TMP);
+ if (IsUint<16>(abs_imm - 1)) {
+ __ Andi(out, out, abs_imm - 1);
+ } else {
+ __ Sll(out, out, 32 - ctz_imm);
+ __ Srl(out, out, 32 - ctz_imm);
+ }
+ __ Subu(out, out, TMP);
+ }
+ } else {
+ DCHECK_EQ(type, Primitive::kPrimLong);
+ if (ctz_imm == 1) {
+ // Fast path for modulo +/-2, which is very common.
+ __ Dsra32(TMP, dividend, 31);
+ __ Dsubu(out, dividend, TMP);
+ __ Andi(out, out, 1);
+ __ Daddu(out, out, TMP);
+ } else {
+ __ Dsra32(TMP, dividend, 31);
+ if (ctz_imm > 32) {
+ __ Dsrl(TMP, TMP, 64 - ctz_imm);
+ } else {
+ __ Dsrl32(TMP, TMP, 32 - ctz_imm);
+ }
+ __ Daddu(out, dividend, TMP);
+ if (IsUint<16>(abs_imm - 1)) {
+ __ Andi(out, out, abs_imm - 1);
+ } else {
+ if (ctz_imm > 32) {
+ __ Dsll(out, out, 64 - ctz_imm);
+ __ Dsrl(out, out, 64 - ctz_imm);
+ } else {
+ __ Dsll32(out, out, 32 - ctz_imm);
+ __ Dsrl32(out, out, 32 - ctz_imm);
+ }
+ }
+ __ Dsubu(out, out, TMP);
+ }
+ }
+ }
+}
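The division half of the routine above implements round-toward-zero division by 2^k: negative dividends get a bias of 2^k - 1 added before the arithmetic shift. A standalone C++ sketch of that idea for 32-bit values (assuming the usual arithmetic right shift on signed integers), checked against the C++ '/' operator:

#include <cassert>
#include <cstdint>

int32_t DivByPowerOfTwo(int32_t dividend, int k) {  // 1 <= k <= 31
  // Bias is k ones if the dividend is negative, zero otherwise (the Sra/Srl pair above).
  uint32_t bias = static_cast<uint32_t>(dividend >> 31) >> (32 - k);
  return (dividend + static_cast<int32_t>(bias)) >> k;
}

int main() {
  for (int32_t d : {7, -7, 8, -8, 1, -1, 0, 1000003, -1000003}) {
    assert(DivByPowerOfTwo(d, 1) == d / 2);
    assert(DivByPowerOfTwo(d, 3) == d / 8);
  }
  return 0;
}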
+
+void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ DCHECK(second.IsConstant());
+
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+
+ Primitive::Type type = instruction->GetResultType();
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+ int64_t magic;
+ int shift;
+ CalculateMagicAndShiftForDivRem(imm,
+ (type == Primitive::kPrimLong),
+ &magic,
+ &shift);
+
+ if (type == Primitive::kPrimInt) {
+ __ LoadConst32(TMP, magic);
+ __ MuhR6(TMP, dividend, TMP);
+
+ if (imm > 0 && magic < 0) {
+ __ Addu(TMP, TMP, dividend);
+ } else if (imm < 0 && magic > 0) {
+ __ Subu(TMP, TMP, dividend);
+ }
+
+ if (shift != 0) {
+ __ Sra(TMP, TMP, shift);
+ }
+
+ if (instruction->IsDiv()) {
+ __ Sra(out, TMP, 31);
+ __ Subu(out, TMP, out);
+ } else {
+ __ Sra(AT, TMP, 31);
+ __ Subu(AT, TMP, AT);
+ __ LoadConst32(TMP, imm);
+ __ MulR6(TMP, AT, TMP);
+ __ Subu(out, dividend, TMP);
+ }
+ } else {
+ __ LoadConst64(TMP, magic);
+ __ Dmuh(TMP, dividend, TMP);
+
+ if (imm > 0 && magic < 0) {
+ __ Daddu(TMP, TMP, dividend);
+ } else if (imm < 0 && magic > 0) {
+ __ Dsubu(TMP, TMP, dividend);
+ }
+
+ if (shift >= 32) {
+ __ Dsra32(TMP, TMP, shift - 32);
+ } else if (shift > 0) {
+ __ Dsra(TMP, TMP, shift);
+ }
+
+ if (instruction->IsDiv()) {
+ __ Dsra32(out, TMP, 31);
+ __ Dsubu(out, TMP, out);
+ } else {
+ __ Dsra32(AT, TMP, 31);
+ __ Dsubu(AT, TMP, AT);
+ __ LoadConst64(TMP, imm);
+ __ Dmul(TMP, AT, TMP);
+ __ Dsubu(out, dividend, TMP);
+ }
+ }
+}
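As a concrete instance of the sequence above, dividing by 7 in 32 bits uses the classic magic constant 0x92492493 with shift 2 (per the standard algorithm, e.g. Hacker's Delight); since the divisor is positive and the magic is negative as an int32, the dividend is added back after the high multiply, and the final Sra/Subu pair adds one for negative quotients. A minimal standalone C++ check of that evaluation (a sketch, not the compiler's helper):

#include <cassert>
#include <cstdint>

int32_t DivBy7(int32_t n) {
  int32_t magic = static_cast<int32_t>(0x92492493);
  int32_t q = static_cast<int32_t>((static_cast<int64_t>(magic) * n) >> 32);  // high multiply (MuhR6)
  q += n;                // imm > 0 && magic < 0 correction
  q >>= 2;               // shift
  return q - (q >> 31);  // add one for negative quotients (the Sra/Subu pair)
}

int main() {
  for (int32_t n : {0, 1, 6, 7, 8, 100, -1, -6, -7, -8, -100, 123456789, -123456789}) {
    assert(DivBy7(n) == n / 7);
  }
  return 0;
}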
+
+void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
+ DCHECK(instruction->IsDiv() || instruction->IsRem());
+ Primitive::Type type = instruction->GetResultType();
+ DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type;
+
+ LocationSummary* locations = instruction->GetLocations();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+ Location second = locations->InAt(1);
+
+ if (second.IsConstant()) {
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ if (imm == 0) {
+ // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+ } else if (imm == 1 || imm == -1) {
+ DivRemOneOrMinusOne(instruction);
+ } else if (IsPowerOfTwo(std::abs(imm))) {
+ DivRemByPowerOfTwo(instruction);
+ } else {
+ DCHECK(imm <= -2 || imm >= 2);
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+ } else {
+ GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister divisor = second.AsRegister<GpuRegister>();
+ if (instruction->IsDiv()) {
+ if (type == Primitive::kPrimInt)
+ __ DivR6(out, dividend, divisor);
+ else
+ __ Ddiv(out, dividend, divisor);
+ } else {
+ if (type == Primitive::kPrimInt)
+ __ ModR6(out, dividend, divisor);
+ else
+ __ Dmod(out, dividend, divisor);
+ }
+ }
+}
+
void LocationsBuilderMIPS64::VisitDiv(HDiv* div) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
@@ -1909,7 +2232,7 @@ void LocationsBuilderMIPS64::VisitDiv(HDiv* div) {
case Primitive::kPrimInt:
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
@@ -1931,16 +2254,9 @@ void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) {
switch (type) {
case Primitive::kPrimInt:
- case Primitive::kPrimLong: {
- GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
- GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
- if (type == Primitive::kPrimInt)
- __ DivR6(dst, lhs, rhs);
- else
- __ Ddiv(dst, lhs, rhs);
+ case Primitive::kPrimLong:
+ GenerateDivRemIntegral(instruction);
break;
- }
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
@@ -1984,7 +2300,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio
if (value.IsConstant()) {
int64_t divisor = codegen_->GetInt64ValueOf(value.GetConstant()->AsConstant());
if (divisor == 0) {
- __ B(slow_path->GetEntryLabel());
+ __ Bc(slow_path->GetEntryLabel());
} else {
// A division by a non-null constant is valid. We don't need to perform
// any check, so simply fall through.
@@ -2036,7 +2352,7 @@ void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock*
GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
}
if (!codegen_->GoesToNextBlock(block, successor)) {
- __ B(codegen_->GetLabelOf(successor));
+ __ Bc(codegen_->GetLabelOf(successor));
}
}
@@ -2060,30 +2376,51 @@ void InstructionCodeGeneratorMIPS64::VisitTryBoundary(HTryBoundary* try_boundary
}
void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruction,
- Label* true_target,
- Label* false_target,
- Label* always_true_target) {
- HInstruction* cond = instruction->InputAt(0);
- HCondition* condition = cond->AsCondition();
-
- if (cond->IsIntConstant()) {
- int32_t cond_value = cond->AsIntConstant()->GetValue();
- if (cond_value == 1) {
- if (always_true_target != nullptr) {
- __ B(always_true_target);
- }
- return;
+ size_t condition_input_index,
+ Mips64Label* true_target,
+ Mips64Label* false_target) {
+ HInstruction* cond = instruction->InputAt(condition_input_index);
+
+ if (true_target == nullptr && false_target == nullptr) {
+ // Nothing to do. The code always falls through.
+ return;
+ } else if (cond->IsIntConstant()) {
+ // Constant condition, statically compared against 1.
+ if (cond->AsIntConstant()->IsOne()) {
+ if (true_target != nullptr) {
+ __ Bc(true_target);
+ }
} else {
- DCHECK_EQ(cond_value, 0);
+ DCHECK(cond->AsIntConstant()->IsZero());
+ if (false_target != nullptr) {
+ __ Bc(false_target);
+ }
}
- } else if (!cond->IsCondition() || condition->NeedsMaterialization()) {
+ return;
+ }
+
+ // The following code generates these patterns:
+ // (1) true_target == nullptr && false_target != nullptr
+ // - opposite condition true => branch to false_target
+ // (2) true_target != nullptr && false_target == nullptr
+ // - condition true => branch to true_target
+ // (3) true_target != nullptr && false_target != nullptr
+ // - condition true => branch to true_target
+ // - branch to false_target
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
// The condition instruction has been materialized, compare the output to 0.
- Location cond_val = instruction->GetLocations()->InAt(0);
+ Location cond_val = instruction->GetLocations()->InAt(condition_input_index);
DCHECK(cond_val.IsRegister());
- __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target);
+ if (true_target == nullptr) {
+ __ Beqzc(cond_val.AsRegister<GpuRegister>(), false_target);
+ } else {
+ __ Bnezc(cond_val.AsRegister<GpuRegister>(), true_target);
+ }
} else {
// The condition instruction has not been materialized, use its inputs as
// the comparison and its condition as the branch condition.
+ HCondition* condition = cond->AsCondition();
+
GpuRegister lhs = condition->GetLocations()->InAt(0).AsRegister<GpuRegister>();
Location rhs_location = condition->GetLocations()->InAt(1);
GpuRegister rhs_reg = ZERO;
@@ -2095,37 +2432,46 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc
rhs_reg = rhs_location.AsRegister<GpuRegister>();
}
- IfCondition if_cond = condition->GetCondition();
+ IfCondition if_cond;
+ Mips64Label* non_fallthrough_target;
+ if (true_target == nullptr) {
+ if_cond = condition->GetOppositeCondition();
+ non_fallthrough_target = false_target;
+ } else {
+ if_cond = condition->GetCondition();
+ non_fallthrough_target = true_target;
+ }
+
if (use_imm && rhs_imm == 0) {
switch (if_cond) {
case kCondEQ:
- __ Beqzc(lhs, true_target);
+ __ Beqzc(lhs, non_fallthrough_target);
break;
case kCondNE:
- __ Bnezc(lhs, true_target);
+ __ Bnezc(lhs, non_fallthrough_target);
break;
case kCondLT:
- __ Bltzc(lhs, true_target);
+ __ Bltzc(lhs, non_fallthrough_target);
break;
case kCondGE:
- __ Bgezc(lhs, true_target);
+ __ Bgezc(lhs, non_fallthrough_target);
break;
case kCondLE:
- __ Blezc(lhs, true_target);
+ __ Blezc(lhs, non_fallthrough_target);
break;
case kCondGT:
- __ Bgtzc(lhs, true_target);
+ __ Bgtzc(lhs, non_fallthrough_target);
break;
case kCondB:
break; // always false
case kCondBE:
- __ Beqzc(lhs, true_target); // <= 0 if zero
+ __ Beqzc(lhs, non_fallthrough_target); // <= 0 if zero
break;
case kCondA:
- __ Bnezc(lhs, true_target); // > 0 if non-zero
+ __ Bnezc(lhs, non_fallthrough_target); // > 0 if non-zero
break;
case kCondAE:
- __ B(true_target); // always true
+ __ Bc(non_fallthrough_target); // always true
break;
}
} else {
@@ -2133,96 +2479,69 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc
rhs_reg = TMP;
__ LoadConst32(rhs_reg, rhs_imm);
}
- // It looks like we can get here with lhs == rhs. Should that be possible at all?
- // Mips R6 requires lhs != rhs for compact branches.
- if (lhs == rhs_reg) {
- DCHECK(!use_imm);
- switch (if_cond) {
- case kCondEQ:
- case kCondGE:
- case kCondLE:
- case kCondBE:
- case kCondAE:
- // if lhs == rhs for a positive condition, then it is a branch
- __ B(true_target);
- break;
- case kCondNE:
- case kCondLT:
- case kCondGT:
- case kCondB:
- case kCondA:
- // if lhs == rhs for a negative condition, then it is a NOP
- break;
- }
- } else {
- switch (if_cond) {
- case kCondEQ:
- __ Beqc(lhs, rhs_reg, true_target);
- break;
- case kCondNE:
- __ Bnec(lhs, rhs_reg, true_target);
- break;
- case kCondLT:
- __ Bltc(lhs, rhs_reg, true_target);
- break;
- case kCondGE:
- __ Bgec(lhs, rhs_reg, true_target);
- break;
- case kCondLE:
- __ Bgec(rhs_reg, lhs, true_target);
- break;
- case kCondGT:
- __ Bltc(rhs_reg, lhs, true_target);
- break;
- case kCondB:
- __ Bltuc(lhs, rhs_reg, true_target);
- break;
- case kCondAE:
- __ Bgeuc(lhs, rhs_reg, true_target);
- break;
- case kCondBE:
- __ Bgeuc(rhs_reg, lhs, true_target);
- break;
- case kCondA:
- __ Bltuc(rhs_reg, lhs, true_target);
- break;
- }
+ switch (if_cond) {
+ case kCondEQ:
+ __ Beqc(lhs, rhs_reg, non_fallthrough_target);
+ break;
+ case kCondNE:
+ __ Bnec(lhs, rhs_reg, non_fallthrough_target);
+ break;
+ case kCondLT:
+ __ Bltc(lhs, rhs_reg, non_fallthrough_target);
+ break;
+ case kCondGE:
+ __ Bgec(lhs, rhs_reg, non_fallthrough_target);
+ break;
+ case kCondLE:
+ __ Bgec(rhs_reg, lhs, non_fallthrough_target);
+ break;
+ case kCondGT:
+ __ Bltc(rhs_reg, lhs, non_fallthrough_target);
+ break;
+ case kCondB:
+ __ Bltuc(lhs, rhs_reg, non_fallthrough_target);
+ break;
+ case kCondAE:
+ __ Bgeuc(lhs, rhs_reg, non_fallthrough_target);
+ break;
+ case kCondBE:
+ __ Bgeuc(rhs_reg, lhs, non_fallthrough_target);
+ break;
+ case kCondA:
+ __ Bltuc(rhs_reg, lhs, non_fallthrough_target);
+ break;
}
}
}
- if (false_target != nullptr) {
- __ B(false_target);
+
+ // If neither branch falls through (case 3), the conditional branch to `true_target`
+ // has already been emitted (as in case 2) and we still need to emit a jump to `false_target`.
+ if (true_target != nullptr && false_target != nullptr) {
+ __ Bc(false_target);
}
}
void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) {
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
}
void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) {
- Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
- Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
- Label* always_true_target = true_target;
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- always_true_target = nullptr;
- }
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- false_target = nullptr;
- }
- GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+ HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+ HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+ Mips64Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+ nullptr : codegen_->GetLabelOf(true_successor);
+ Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+ nullptr : codegen_->GetLabelOf(false_successor);
+ GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
}
void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- HInstruction* cond = deoptimize->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::RequiresRegister());
}
}
@@ -2231,8 +2550,10 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
DeoptimizationSlowPathMIPS64(deoptimize);
codegen_->AddSlowPath(slow_path);
- Label* slow_path_entry = slow_path->GetEntryLabel();
- GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+ GenerateTestAndBranch(deoptimize,
+ /* condition_input_index */ 0,
+ slow_path->GetEntryLabel(),
+ /* false_target */ nullptr);
}
void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
@@ -2387,7 +2708,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
- Label done;
+ Mips64Label done;
// Return 0 if `obj` is null.
// TODO: Avoid this check if we know `obj` is not null.
@@ -2482,6 +2803,7 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo
__ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
// T9();
__ Jalr(T9);
+ __ Nop();
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
@@ -2512,10 +2834,12 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in
// allocation of a register for the current method pointer like on x86 baseline.
// TODO: remove this once all the issues with register saving/restoring are
// sorted out.
- LocationSummary* locations = invoke->GetLocations();
- Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
- if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
- locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+ if (invoke->HasCurrentMethodInput()) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location location = locations->InAt(invoke->GetSpecialInputIndex());
+ if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
+ locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
+ }
}
}
@@ -2572,7 +2896,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
invoke->GetStringInitOffset());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ LoadConst64(temp.AsRegister<GpuRegister>(), invoke->GetMethodAddress());
@@ -2584,7 +2908,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
LOG(FATAL) << "Unsupported";
UNREACHABLE();
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
- Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
GpuRegister reg = temp.AsRegister<GpuRegister>();
GpuRegister method_reg;
if (current_method.IsRegister()) {
@@ -2614,13 +2938,14 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
switch (invoke->GetCodePtrLocation()) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
- __ Jalr(&frame_entry_label_, T9);
+ __ Jialc(&frame_entry_label_, T9);
break;
case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
// LR = invoke->GetDirectCodePtr();
__ LoadConst64(T9, invoke->GetDirectCodePtr());
// LR()
__ Jalr(T9);
+ __ Nop();
break;
case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
case HInvokeStaticOrDirect::CodePtrLocation::kCallPCRelative:
@@ -2637,6 +2962,7 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
kMips64WordSize).Int32Value());
// T9()
__ Jalr(T9);
+ __ Nop();
break;
}
DCHECK(!IsLeafMethod());
@@ -2659,29 +2985,38 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
-void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
- if (TryGenerateIntrinsicCode(invoke, codegen_)) {
- return;
- }
+void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+ // Use the calling convention instead of the location of the receiver, as
+ // intrinsics may have put the receiver in a different register. In the intrinsics
+ // slow path, the arguments have been moved to the right place, so here we are
+ // guaranteed that the receiver is the first register of the calling convention.
+ InvokeDexCallingConvention calling_convention;
+ GpuRegister receiver = calling_convention.GetRegisterAt(0);
- LocationSummary* locations = invoke->GetLocations();
- Location receiver = locations->InAt(0);
- GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
+ GpuRegister temp = temp_location.AsRegister<GpuRegister>();
size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
invoke->GetVTableIndex(), kMips64PointerSize).SizeValue();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64WordSize);
// temp = object->GetClass();
- DCHECK(receiver.IsRegister());
- __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
- codegen_->MaybeRecordImplicitNullCheck(invoke);
+ __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
+ MaybeRecordImplicitNullCheck(invoke);
// temp = temp->GetMethodAt(method_offset);
__ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
// T9 = temp->GetEntryPoint();
__ LoadFromOffset(kLoadDoubleword, T9, temp, entry_point.Int32Value());
// T9();
__ Jalr(T9);
+ __ Nop();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+ return;
+ }
+
+ codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
@@ -2691,7 +3026,7 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
CodeGenerator::CreateLoadClassLocationSummary(
cls,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- Location::RegisterLocation(A0));
+ calling_convention.GetReturnLocation(cls->GetType()));
}
void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
@@ -2702,6 +3037,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
cls,
cls->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
@@ -2713,22 +3049,26 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) {
__ LoadFromOffset(kLoadUnsignedWord, out, current_method,
ArtMethod::DeclaringClassOffset().Int32Value());
} else {
- DCHECK(cls->CanCallRuntime());
__ LoadFromOffset(kLoadDoubleword, out, current_method,
ArtMethod::DexCacheResolvedTypesOffset(kMips64PointerSize).Int32Value());
__ LoadFromOffset(kLoadUnsignedWord, out, out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex()));
// TODO: We will need a read barrier here.
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
- cls,
- cls,
- cls->GetDexPc(),
- cls->MustGenerateClinitCheck());
- codegen_->AddSlowPath(slow_path);
- __ Beqzc(out, slow_path->GetEntryLabel());
- if (cls->MustGenerateClinitCheck()) {
- GenerateClassInitializationCheck(slow_path, out);
- } else {
- __ Bind(slow_path->GetExitLabel());
+ if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+ DCHECK(cls->CanCallRuntime());
+ SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64(
+ cls,
+ cls,
+ cls->GetDexPc(),
+ cls->MustGenerateClinitCheck());
+ codegen_->AddSlowPath(slow_path);
+ if (!cls->IsInDexCache()) {
+ __ Beqzc(out, slow_path->GetEntryLabel());
+ }
+ if (cls->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
}
}
@@ -2818,7 +3158,11 @@ void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* in
instruction,
instruction->GetDexPc(),
nullptr);
- CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+ if (instruction->IsEnter()) {
+ CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+ } else {
+ CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+ }
}
void LocationsBuilderMIPS64::VisitMul(HMul* mul) {
@@ -2952,15 +3296,12 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
InvokeRuntimeCallingConvention calling_convention;
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot));
}
void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
- LocationSummary* locations = instruction->GetLocations();
- // Move an uint16_t value to a register.
- __ LoadConst32(locations->GetTemp(0).AsRegister<GpuRegister>(), instruction->GetTypeIndex());
codegen_->InvokeRuntime(instruction->GetEntrypoint(),
instruction,
instruction->GetDexPc(),
@@ -3108,7 +3449,7 @@ void LocationsBuilderMIPS64::VisitRem(HRem* rem) {
case Primitive::kPrimInt:
case Primitive::kPrimLong:
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
@@ -3128,26 +3469,23 @@ void LocationsBuilderMIPS64::VisitRem(HRem* rem) {
void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
Primitive::Type type = instruction->GetType();
- LocationSummary* locations = instruction->GetLocations();
switch (type) {
case Primitive::kPrimInt:
- case Primitive::kPrimLong: {
- GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
- GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
- if (type == Primitive::kPrimInt)
- __ ModR6(dst, lhs, rhs);
- else
- __ Dmod(dst, lhs, rhs);
+ case Primitive::kPrimLong:
+ GenerateDivRemIntegral(instruction);
break;
- }
case Primitive::kPrimFloat:
case Primitive::kPrimDouble: {
int32_t entry_offset = (type == Primitive::kPrimFloat) ? QUICK_ENTRY_POINT(pFmodf)
: QUICK_ENTRY_POINT(pFmod);
codegen_->InvokeRuntime(entry_offset, instruction, instruction->GetDexPc(), nullptr);
+ if (type == Primitive::kPrimFloat) {
+ CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+ } else {
+ CheckEntrypointTypes<kQuickFmod, double, double, double>();
+ }
break;
}
default:
@@ -3457,6 +3795,11 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
conversion,
conversion->GetDexPc(),
nullptr);
+ if (result_type == Primitive::kPrimFloat) {
+ CheckEntrypointTypes<kQuickL2f, float, int64_t>();
+ } else {
+ CheckEntrypointTypes<kQuickL2d, double, int64_t>();
+ }
}
} else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) {
CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong);
@@ -3472,6 +3815,19 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver
conversion,
conversion->GetDexPc(),
nullptr);
+ if (result_type != Primitive::kPrimLong) {
+ if (input_type == Primitive::kPrimFloat) {
+ CheckEntrypointTypes<kQuickF2iz, int32_t, float>();
+ } else {
+ CheckEntrypointTypes<kQuickD2iz, int32_t, double>();
+ }
+ } else {
+ if (input_type == Primitive::kPrimFloat) {
+ CheckEntrypointTypes<kQuickF2l, int64_t, float>();
+ } else {
+ CheckEntrypointTypes<kQuickD2l, int64_t, double>();
+ }
+ }
} else if (Primitive::IsFloatingPointType(result_type) &&
Primitive::IsFloatingPointType(input_type)) {
FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
@@ -3623,7 +3979,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins
const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
for (int32_t i = 0; i < num_entries; i++) {
int32_t case_value = lower_bound + i;
- Label* succ = codegen_->GetLabelOf(successors[i]);
+ Mips64Label* succ = codegen_->GetLabelOf(successors[i]);
if (case_value == 0) {
__ Beqzc(value_reg, succ);
} else {
@@ -3634,7 +3990,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins
// And the default for any other value.
if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
- __ B(codegen_->GetLabelOf(default_block));
+ __ Bc(codegen_->GetLabelOf(default_block));
}
}
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 9bbd02759a..85e3a4a3ce 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -119,9 +119,12 @@ class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention {
Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
return Location::RegisterLocation(V0);
}
- Location GetSetValueLocation(
- Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE {
- return is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1);
+ Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+ return Primitive::Is64BitType(type)
+ ? Location::RegisterLocation(A2)
+ : (is_instance
+ ? Location::RegisterLocation(A2)
+ : Location::RegisterLocation(A1));
}
Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
return Location::FpuRegisterLocation(F0);
@@ -155,12 +158,12 @@ class SlowPathCodeMIPS64 : public SlowPathCode {
public:
SlowPathCodeMIPS64() : entry_label_(), exit_label_() {}
- Label* GetEntryLabel() { return &entry_label_; }
- Label* GetExitLabel() { return &exit_label_; }
+ Mips64Label* GetEntryLabel() { return &entry_label_; }
+ Mips64Label* GetExitLabel() { return &exit_label_; }
private:
- Label entry_label_;
- Label exit_label_;
+ Mips64Label entry_label_;
+ Mips64Label exit_label_;
DISALLOW_COPY_AND_ASSIGN(SlowPathCodeMIPS64);
};
@@ -227,9 +230,13 @@ class InstructionCodeGeneratorMIPS64 : public HGraphVisitor {
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
void GenerateTestAndBranch(HInstruction* instruction,
- Label* true_target,
- Label* false_target,
- Label* always_true_target);
+ size_t condition_input_index,
+ Mips64Label* true_target,
+ Mips64Label* false_target);
+ void DivRemOneOrMinusOne(HBinaryOperation* instruction);
+ void DivRemByPowerOfTwo(HBinaryOperation* instruction);
+ void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
+ void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
Mips64Assembler* const assembler_;
@@ -258,7 +265,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return kMips64WordSize; }
uintptr_t GetAddressOf(HBasicBlock* block) const OVERRIDE {
- return GetLabelOf(block)->Position();
+ return assembler_.GetLabelLocation(GetLabelOf(block));
}
HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
@@ -291,12 +298,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
return isa_features_;
}
- Label* GetLabelOf(HBasicBlock* block) const {
- return CommonGetLabelOf<Label>(block_labels_, block);
+ Mips64Label* GetLabelOf(HBasicBlock* block) const {
+ return CommonGetLabelOf<Mips64Label>(block_labels_, block);
}
void Initialize() OVERRIDE {
- block_labels_ = CommonInitializeLabels<Label>();
+ block_labels_ = CommonInitializeLabels<Mips64Label>();
}
void Finalize(CodeAllocator* allocator) OVERRIDE;
@@ -333,10 +340,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
MethodReference target_method) OVERRIDE;
void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
- void GenerateVirtualCall(HInvokeVirtual* invoke ATTRIBUTE_UNUSED,
- Location temp ATTRIBUTE_UNUSED) OVERRIDE {
- UNIMPLEMENTED(FATAL);
- }
+ void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
@@ -345,8 +349,8 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
private:
// Labels for each block that will be compiled.
- Label* block_labels_; // Indexed by block id.
- Label frame_entry_label_;
+ Mips64Label* block_labels_; // Indexed by block id.
+ Mips64Label frame_entry_label_;
LocationsBuilderMIPS64 location_builder_;
InstructionCodeGeneratorMIPS64 instruction_visitor_;
ParallelMoveResolverMIPS64 move_resolver_;
diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc
index 921c1d86c2..644a3fb75e 100644
--- a/compiler/optimizing/code_generator_utils.cc
+++ b/compiler/optimizing/code_generator_utils.cc
@@ -15,6 +15,7 @@
*/
#include "code_generator_utils.h"
+#include "nodes.h"
#include "base/logging.h"
@@ -94,4 +95,8 @@ void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long,
*shift = is_long ? p - 64 : p - 32;
}
+bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input) {
+ return !cond_input->IsCondition() || cond_input->AsCondition()->NeedsMaterialization();
+}
+
} // namespace art
diff --git a/compiler/optimizing/code_generator_utils.h b/compiler/optimizing/code_generator_utils.h
index 59b495c2c9..7efed8c9ec 100644
--- a/compiler/optimizing/code_generator_utils.h
+++ b/compiler/optimizing/code_generator_utils.h
@@ -21,10 +21,17 @@
namespace art {
+class HInstruction;
+
// Computes the magic number and the shift needed in the div/rem by constant algorithm, as out
// arguments `magic` and `shift`
void CalculateMagicAndShiftForDivRem(int64_t divisor, bool is_long, int64_t* magic, int* shift);
+// Returns true if `cond_input` is expected to have a location. Assumes that
+// `cond_input` is a conditional input of the currently emitted instruction and
+// that it has been previously visited by the InstructionCodeGenerator.
+bool IsBooleanValueOrMaterializedCondition(HInstruction* cond_input);
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_CODE_GENERATOR_UTILS_H_
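For reference, a minimal sketch of how a backend is expected to use the new helper (the visitor class name `LocationsBuilderFoo` is illustrative, not part of this patch; the body mirrors the MIPS64 and X86 VisitIf changes in this commit):

void LocationsBuilderFoo::VisitIf(HIf* if_instr) {
  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
  // Non-materialized conditions are emitted together with the branch that uses
  // them, so only boolean values and materialized conditions need an input
  // location here.
  if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
    locations->SetInAt(0, Location::RequiresRegister());
  }
}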
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0df7e3b30a..2fb87d3029 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -19,7 +19,6 @@
#include "art_method.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
-#include "constant_area_fixups_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
@@ -27,6 +26,7 @@
#include "intrinsics_x86.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
+#include "pc_relative_fixups_x86.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
@@ -35,6 +35,9 @@
namespace art {
+template<class MirrorType>
+class GcRoot;
+
namespace x86 {
static constexpr int kCurrentMethodStackOffset = 0;
@@ -64,6 +67,7 @@ class NullCheckSlowPathX86 : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
bool IsFatal() const OVERRIDE { return true; }
@@ -90,6 +94,7 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
bool IsFatal() const OVERRIDE { return true; }
@@ -149,6 +154,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
bool IsFatal() const OVERRIDE { return true; }
@@ -174,6 +180,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickTestSuspend, void, void>();
RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
@@ -219,6 +226,7 @@ class LoadStringSlowPathX86 : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
RestoreLiveRegisters(codegen, locations);
@@ -254,6 +262,11 @@ class LoadClassSlowPathX86 : public SlowPathCode {
x86_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
: QUICK_ENTRY_POINT(pInitializeType),
at_, dex_pc_, this);
+ if (do_clinit_) {
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ } else {
+ CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ }
// Move the class to the desired location.
Location out = locations->Out();
@@ -300,15 +313,6 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
- if (instruction_->IsCheckCast()) {
- // The codegen for the instruction overwrites `temp`, so put it back in place.
- Register obj = locations->InAt(0).AsRegister<Register>();
- Register temp = locations->GetTemp(0).AsRegister<Register>();
- uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- __ movl(temp, Address(obj, class_offset));
- __ MaybeUnpoisonHeapReference(temp);
- }
-
if (!is_fatal_) {
SaveLiveRegisters(codegen, locations);
}
@@ -329,12 +333,15 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<
+ kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
if (!is_fatal_) {
@@ -371,6 +378,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
@@ -413,6 +421,7 @@ class ArraySetSlowPathX86 : public SlowPathCode {
instruction_,
instruction_->GetDexPc(),
this);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -425,6 +434,221 @@ class ArraySetSlowPathX86 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
};
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
+ public:
+ ReadBarrierForHeapReferenceSlowPathX86(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index)
+ : instruction_(instruction),
+ out_(out),
+ ref_(ref),
+ obj_(obj),
+ offset_(offset),
+ index_(index) {
+ DCHECK(kEmitCompilerReadBarrier);
+ // If `obj` is equal to `out` or `ref`, it means the initial object
+ // has been overwritten by (or after) the heap object reference load
+ // to be instrumented, e.g.:
+ //
+ // __ movl(out, Address(out, offset));
+ // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ //
+ // In that case, we have lost the information about the original
+ // object, and the emitted read barrier cannot work properly.
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+ DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+ }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(!instruction_->IsInvoke() ||
+ (instruction_->IsInvokeStaticOrDirect() &&
+ instruction_->GetLocations()->Intrinsified()));
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ // We may have to change the index's value, but as `index_` is a
+ // constant member (like other "inputs" of this slow path),
+ // introduce a copy of it, `index`.
+ Location index = index_;
+ if (index_.IsValid()) {
+ // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+ if (instruction_->IsArrayGet()) {
+ // Compute the actual memory offset and store it in `index`.
+ Register index_reg = index_.AsRegister<Register>();
+ DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+ if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+ // We are about to change the value of `index_reg` (see the
+ // calls to art::x86::X86Assembler::shll and
+ // art::x86::X86Assembler::AddImmediate below), but it has
+ // not been saved by the previous call to
+ // art::SlowPathCode::SaveLiveRegisters, as it is a
+ // callee-save register --
+ // art::SlowPathCode::SaveLiveRegisters does not consider
+ // callee-save registers, as it has been designed with the
+ // assumption that callee-save registers are supposed to be
+ // handled by the called function. So, as a callee-save
+ // register, `index_reg` _would_ eventually be saved onto
+ // the stack, but it would be too late: we would have
+ // changed its value earlier. Therefore, we manually save
+ // it here into another freely available register,
+ // `free_reg`, chosen of course among the caller-save
+ // registers (as a callee-save `free_reg` register would
+ // exhibit the same problem).
+ //
+ // Note we could have requested a temporary register from
+ // the register allocator instead; but we prefer not to, as
+ // this is a slow path, and we know we can find a
+ // caller-save register that is available.
+ Register free_reg = FindAvailableCallerSaveRegister(codegen);
+ __ movl(free_reg, index_reg);
+ index_reg = free_reg;
+ index = Location::RegisterLocation(index_reg);
+ } else {
+ // The initial register stored in `index_` has already been
+ // saved in the call to art::SlowPathCode::SaveLiveRegisters
+ // (as it is not a callee-save register), so we can freely
+ // use it.
+ }
+ // Shifting the index value contained in `index_reg` by the scale
+ // factor (2) cannot overflow in practice, as the runtime is
+ // unable to allocate object arrays with a size larger than
+ // 2^26 - 1 (that is, 2^28 - 4 bytes).
+ __ shll(index_reg, Immediate(TIMES_4));
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ __ AddImmediate(index_reg, Immediate(offset_));
+ } else {
+ DCHECK(instruction_->IsInvoke());
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+ << instruction_->AsInvoke()->GetIntrinsic();
+ DCHECK_EQ(offset_, 0U);
+ DCHECK(index_.IsRegisterPair());
+ // UnsafeGet's offset location is a register pair, the low
+ // part contains the correct offset.
+ index = index_.ToLow();
+ }
+ }
+
+ // We're moving two or three locations to locations that could
+ // overlap, so we need a parallel move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(ref_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(obj_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot,
+ nullptr);
+ if (index.IsValid()) {
+ parallel_move.AddMove(index,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimInt,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ } else {
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ __ movl(calling_convention.GetRegisterAt(2), Immediate(offset_));
+ }
+ x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<
+ kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+ x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; }
+
+ private:
+ Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+ size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+ size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+ return static_cast<Register>(i);
+ }
+ }
+ // We shall never fail to find a free caller-save register, as
+ // there are more than two core caller-save registers on x86
+ // (meaning it is possible to find one which is different from
+ // `ref` and `obj`).
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+ LOG(FATAL) << "Could not find a free caller-save register";
+ UNREACHABLE();
+ }
+
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location ref_;
+ const Location obj_;
+ const uint32_t offset_;
+ // An additional location containing an index to an array.
+ // Only used for HArrayGet and the UnsafeGetObject &
+ // UnsafeGetObjectVolatile intrinsics.
+ const Location index_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
+ public:
+ ReadBarrierForRootSlowPathX86(HInstruction* instruction, Location out, Location root)
+ : instruction_(instruction), out_(out), root_(root) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ Register reg_out = out_.AsRegister<Register>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+ x86_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ x86_codegen->Move32(out_, Location::RegisterLocation(EAX));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86);
+};
+
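A hedged sketch of how such a read barrier slow path is typically wired up by the code generator; the actual call site (e.g. a read barrier emission helper) is outside this hunk, so the surrounding context here is an assumption, following the AddSlowPath/entry-label/exit-label pattern used elsewhere in this file:

SlowPathCode* slow_path = new (GetGraph()->GetArena())
    ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
AddSlowPath(slow_path);
// Branch to the slow path; it performs the runtime read barrier call and
// jumps back to the exit label when done.
__ jmp(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());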
#undef __
#define __ down_cast<X86Assembler*>(GetAssembler())->
@@ -513,9 +737,9 @@ void CodeGeneratorX86::InvokeRuntime(int32_t entry_point_offset,
}
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
- const X86InstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options,
- OptimizingCompilerStats* stats)
+ const X86InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options,
+ OptimizingCompilerStats* stats)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfXmmRegisters,
@@ -533,6 +757,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
isa_features_(isa_features),
method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
// Use a fake return address register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
@@ -581,7 +806,7 @@ Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
LOG(FATAL) << "Unreachable type " << type;
}
- return Location();
+ return Location::NoLocation();
}
void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
@@ -782,7 +1007,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type t
LOG(FATAL) << "Unexpected parameter type " << type;
break;
}
- return Location();
+ return Location::NoLocation();
}
void CodeGeneratorX86::Move32(Location destination, Location source) {
@@ -1157,26 +1382,19 @@ void InstructionCodeGeneratorX86::GenerateLongComparesAndJumps(HCondition* cond,
__ j(final_condition, true_label);
}
-void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr,
- HCondition* condition,
- Label* true_target,
- Label* false_target,
- Label* always_true_target) {
+void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condition,
+ Label* true_target_in,
+ Label* false_target_in) {
+ // Generated branching requires both targets to be explicit. If either of the
+ // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.

+ Label fallthrough_target;
+ Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+ Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
LocationSummary* locations = condition->GetLocations();
Location left = locations->InAt(0);
Location right = locations->InAt(1);
- // We don't want true_target as a nullptr.
- if (true_target == nullptr) {
- true_target = always_true_target;
- }
- bool falls_through = (false_target == nullptr);
-
- // FP compares don't like null false_targets.
- if (false_target == nullptr) {
- false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
- }
-
Primitive::Type type = condition->InputAt(0)->GetType();
switch (type) {
case Primitive::kPrimLong:
@@ -1194,119 +1412,141 @@ void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HIf* if_instr,
LOG(FATAL) << "Unexpected compare type " << type;
}
- if (!falls_through) {
+ if (false_target != &fallthrough_target) {
__ jmp(false_target);
}
+
+ if (fallthrough_target.IsLinked()) {
+ __ Bind(&fallthrough_target);
+ }
+}
+
+static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
+ // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
+ // are set only strictly before `branch`. We can't use the eflags on long/FP
+ // conditions if they are materialized due to the complex branching.
+ return cond->IsCondition() &&
+ cond->GetNext() == branch &&
+ cond->InputAt(0)->GetType() != Primitive::kPrimLong &&
+ !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
}
void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
Label* true_target,
- Label* false_target,
- Label* always_true_target) {
- HInstruction* cond = instruction->InputAt(0);
- if (cond->IsIntConstant()) {
+ Label* false_target) {
+ HInstruction* cond = instruction->InputAt(condition_input_index);
+
+ if (true_target == nullptr && false_target == nullptr) {
+ // Nothing to do. The code always falls through.
+ return;
+ } else if (cond->IsIntConstant()) {
// Constant condition, statically compared against 1.
- int32_t cond_value = cond->AsIntConstant()->GetValue();
- if (cond_value == 1) {
- if (always_true_target != nullptr) {
- __ jmp(always_true_target);
+ if (cond->AsIntConstant()->IsOne()) {
+ if (true_target != nullptr) {
+ __ jmp(true_target);
}
- return;
} else {
- DCHECK_EQ(cond_value, 0);
+ DCHECK(cond->AsIntConstant()->IsZero());
+ if (false_target != nullptr) {
+ __ jmp(false_target);
+ }
}
- } else {
- bool is_materialized =
- !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
- // Moves do not affect the eflags register, so if the condition is
- // evaluated just before the if, we don't need to evaluate it
- // again. We can't use the eflags on long/FP conditions if they are
- // materialized due to the complex branching.
- Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
- bool eflags_set = cond->IsCondition()
- && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
- && (type != Primitive::kPrimLong && !Primitive::IsFloatingPointType(type));
- if (is_materialized) {
- if (!eflags_set) {
- // Materialized condition, compare against 0.
- Location lhs = instruction->GetLocations()->InAt(0);
- if (lhs.IsRegister()) {
- __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
- } else {
- __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
- }
- __ j(kNotEqual, true_target);
+ return;
+ }
+
+ // The following code generates these patterns:
+ // (1) true_target == nullptr && false_target != nullptr
+ // - opposite condition true => branch to false_target
+ // (2) true_target != nullptr && false_target == nullptr
+ // - condition true => branch to true_target
+ // (3) true_target != nullptr && false_target != nullptr
+ // - condition true => branch to true_target
+ // - branch to false_target
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
+ if (AreEflagsSetFrom(cond, instruction)) {
+ if (true_target == nullptr) {
+ __ j(X86Condition(cond->AsCondition()->GetOppositeCondition()), false_target);
} else {
__ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
}
} else {
- // Condition has not been materialized, use its inputs as the
- // comparison and its condition as the branch condition.
-
- // Is this a long or FP comparison that has been folded into the HCondition?
- if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
- // Generate the comparison directly.
- GenerateCompareTestAndBranch(instruction->AsIf(),
- cond->AsCondition(),
- true_target,
- false_target,
- always_true_target);
- return;
+ // Materialized condition, compare against 0.
+ Location lhs = instruction->GetLocations()->InAt(condition_input_index);
+ if (lhs.IsRegister()) {
+ __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
+ } else {
+ __ cmpl(Address(ESP, lhs.GetStackIndex()), Immediate(0));
+ }
+ if (true_target == nullptr) {
+ __ j(kEqual, false_target);
+ } else {
+ __ j(kNotEqual, true_target);
}
+ }
+ } else {
+ // Condition has not been materialized, use its inputs as the comparison and
+ // its condition as the branch condition.
+ HCondition* condition = cond->AsCondition();
+
+ // If this is a long or FP comparison that has been folded into
+ // the HCondition, generate the comparison directly.
+ Primitive::Type type = condition->InputAt(0)->GetType();
+ if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+ GenerateCompareTestAndBranch(condition, true_target, false_target);
+ return;
+ }
- Location lhs = cond->GetLocations()->InAt(0);
- Location rhs = cond->GetLocations()->InAt(1);
- // LHS is guaranteed to be in a register (see
- // LocationsBuilderX86::VisitCondition).
- if (rhs.IsRegister()) {
- __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
- } else if (rhs.IsConstant()) {
- int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- if (constant == 0) {
- __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
- } else {
- __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
- }
+ Location lhs = condition->GetLocations()->InAt(0);
+ Location rhs = condition->GetLocations()->InAt(1);
+ // LHS is guaranteed to be in a register (see LocationsBuilderX86::VisitCondition).
+ if (rhs.IsRegister()) {
+ __ cmpl(lhs.AsRegister<Register>(), rhs.AsRegister<Register>());
+ } else if (rhs.IsConstant()) {
+ int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+ if (constant == 0) {
+ __ testl(lhs.AsRegister<Register>(), lhs.AsRegister<Register>());
} else {
- __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
+ __ cmpl(lhs.AsRegister<Register>(), Immediate(constant));
}
- __ j(X86Condition(cond->AsCondition()->GetCondition()), true_target);
+ } else {
+ __ cmpl(lhs.AsRegister<Register>(), Address(ESP, rhs.GetStackIndex()));
+ }
+ if (true_target == nullptr) {
+ __ j(X86Condition(condition->GetOppositeCondition()), false_target);
+ } else {
+ __ j(X86Condition(condition->GetCondition()), true_target);
}
}
- if (false_target != nullptr) {
+
+ // If neither branch falls through (case 3), the conditional branch to `true_target`
+ // was already emitted (case 2) and we need to emit a jump to `false_target`.
+ if (true_target != nullptr && false_target != nullptr) {
__ jmp(false_target);
}
}
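A standalone sketch (simplified model under stated assumptions, not ART code) of the three emission cases described in the comment above; a null target means the corresponding successor is the next block (fallthrough):

#include <cstdio>

// Simplified model of the case analysis in GenerateTestAndBranch.
void DescribeBranchShape(bool have_true_target, bool have_false_target) {
  if (!have_true_target && !have_false_target) {
    std::printf("nothing to emit; control always falls through\n");
  } else if (!have_true_target) {
    std::printf("(1) jump to false_target on the opposite condition\n");
  } else if (!have_false_target) {
    std::printf("(2) jump to true_target on the condition\n");
  } else {
    std::printf("(3) jump to true_target on the condition, then unconditionally to false_target\n");
  }
}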
void LocationsBuilderX86::VisitIf(HIf* if_instr) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
}
void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
- Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
- Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
- Label* always_true_target = true_target;
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- always_true_target = nullptr;
- }
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- false_target = nullptr;
- }
- GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+ HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+ HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+ Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+ nullptr : codegen_->GetLabelOf(true_successor);
+ Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+ nullptr : codegen_->GetLabelOf(false_successor);
+ GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
}
void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- HInstruction* cond = deoptimize->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
}
@@ -1315,8 +1555,10 @@ void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCode* slow_path = new (GetGraph()->GetArena())
DeoptimizationSlowPathX86(deoptimize);
codegen_->AddSlowPath(slow_path);
- Label* slow_path_entry = slow_path->GetEntryLabel();
- GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+ GenerateTestAndBranch(deoptimize,
+ /* condition_input_index */ 0,
+ slow_path->GetEntryLabel(),
+ /* false_target */ nullptr);
}
void LocationsBuilderX86::VisitLocal(HLocal* local) {
@@ -1677,19 +1919,27 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
IntrinsicLocationsBuilderX86 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
+ if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+ invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+ }
return;
}
HandleInvoke(invoke);
+ // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
+ if (invoke->HasPcRelativeDexCache()) {
+ invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
+ }
+
if (codegen_->IsBaseline()) {
// Baseline does not have enough registers if the current method also
// needs a register. We therefore do not require a register for it, and let
// the code generation of the invoke handle it.
LocationSummary* locations = invoke->GetLocations();
- Location location = locations->InAt(invoke->GetCurrentMethodInputIndex());
+ Location location = locations->InAt(invoke->GetSpecialInputIndex());
if (location.IsUnallocated() && location.GetPolicy() == Location::kRequiresRegister) {
- locations->SetInAt(invoke->GetCurrentMethodInputIndex(), Location::NoLocation());
+ locations->SetInAt(invoke->GetSpecialInputIndex(), Location::NoLocation());
}
}
}
@@ -1719,6 +1969,11 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
}
void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+ IntrinsicLocationsBuilderX86 intrinsic(codegen_);
+ if (intrinsic.TryDispatch(invoke)) {
+ return;
+ }
+
HandleInvoke(invoke);
}
@@ -1738,6 +1993,9 @@ void InstructionCodeGeneratorX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
}
void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
+ // This call to HandleInvoke allocates a temporary (core) register
+ // which is also used to transfer the hidden argument from FP to
+ // core register.
HandleInvoke(invoke);
// Add the hidden argument.
invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
@@ -1745,31 +2003,42 @@ void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
- Register temp = invoke->GetLocations()->GetTemp(0).AsRegister<Register>();
+ LocationSummary* locations = invoke->GetLocations();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ XmmRegister hidden_reg = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
invoke->GetImtIndex() % mirror::Class::kImtSize, kX86PointerSize).Uint32Value();
- LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- // Set the hidden argument.
+ // Set the hidden argument. This is safe to do here, as XMM7
+ // won't be modified thereafter, before the `call` instruction.
+ DCHECK_EQ(XMM7, hidden_reg);
__ movl(temp, Immediate(invoke->GetDexMethodIndex()));
- __ movd(invoke->GetLocations()->GetTemp(1).AsFpuRegister<XmmRegister>(), temp);
+ __ movd(hidden_reg, temp);
- // temp = object->GetClass();
if (receiver.IsStackSlot()) {
__ movl(temp, Address(ESP, receiver.GetStackIndex()));
+ // /* HeapReference<Class> */ temp = temp->klass_
__ movl(temp, Address(temp, class_offset));
} else {
+ // /* HeapReference<Class> */ temp = receiver->klass_
__ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+ // However this is not required in practice, as this is an
+ // intermediate/temporary reference and because the current
+ // concurrent copying collector keeps the from-space memory
+ // intact/accessible until the end of the marking phase (the
+ // concurrent copying collector may not do so in the future).
__ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetImtEntryAt(method_offset);
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
- __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kX86WordSize).Int32Value()));
+ __ call(Address(temp,
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2207,6 +2476,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
conversion,
conversion->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickF2l, int64_t, float>();
break;
case Primitive::kPrimDouble:
@@ -2215,6 +2485,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio
conversion,
conversion->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickD2l, int64_t, double>();
break;
default:
@@ -2995,7 +3266,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
DCHECK_EQ(EAX, first.AsRegister<Register>());
DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>());
- if (instruction->InputAt(1)->IsIntConstant()) {
+ if (second.IsConstant()) {
int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
if (imm == 0) {
@@ -3045,11 +3316,13 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
} else {
codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pLmod),
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
}
break;
}
@@ -3516,19 +3789,18 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
locations->SetOut(Location::RegisterLocation(EAX));
InvokeRuntimeCallingConvention calling_convention;
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
}
void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
- InvokeRuntimeCallingConvention calling_convention;
- __ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
codegen_->InvokeRuntime(instruction->GetEntrypoint(),
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
DCHECK(!codegen_->IsLeafMethod());
}
@@ -3545,13 +3817,13 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
InvokeRuntimeCallingConvention calling_convention;
__ movl(calling_convention.GetRegisterAt(0), Immediate(instruction->GetTypeIndex()));
-
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
codegen_->InvokeRuntime(instruction->GetEntrypoint(),
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
DCHECK(!codegen_->IsLeafMethod());
}
@@ -3760,16 +4032,6 @@ void InstructionCodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
MethodReference target_method ATTRIBUTE_UNUSED) {
- if (desired_dispatch_info.method_load_kind ==
- HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative) {
- // TODO: Implement this type. For the moment, we fall back to kDexCacheViaMethod.
- return HInvokeStaticOrDirect::DispatchInfo {
- HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
- HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
- 0u,
- 0u
- };
- }
switch (desired_dispatch_info.code_ptr_location) {
case HInvokeStaticOrDirect::CodePtrLocation::kCallDirectWithFixup:
case HInvokeStaticOrDirect::CodePtrLocation::kCallDirect:
@@ -3786,6 +4048,32 @@ HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOr
}
}
+Register CodeGeneratorX86::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+ Register temp) {
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+ Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+ if (!invoke->GetLocations()->Intrinsified()) {
+ return location.AsRegister<Register>();
+ }
+ // For intrinsics we allow any location, so it may be on the stack.
+ if (!location.IsRegister()) {
+ __ movl(temp, Address(ESP, location.GetStackIndex()));
+ return temp;
+ }
+ // For register locations, check if the register was saved. If so, get it from the stack.
+ // Note: There is a chance that the register was saved but not overwritten, so we could
+ // save one load. However, since this is just an intrinsic slow path we prefer this
+ // simple and more robust approach rather than trying to determine if that's the case.
+ SlowPathCode* slow_path = GetCurrentSlowPath();
+ DCHECK(slow_path != nullptr); // For intrinsified invokes the call is emitted on the slow path.
+ if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+ int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+ __ movl(temp, Address(ESP, stack_offset));
+ return temp;
+ }
+ return location.AsRegister<Register>();
+}
+
void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
switch (invoke->GetMethodLoadKind()) {
@@ -3794,7 +4082,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
__ fs()->movl(temp.AsRegister<Register>(), Address::Absolute(invoke->GetStringInitOffset()));
break;
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
@@ -3804,13 +4092,18 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
method_patches_.emplace_back(invoke->GetTargetMethod());
__ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn.
break;
- case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
- // TODO: Implement this type.
- // Currently filtered out by GetSupportedInvokeStaticOrDirectDispatch().
- LOG(FATAL) << "Unsupported";
- UNREACHABLE();
+ case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+ Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+ temp.AsRegister<Register>());
+ uint32_t offset = invoke->GetDexCacheArrayOffset();
+ __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
+ // Add the patch entry and bind its label at the end of the instruction.
+ pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, offset);
+ __ Bind(&pc_relative_dex_cache_patches_.back().label);
+ break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
- Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
Register method_reg;
Register reg = temp.AsRegister<Register>();
if (current_method.IsRegister()) {
@@ -3821,7 +4114,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
method_reg = reg;
__ movl(reg, Address(ESP, kCurrentMethodStackOffset));
}
- // temp = temp->dex_cache_resolved_methods_;
+ // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
__ movl(reg, Address(method_reg,
ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value()));
// temp = temp[index_in_cache]
@@ -3862,13 +4155,24 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
Register temp = temp_in.AsRegister<Register>();
uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
- LocationSummary* locations = invoke->GetLocations();
- Location receiver = locations->InAt(0);
+
+ // Use the calling convention instead of the location of the receiver, as
+ // intrinsics may have put the receiver in a different register. In the intrinsics
+ // slow path, the arguments have been moved to the right place, so here we are
+ // guaranteed that the receiver is the first register of the calling convention.
+ InvokeDexCallingConvention calling_convention;
+ Register receiver = calling_convention.GetRegisterAt(0);
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- // temp = object->GetClass();
- DCHECK(receiver.IsRegister());
- __ movl(temp, Address(receiver.AsRegister<Register>(), class_offset));
+ // /* HeapReference<Class> */ temp = receiver->klass_
+ __ movl(temp, Address(receiver, class_offset));
MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference, and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // a future concurrent copying collector may not do so).
__ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetMethodAt(method_offset);
__ movl(temp, Address(temp, method_offset));
@@ -3879,23 +4183,33 @@ void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp
void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
- linker_patches->reserve(method_patches_.size() + relative_call_patches_.size());
+ size_t size =
+ method_patches_.size() +
+ relative_call_patches_.size() +
+ pc_relative_dex_cache_patches_.size();
+ linker_patches->reserve(size);
+  // The label points to the end of the "movl" insn, but the literal offset for a method
+  // patch needs to point to the embedded constant, which occupies the last 4 bytes.
+ constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
for (const MethodPatchInfo<Label>& info : method_patches_) {
- // The label points to the end of the "movl" insn but the literal offset for method
- // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
- uint32_t literal_offset = info.label.Position() - 4;
+ uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
info.target_method.dex_file,
info.target_method.dex_method_index));
}
for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
- // The label points to the end of the "call" insn but the literal offset for method
- // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
- uint32_t literal_offset = info.label.Position() - 4;
+ uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
info.target_method.dex_file,
info.target_method.dex_method_index));
}
+ for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+ uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
+ linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
+ &info.target_dex_file,
+ GetMethodAddressOffset(),
+ info.element_offset));
+ }
}
void CodeGeneratorX86::MarkGCCard(Register temp,
@@ -3920,18 +4234,29 @@ void CodeGeneratorX86::MarkGCCard(Register temp,
void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ kEmitCompilerReadBarrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
} else {
- // The output overlaps in case of long: we don't want the low move to overwrite
- // the object's location.
- locations->SetOut(Location::RequiresRegister(),
- (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap
- : Location::kNoOutputOverlap);
+ // The output overlaps in case of long: we don't want the low move
+ // to overwrite the object's location. Likewise, in the case of
+ // an object field get with read barriers enabled, we do not want
+ // the move to overwrite the object's location, as we need it to emit
+ // the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ (object_field_get_with_read_barrier || instruction->GetType() == Primitive::kPrimLong) ?
+ Location::kOutputOverlap :
+ Location::kNoOutputOverlap);
}
if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) {
@@ -3947,7 +4272,8 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
LocationSummary* locations = instruction->GetLocations();
- Register base = locations->InAt(0).AsRegister<Register>();
+ Location base_loc = locations->InAt(0);
+ Register base = base_loc.AsRegister<Register>();
Location out = locations->Out();
bool is_volatile = field_info.IsVolatile();
Primitive::Type field_type = field_info.GetFieldType();
@@ -4022,7 +4348,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
}
if (field_type == Primitive::kPrimNot) {
- __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
}
}
@@ -4043,16 +4369,16 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI
// Ensure the value is in a byte register.
locations->SetInAt(1, Location::RegisterLocation(EAX));
} else if (Primitive::IsFloatingPointType(field_type)) {
- locations->SetInAt(1, Location::RequiresFpuRegister());
- } else {
+ if (is_volatile && field_type == Primitive::kPrimDouble) {
+ // In order to satisfy the semantics of volatile, this must be a single instruction store.
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
+ }
+ } else if (is_volatile && field_type == Primitive::kPrimLong) {
+ // In order to satisfy the semantics of volatile, this must be a single instruction store.
locations->SetInAt(1, Location::RequiresRegister());
- }
- if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
- // Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
- // Ensure the card is in a byte register.
- locations->AddTemp(Location::RegisterLocation(ECX));
- } else if (is_volatile && (field_type == Primitive::kPrimLong)) {
+
// 64bits value can be atomically written to an address with movsd and an XMM register.
// We need two XMM registers because there's no easier way to (bit) copy a register pair
// into a single XMM register (we copy each pair part into the XMMs and then interleave them).
@@ -4060,6 +4386,15 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI
// isolated cases when we need this it isn't worth adding the extra complexity.
locations->AddTemp(Location::RequiresFpuRegister());
locations->AddTemp(Location::RequiresFpuRegister());
+ } else {
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+
+ if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+ // Temporary registers for the write barrier.
+ locations->AddTemp(Location::RequiresRegister()); // May be used for reference poisoning too.
+ // Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX));
+ }
}
}
@@ -4081,6 +4416,8 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
}
+ bool maybe_record_implicit_null_check_done = false;
+
switch (field_type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
@@ -4090,7 +4427,12 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
case Primitive::kPrimShort:
case Primitive::kPrimChar: {
- __ movw(Address(base, offset), value.AsRegister<Register>());
+ if (value.IsConstant()) {
+ int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ __ movw(Address(base, offset), Immediate(v));
+ } else {
+ __ movw(Address(base, offset), value.AsRegister<Register>());
+ }
break;
}
@@ -4105,6 +4447,9 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
__ movl(temp, value.AsRegister<Register>());
__ PoisonHeapReference(temp);
__ movl(Address(base, offset), temp);
+ } else if (value.IsConstant()) {
+ int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ __ movl(Address(base, offset), Immediate(v));
} else {
__ movl(Address(base, offset), value.AsRegister<Register>());
}
@@ -4120,21 +4465,40 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
__ punpckldq(temp1, temp2);
__ movsd(Address(base, offset), temp1);
codegen_->MaybeRecordImplicitNullCheck(instruction);
+ } else if (value.IsConstant()) {
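+        // Non-volatile constant store: the volatile case is handled above with a single
+        // atomic movsd, so splitting the value into two 32-bit stores is acceptable here.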
+ int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+ __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
} else {
__ movl(Address(base, offset), value.AsRegisterPairLow<Register>());
codegen_->MaybeRecordImplicitNullCheck(instruction);
__ movl(Address(base, kX86WordSize + offset), value.AsRegisterPairHigh<Register>());
}
+ maybe_record_implicit_null_check_done = true;
break;
}
case Primitive::kPrimFloat: {
- __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ if (value.IsConstant()) {
+ int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ __ movl(Address(base, offset), Immediate(v));
+ } else {
+ __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ }
break;
}
case Primitive::kPrimDouble: {
- __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ if (value.IsConstant()) {
+ int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
+ __ movl(Address(base, offset), Immediate(Low32Bits(v)));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ movl(Address(base, kX86WordSize + offset), Immediate(High32Bits(v)));
+ maybe_record_implicit_null_check_done = true;
+ } else {
+ __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ }
break;
}
@@ -4143,8 +4507,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction,
UNREACHABLE();
}
- // Longs are handled in the switch.
- if (field_type != Primitive::kPrimLong) {
+ if (!maybe_record_implicit_null_check_done) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
@@ -4313,24 +4676,35 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) {
}
void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) {
+ bool object_array_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_array_get_with_read_barrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- // The output overlaps in case of long: we don't want the low move to overwrite
- // the array's location.
- locations->SetOut(Location::RequiresRegister(),
- (instruction->GetType() == Primitive::kPrimLong) ? Location::kOutputOverlap
- : Location::kNoOutputOverlap);
+ // The output overlaps in case of long: we don't want the low move
+ // to overwrite the array's location. Likewise, in the case of an
+ // object array get with read barriers enabled, we do not want the
+ // move to overwrite the array's location, as we need it to emit
+ // the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ (instruction->GetType() == Primitive::kPrimLong || object_array_get_with_read_barrier) ?
+ Location::kOutputOverlap :
+ Location::kNoOutputOverlap);
}
}
void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Location index = locations->InAt(1);
Primitive::Type type = instruction->GetType();
@@ -4385,6 +4759,9 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
Register out = locations->Out().AsRegister<Register>();
if (index.IsConstant()) {
@@ -4449,8 +4826,17 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
}
if (type == Primitive::kPrimNot) {
- Register out = locations->Out().AsRegister<Register>();
- __ MaybeUnpoisonHeapReference(out);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ Location out = locations->Out();
+ if (index.IsConstant()) {
+ uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+ } else {
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
+ }
}
}
@@ -4461,14 +4847,18 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
// optimization.
Primitive::Type value_type = instruction->GetComponentType();
+
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-
- bool may_need_runtime_call = instruction->NeedsTypeCheck();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+ bool object_array_set_with_read_barrier =
+ kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+ (may_need_runtime_call_for_type_check || object_array_set_with_read_barrier) ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
bool is_byte_type = (value_type == Primitive::kPrimBoolean)
|| (value_type == Primitive::kPrimByte);
@@ -4481,7 +4871,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
// Ensure the value is in a byte register.
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
} else if (Primitive::IsFloatingPointType(value_type)) {
- locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
} else {
locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
}
@@ -4495,14 +4885,15 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register array = locations->InAt(0).AsRegister<Register>();
+ Location array_loc = locations->InAt(0);
+ Register array = array_loc.AsRegister<Register>();
Location index = locations->InAt(1);
Location value = locations->InAt(2);
Primitive::Type value_type = instruction->GetComponentType();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
- bool may_need_runtime_call = locations->CanCall();
+ bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
@@ -4542,6 +4933,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
Address address = index.IsConstant()
? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
: Address(array, index.AsRegister<Register>(), TIMES_4, offset);
+
if (!value.IsRegister()) {
// Just setting null.
DCHECK(instruction->InputAt(2)->IsNullConstant());
@@ -4549,7 +4941,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
__ movl(address, Immediate(0));
codegen_->MaybeRecordImplicitNullCheck(instruction);
DCHECK(!needs_write_barrier);
- DCHECK(!may_need_runtime_call);
+ DCHECK(!may_need_runtime_call_for_type_check);
break;
}
@@ -4558,7 +4950,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
NearLabel done, not_null, do_put;
SlowPathCode* slow_path = nullptr;
Register temp = locations->GetTemp(0).AsRegister<Register>();
- if (may_need_runtime_call) {
+ if (may_need_runtime_call_for_type_check) {
slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction);
codegen_->AddSlowPath(slow_path);
if (instruction->GetValueCanBeNull()) {
@@ -4570,22 +4962,62 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
__ Bind(&not_null);
}
- __ movl(temp, Address(array, class_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ MaybeUnpoisonHeapReference(temp);
- __ movl(temp, Address(temp, component_offset));
- // No need to poison/unpoison, we're comparing two poisoned references.
- __ cmpl(temp, Address(register_value, class_offset));
- if (instruction->StaticTypeOfArrayIsObjectArray()) {
- __ j(kEqual, &do_put);
- __ MaybeUnpoisonHeapReference(temp);
- __ movl(temp, Address(temp, super_offset));
- // No need to unpoison, we're comparing against null..
- __ testl(temp, temp);
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ Bind(&do_put);
+ if (kEmitCompilerReadBarrier) {
+ // When read barriers are enabled, the type checking
+ // instrumentation requires two read barriers:
+ //
+ // __ movl(temp2, temp);
+ // // /* HeapReference<Class> */ temp = temp->component_type_
+ // __ movl(temp, Address(temp, component_offset));
+ // codegen_->GenerateReadBarrier(
+ // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ //
+ // // /* HeapReference<Class> */ temp2 = register_value->klass_
+ // __ movl(temp2, Address(register_value, class_offset));
+ // codegen_->GenerateReadBarrier(
+ // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
+ //
+ // __ cmpl(temp, temp2);
+ //
+ // However, the second read barrier may trash `temp`, as it
+ // is a temporary register, and as such would not be saved
+ // along with live registers before calling the runtime (nor
+ // restored afterwards). So in this case, we bail out and
+ // delegate the work to the array set slow path.
+ //
+ // TODO: Extend the register allocator to support a new
+ // "(locally) live temp" location so as to avoid always
+ // going into the slow path when read barriers are enabled.
+ __ jmp(slow_path->GetEntryLabel());
} else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ // /* HeapReference<Class> */ temp = array->klass_
+ __ movl(temp, Address(array, class_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ MaybeUnpoisonHeapReference(temp);
+
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ __ movl(temp, Address(temp, component_offset));
+ // If heap poisoning is enabled, no need to unpoison `temp`
+ // nor the object reference in `register_value->klass`, as
+ // we are comparing two poisoned references.
+ __ cmpl(temp, Address(register_value, class_offset));
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ __ j(kEqual, &do_put);
+ // If heap poisoning is enabled, the `temp` reference has
+ // not been unpoisoned yet; unpoison it now.
+ __ MaybeUnpoisonHeapReference(temp);
+
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ __ movl(temp, Address(temp, super_offset));
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp`, as we are comparing against null below.
+ __ testl(temp, temp);
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ }
}
}
@@ -4596,7 +5028,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
} else {
__ movl(address, register_value);
}
- if (!may_need_runtime_call) {
+ if (!may_need_runtime_call_for_type_check) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
}
@@ -4611,6 +5043,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
break;
}
+
case Primitive::kPrimInt: {
uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
Address address = index.IsConstant()
@@ -4667,8 +5100,14 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
Address address = index.IsConstant()
? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
: Address(array, index.AsRegister<Register>(), TIMES_4, offset);
- DCHECK(value.IsFpuRegister());
- __ movss(address, value.AsFpuRegister<XmmRegister>());
+ if (value.IsFpuRegister()) {
+ __ movss(address, value.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(value.IsConstant());
+ int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+ __ movl(address, Immediate(v));
+ }
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -4677,8 +5116,19 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) {
Address address = index.IsConstant()
? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
: Address(array, index.AsRegister<Register>(), TIMES_8, offset);
- DCHECK(value.IsFpuRegister());
- __ movsd(address, value.AsFpuRegister<XmmRegister>());
+ if (value.IsFpuRegister()) {
+ __ movsd(address, value.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(value.IsConstant());
+ Address address_hi = index.IsConstant() ?
+ Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+ offset + kX86WordSize) :
+ Address(array, index.AsRegister<Register>(), TIMES_8, offset + kX86WordSize);
+ int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+ __ movl(address, Immediate(Low32Bits(v)));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
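+            // Record the implicit null check on the low-word store, the first access that may
+            // fault; the subsequent high-word store then has a known non-null base.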
+ __ movl(address_hi, Immediate(High32Bits(v)));
+ }
break;
}
@@ -5064,7 +5514,8 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
CodeGenerator::CreateLoadClassLocationSummary(
cls,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- Location::RegisterLocation(EAX));
+ Location::RegisterLocation(EAX),
+ /* code_generator_supports_read_barrier */ true);
}
void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
@@ -5075,31 +5526,60 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) {
cls,
cls->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
- Register out = locations->Out().AsRegister<Register>();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
Register current_method = locations->InAt(0).AsRegister<Register>();
+
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+ uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+ __ leal(out, Address(current_method, declaring_class_offset));
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ __ movl(out, Address(current_method, declaring_class_offset));
+ }
} else {
- DCHECK(cls->CanCallRuntime());
- __ movl(out, Address(
- current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
- __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
- // TODO: We will need a read barrier here.
-
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
- codegen_->AddSlowPath(slow_path);
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- if (cls->MustGenerateClinitCheck()) {
- GenerateClassInitializationCheck(slow_path, out);
+ // /* GcRoot<mirror::Class>[] */ out =
+ // current_method.ptr_sized_fields_->dex_cache_resolved_types_
+ __ movl(out, Address(current_method,
+ ArtMethod::DexCacheResolvedTypesOffset(kX86PointerSize).Int32Value()));
+
+ size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+ __ leal(out, Address(out, cache_offset));
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
} else {
- __ Bind(slow_path->GetExitLabel());
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ __ movl(out, Address(out, cache_offset));
+ }
+
+ if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+ DCHECK(cls->CanCallRuntime());
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86(
+ cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ codegen_->AddSlowPath(slow_path);
+
+ if (!cls->IsInDexCache()) {
+ __ testl(out, out);
+ __ j(kEqual, slow_path->GetEntryLabel());
+ }
+
+ if (cls->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
}
}
@@ -5143,12 +5623,35 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
codegen_->AddSlowPath(slow_path);
LocationSummary* locations = load->GetLocations();
- Register out = locations->Out().AsRegister<Register>();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
Register current_method = locations->InAt(0).AsRegister<Register>();
- __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+
+ uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+ __ leal(out, Address(current_method, declaring_class_offset));
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ __ movl(out, Address(current_method, declaring_class_offset));
+ }
+
+ // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
__ movl(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
- __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
- // TODO: We will need a read barrier here.
+
+ size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::String>* */ out = &out[string_index]
+ __ leal(out, Address(out, cache_offset));
+ // /* mirror::String* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ __ movl(out, Address(out, cache_offset));
+ }
+
__ testl(out, out);
__ j(kEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -5188,44 +5691,49 @@ void InstructionCodeGeneratorX86::VisitThrow(HThrow* instruction) {
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- switch (instruction->GetTypeCheckKind()) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = LocationSummary::kNoCall;
+ call_kind =
+ kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
break;
+ case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- call_kind = LocationSummary::kCall;
- break;
- case TypeCheckKind::kArrayCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
- if (call_kind != LocationSummary::kCall) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
- // Note that TypeCheckSlowPathX86 uses this register too.
- locations->SetOut(Location::RequiresRegister());
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->SetOut(Location::RegisterLocation(EAX));
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::Any());
+ // Note that TypeCheckSlowPathX86 uses this "out" register too.
+ locations->SetOut(Location::RequiresRegister());
+ // When read barriers are enabled, we need a temporary register for
+ // some cases.
+ if (kEmitCompilerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Location cls = locations->InAt(1);
- Register out = locations->Out().AsRegister<Register>();
+ Location out_loc = locations->Out();
+ Register out = out_loc.AsRegister<Register>();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5240,15 +5748,9 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
__ j(kEqual, &zero);
}
- // In case of an interface/unresolved check, we put the object class into the object register.
- // This is safe, as the register is caller-save, and the object must be in another
- // register if it survives the runtime call.
- Register target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
- (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
- ? obj
- : out;
- __ movl(target, Address(obj, class_offset));
- __ MaybeUnpoisonHeapReference(target);
+ // /* HeapReference<Class> */ out = obj->klass_
+ __ movl(out, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
switch (instruction->GetTypeCheckKind()) {
case TypeCheckKind::kExactCheck: {
@@ -5265,13 +5767,23 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
__ jmp(&done);
break;
}
+
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
NearLabel loop;
__ Bind(&loop);
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp = temp_loc.AsRegister<Register>();
+ __ movl(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->super_class_
__ movl(out, Address(out, super_offset));
- __ MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -5288,6 +5800,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
NearLabel loop, success;
@@ -5299,8 +5812,17 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
__ cmpl(out, Address(ESP, cls.GetStackIndex()));
}
__ j(kEqual, &success);
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp = temp_loc.AsRegister<Register>();
+ __ movl(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->super_class_
__ movl(out, Address(out, super_offset));
- __ MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
__ testl(out, out);
__ j(kNotEqual, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
@@ -5312,6 +5834,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
NearLabel exact_check;
@@ -5322,9 +5845,18 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
__ cmpl(out, Address(ESP, cls.GetStackIndex()));
}
__ j(kEqual, &exact_check);
- // Otherwise, we need to check that the object's class is a non primitive array.
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp = temp_loc.AsRegister<Register>();
+ __ movl(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->component_type_
__ movl(out, Address(out, component_offset));
- __ MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -5335,6 +5867,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
__ jmp(&done);
break;
}
+
case TypeCheckKind::kArrayCheck: {
if (cls.IsRegister()) {
__ cmpl(out, cls.AsRegister<Register>());
@@ -5343,8 +5876,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
__ cmpl(out, Address(ESP, cls.GetStackIndex()));
}
DCHECK(locations->OnlyCallsOnSlowPath());
- slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
- instruction, /* is_fatal */ false);
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
+ /* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
__ j(kNotEqual, slow_path->GetEntryLabel());
__ movl(out, Immediate(1));
@@ -5353,13 +5886,25 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kUnresolvedCheck:
- case TypeCheckKind::kInterfaceCheck:
- default: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ case TypeCheckKind::kInterfaceCheck: {
+ // Note that we indeed only call on slow path, but we always go
+ // into the slow path for the unresolved & interface check
+ // cases.
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
+ /* is_fatal */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
if (zero.IsLinked()) {
__ jmp(&done);
}
@@ -5384,75 +5929,73 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-
- switch (instruction->GetTypeCheckKind()) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = throws_into_catch
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
- break;
- case TypeCheckKind::kInterfaceCheck:
- case TypeCheckKind::kUnresolvedCheck:
- call_kind = LocationSummary::kCall;
+ call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
break;
case TypeCheckKind::kArrayCheck:
+ case TypeCheckKind::kUnresolvedCheck:
+ case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
-
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction, call_kind);
- if (call_kind != LocationSummary::kCall) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
- // Note that TypeCheckSlowPathX86 uses this register too.
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::Any());
+ // Note that TypeCheckSlowPathX86 uses this "temp" register too.
+ locations->AddTemp(Location::RequiresRegister());
+ // When read barriers are enabled, we need an additional temporary
+ // register for some cases.
+ if (kEmitCompilerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
locations->AddTemp(Location::RequiresRegister());
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
}
}
void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
- Register obj = locations->InAt(0).AsRegister<Register>();
+ Location obj_loc = locations->InAt(0);
+ Register obj = obj_loc.AsRegister<Register>();
Location cls = locations->InAt(1);
- Register temp = locations->WillCall()
- ? kNoRegister
- : locations->GetTemp(0).AsRegister<Register>();
-
+ Location temp_loc = locations->GetTemp(0);
+ Register temp = temp_loc.AsRegister<Register>();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- SlowPathCode* slow_path = nullptr;
- if (!locations->WillCall()) {
- slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(
- instruction, !locations->CanCall());
- codegen_->AddSlowPath(slow_path);
- }
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool is_type_check_slow_path_fatal =
+ (type_check_kind == TypeCheckKind::kExactCheck ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+ !instruction->CanThrowIntoCatchBlock();
+ SlowPathCode* type_check_slow_path =
+ new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
+ is_type_check_slow_path_fatal);
+ codegen_->AddSlowPath(type_check_slow_path);
- NearLabel done, abstract_entry;
+ NearLabel done;
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
__ testl(obj, obj);
__ j(kEqual, &done);
}
- if (locations->WillCall()) {
- __ movl(obj, Address(obj, class_offset));
- __ MaybeUnpoisonHeapReference(obj);
- } else {
- __ movl(temp, Address(obj, class_offset));
- __ MaybeUnpoisonHeapReference(temp);
- }
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kArrayCheck: {
if (cls.IsRegister()) {
@@ -5463,19 +6006,44 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
}
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
- NearLabel loop, success;
+ NearLabel loop, compare_classes;
__ Bind(&loop);
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ __ movl(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->super_class_
__ movl(temp, Address(temp, super_offset));
- __ MaybeUnpoisonHeapReference(temp);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+ // If the class reference currently in `temp` is not null, jump
+ // to the `compare_classes` label to compare it with the checked
+ // class.
__ testl(temp, temp);
- // Jump to the slow path to throw the exception.
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, &compare_classes);
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+ // But before, move back the object's class into `temp` before
+ // going into the slow path, as it has been overwritten in the
+ // meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ jmp(type_check_slow_path->GetEntryLabel());
+
+ __ Bind(&compare_classes);
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<Register>());
} else {
@@ -5485,6 +6053,7 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ j(kNotEqual, &loop);
break;
}
+
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
NearLabel loop;
@@ -5496,16 +6065,39 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ cmpl(temp, Address(ESP, cls.GetStackIndex()));
}
__ j(kEqual, &done);
+
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ __ movl(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->super_class_
__ movl(temp, Address(temp, super_offset));
- __ MaybeUnpoisonHeapReference(temp);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+ // If the class reference currently in `temp` is not null, jump
+ // back at the beginning of the loop.
__ testl(temp, temp);
__ j(kNotEqual, &loop);
- // Jump to the slow path to throw the exception.
- __ jmp(slow_path->GetEntryLabel());
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+ // But before, move back the object's class into `temp` before
+ // going into the slow path, as it has been overwritten in the
+ // meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ jmp(type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
+ NearLabel check_non_primitive_component_type;
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<Register>());
} else {
@@ -5513,29 +6105,67 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ cmpl(temp, Address(ESP, cls.GetStackIndex()));
}
__ j(kEqual, &done);
- // Otherwise, we need to check that the object's class is a non primitive array.
+
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ Register temp2 = temp2_loc.AsRegister<Register>();
+ __ movl(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->component_type_
__ movl(temp, Address(temp, component_offset));
- __ MaybeUnpoisonHeapReference(temp);
+ codegen_->MaybeGenerateReadBarrier(
+ instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+ // If the component type is not null (i.e. the object is indeed
+ // an array), jump to label `check_non_primitive_component_type`
+ // to further check that this component type is not a primitive
+ // type.
__ testl(temp, temp);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, &check_non_primitive_component_type);
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+ // But before, move back the object's class into `temp` before
+ // going into the slow path, as it has been overwritten in the
+ // meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ jmp(type_check_slow_path->GetEntryLabel());
+
+ __ Bind(&check_non_primitive_component_type);
__ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, &done);
+ // Same comment as above regarding `temp` and the slow path.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ jmp(type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- default:
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ // We always go into the type check slow path for the unresolved &
+ // interface check cases.
+ //
+ // We cannot directly call the CheckCast runtime entry point
+ // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers for the inputs of this HCheckCast
+ // instruction (following the runtime calling convention), which
+ // might be cluttered by the potential first read barrier
+ // emission at the beginning of this method.
+ __ jmp(type_check_slow_path->GetEntryLabel());
break;
}
__ Bind(&done);
- if (slow_path != nullptr) {
- __ Bind(slow_path->GetExitLabel());
- }
+ __ Bind(type_check_slow_path->GetExitLabel());
}
void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) {
@@ -5551,6 +6181,11 @@ void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instr
instruction,
instruction->GetDexPc(),
nullptr);
+ if (instruction->IsEnter()) {
+ CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+ } else {
+ CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+ }
}
void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
@@ -5686,6 +6321,82 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr
}
}
+void CodeGeneratorX86::GenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // If heap poisoning is enabled, the unpoisoning of the loaded
+ // reference will be carried out by the runtime within the slow
+ // path.
+ //
+ // Note that `ref` currently does not get unpoisoned (when heap
+ // poisoning is enabled), which is alright as the `ref` argument is
+ // not used by the artReadBarrierSlow entry point.
+ //
+ // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena())
+ ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index);
+ AddSlowPath(slow_path);
+
+ // TODO: When read barrier has a fast path, add it here.
+ /* Currently the read barrier call is inserted after the original load.
+ * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+ * original load. This load-load ordering is required by the read barrier.
+ * The fast path/slow path (for Baker's algorithm) should look like:
+ *
+ * bool isGray = obj.LockWord & kReadBarrierMask;
+ * lfence; // load fence or artificial data dependence to prevent load-load reordering
+ * ref = obj.field; // this is the original load
+ * if (isGray) {
+ * ref = Mark(ref); // ideally the slow path just does Mark(ref)
+ * }
+ */
+
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86::MaybeGenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (kEmitCompilerReadBarrier) {
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(out.AsRegister<Register>());
+ }
+}
+
+void CodeGeneratorX86::GenerateReadBarrierForRoot(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // Note that GC roots are not affected by heap poisoning, so we do
+ // not need to do anything special for this here.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ // TODO: Implement a fast path for ReadBarrierForRoot, performing
+ // the following operation (for Baker's algorithm):
+ //
+ // if (thread.tls32_.is_gc_marking) {
+ // root = Mark(root);
+ // }
+
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
void LocationsBuilderX86::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index ac3d06c23d..064051c7f4 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -227,14 +227,12 @@ class InstructionCodeGeneratorX86 : public HGraphVisitor {
void GenerateImplicitNullCheck(HNullCheck* instruction);
void GenerateExplicitNullCheck(HNullCheck* instruction);
void GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
Label* true_target,
- Label* false_target,
- Label* always_true_target);
- void GenerateCompareTestAndBranch(HIf* if_inst,
- HCondition* condition,
+ Label* false_target);
+ void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
- Label* false_target,
- Label* always_true_target);
+ Label* false_target);
void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
@@ -397,7 +395,64 @@ class CodeGeneratorX86 : public CodeGenerator {
void Finalize(CodeAllocator* allocator) OVERRIDE;
+ // Generate a read barrier for a heap reference within `instruction`.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+ // When `index` is provided (i.e. for array accesses), the offset
+ // value passed to artReadBarrierSlow is adjusted to take `index`
+ // into account.
+ void GenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // If read barriers are enabled, generate a read barrier for a heap reference.
+ // If heap poisoning is enabled, also unpoison the reference in `out`.
+ void MaybeGenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction`.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
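+  //
+  // Typical usage (sketch): after loading a GC root into `out`, callers such
+  // as the HLoadClass/HLoadString code generators invoke
+  //
+  //   GenerateReadBarrierForRoot(instruction, out_loc, out_loc);
+  //
+  // i.e. with `root` designating the same location as `out`.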
+ void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
private:
+ Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
+
+ struct PcRelativeDexCacheAccessInfo {
+ PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
+ : target_dex_file(dex_file), element_offset(element_off), label() { }
+
+ const DexFile& target_dex_file;
+ uint32_t element_offset;
+ // NOTE: Label is bound to the end of the instruction that has an embedded 32-bit offset.
+ Label label;
+ };
+
// Labels for each block that will be compiled.
Label* block_labels_; // Indexed by block id.
Label frame_entry_label_;
@@ -410,6 +465,8 @@ class CodeGeneratorX86 : public CodeGenerator {
// Method patch info. Using ArenaDeque<> which retains element addresses on push/emplace_back().
ArenaDeque<MethodPatchInfo<Label>> method_patches_;
ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
+ // PC-relative DexCache access info.
+ ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
// Offset to the start of the constant area in the assembled code.
// Used for fixups to the constant area.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5218d70995..4618be9cc3 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -34,6 +34,9 @@
namespace art {
+template<class MirrorType>
+class GcRoot;
+
namespace x86_64 {
static constexpr int kCurrentMethodStackOffset = 0;
@@ -52,16 +55,17 @@ class NullCheckSlowPathX86_64 : public SlowPathCode {
explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
- x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowNullPointer),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
bool IsFatal() const OVERRIDE { return true; }
@@ -78,16 +82,17 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
// Live registers will be restored in the catch block if caught.
SaveLiveRegisters(codegen, instruction_->GetLocations());
}
- x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowDivZero),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
bool IsFatal() const OVERRIDE { return true; }
@@ -139,18 +144,19 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
: instruction_(instruction), successor_(successor) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
- x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pTestSuspend),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickTestSuspend, void, void>();
RestoreLiveRegisters(codegen, instruction_->GetLocations());
if (successor_ == nullptr) {
__ jmp(GetReturnLabel());
} else {
- __ jmp(x64_codegen->GetLabelOf(successor_));
+ __ jmp(x86_64_codegen->GetLabelOf(successor_));
}
}
@@ -180,7 +186,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = instruction_->GetLocations();
- CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
// Live registers will be restored in the catch block if caught.
@@ -196,8 +202,11 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
locations->InAt(1),
Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
Primitive::kPrimInt);
- x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
- instruction_, instruction_->GetDexPc(), this);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowArrayBounds),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
bool IsFatal() const OVERRIDE { return true; }
@@ -222,22 +231,30 @@ class LoadClassSlowPathX86_64 : public SlowPathCode {
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
LocationSummary* locations = at_->GetLocations();
- CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
__ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(cls_->GetTypeIndex()));
- x64_codegen->InvokeRuntime(do_clinit_ ? QUICK_ENTRY_POINT(pInitializeStaticStorage)
- : QUICK_ENTRY_POINT(pInitializeType),
- at_, dex_pc_, this);
+ x86_64_codegen->InvokeRuntime(do_clinit_ ?
+ QUICK_ENTRY_POINT(pInitializeStaticStorage) :
+ QUICK_ENTRY_POINT(pInitializeType),
+ at_,
+ dex_pc_,
+ this);
+ if (do_clinit_) {
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ } else {
+ CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ }
Location out = locations->Out();
// Move the class to the desired location.
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
- x64_codegen->Move(out, Location::RegisterLocation(RAX));
+ x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
}
RestoreLiveRegisters(codegen, locations);
@@ -271,18 +288,19 @@ class LoadStringSlowPathX86_64 : public SlowPathCode {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
- CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
__ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
Immediate(instruction_->GetStringIndex()));
- x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
- instruction_,
- instruction_->GetDexPc(),
- this);
- x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pResolveString),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+ x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -308,18 +326,9 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode {
DCHECK(instruction_->IsCheckCast()
|| !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
- CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
- if (instruction_->IsCheckCast()) {
- // The codegen for the instruction overwrites `temp`, so put it back in place.
- CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
- CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
- uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
- __ movl(temp, Address(obj, class_offset));
- __ MaybeUnpoisonHeapReference(temp);
- }
-
if (!is_fatal_) {
SaveLiveRegisters(codegen, locations);
}
@@ -336,21 +345,24 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode {
Primitive::kPrimNot);
if (instruction_->IsInstanceOf()) {
- x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
- instruction_,
- dex_pc,
- this);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
+ instruction_,
+ dex_pc,
+ this);
+ CheckEntrypointTypes<
+ kQuickInstanceofNonTrivial, uint32_t, const mirror::Class*, const mirror::Class*>();
} else {
DCHECK(instruction_->IsCheckCast());
- x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
- instruction_,
- dex_pc,
- this);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
+ instruction_,
+ dex_pc,
+ this);
+ CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
}
if (!is_fatal_) {
if (instruction_->IsInstanceOf()) {
- x64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
+ x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
}
RestoreLiveRegisters(codegen, locations);
@@ -375,15 +387,16 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode {
: instruction_(instruction) {}
void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, instruction_->GetLocations());
DCHECK(instruction_->IsDeoptimize());
HDeoptimize* deoptimize = instruction_->AsDeoptimize();
- x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
- deoptimize,
- deoptimize->GetDexPc(),
- this);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pDeoptimize),
+ deoptimize,
+ deoptimize->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickDeoptimize, void, void>();
}
const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
@@ -421,11 +434,12 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
nullptr);
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
- CodeGeneratorX86_64* x64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
- x64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
- instruction_,
- instruction_->GetDexPc(),
- this);
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pAputObject),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
RestoreLiveRegisters(codegen, locations);
__ jmp(GetExitLabel());
}
@@ -438,6 +452,219 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
+ public:
+ ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index)
+ : instruction_(instruction),
+ out_(out),
+ ref_(ref),
+ obj_(obj),
+ offset_(offset),
+ index_(index) {
+ DCHECK(kEmitCompilerReadBarrier);
+ // If `obj` is equal to `out` or `ref`, it means the initial
+ // object has been overwritten by (or after) the heap object
+ // reference load to be instrumented, e.g.:
+ //
+ // __ movl(out, Address(out, offset));
+ // codegen_->GenerateReadBarrier(instruction, out_loc, out_loc, out_loc, offset);
+ //
+ // In that case, we have lost the information about the original
+ // object, and the emitted read barrier cannot work properly.
+ DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+ DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ LocationSummary* locations = instruction_->GetLocations();
+ CpuRegister reg_out = out_.AsRegister<CpuRegister>();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
+ DCHECK(!instruction_->IsInvoke() ||
+ (instruction_->IsInvokeStaticOrDirect() &&
+ instruction_->GetLocations()->Intrinsified()));
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ // We may have to change the index's value, but as `index_` is a
+ // constant member (like other "inputs" of this slow path),
+    // we introduce a copy of it, `index`.
+ Location index = index_;
+ if (index_.IsValid()) {
+ // Handle `index_` for HArrayGet and intrinsic UnsafeGetObject.
+ if (instruction_->IsArrayGet()) {
+        // Compute the real offset and store it in `index`.
+ Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
+ DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+ if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+ // We are about to change the value of `index_reg` (see the
+ // calls to art::x86_64::X86_64Assembler::shll and
+ // art::x86_64::X86_64Assembler::AddImmediate below), but it
+ // has not been saved by the previous call to
+ // art::SlowPathCode::SaveLiveRegisters, as it is a
+ // callee-save register --
+ // art::SlowPathCode::SaveLiveRegisters does not consider
+ // callee-save registers, as it has been designed with the
+ // assumption that callee-save registers are supposed to be
+ // handled by the called function. So, as a callee-save
+ // register, `index_reg` _would_ eventually be saved onto
+ // the stack, but it would be too late: we would have
+ // changed its value earlier. Therefore, we manually save
+ // it here into another freely available register,
+ // `free_reg`, chosen of course among the caller-save
+ // registers (as a callee-save `free_reg` register would
+ // exhibit the same problem).
+ //
+ // Note we could have requested a temporary register from
+ // the register allocator instead; but we prefer not to, as
+ // this is a slow path, and we know we can find a
+ // caller-save register that is available.
+ Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
+ __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
+ index_reg = free_reg;
+ index = Location::RegisterLocation(index_reg);
+ } else {
+ // The initial register stored in `index_` has already been
+ // saved in the call to art::SlowPathCode::SaveLiveRegisters
+ // (as it is not a callee-save register), so we can freely
+ // use it.
+ }
+ // Shifting the index value contained in `index_reg` by the
+ // scale factor (2) cannot overflow in practice, as the
+        // runtime is unable to allocate object arrays with more than
+        // 2^26 - 1 elements (that is, 2^28 - 4 bytes of data).
+ __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
+ } else {
+ DCHECK(instruction_->IsInvoke());
+ DCHECK(instruction_->GetLocations()->Intrinsified());
+ DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+ (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+ << instruction_->AsInvoke()->GetIntrinsic();
+ DCHECK_EQ(offset_, 0U);
+ DCHECK(index_.IsRegister());
+ }
+ }
+
+ // We're moving two or three locations to locations that could
+ // overlap, so we need a parallel move resolver.
+ InvokeRuntimeCallingConvention calling_convention;
+ HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+ parallel_move.AddMove(ref_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ Primitive::kPrimNot,
+ nullptr);
+ parallel_move.AddMove(obj_,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+ Primitive::kPrimNot,
+ nullptr);
+ if (index.IsValid()) {
+ parallel_move.AddMove(index,
+ Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+ Primitive::kPrimInt,
+ nullptr);
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ } else {
+ codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+ __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
+ }
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierSlow),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<
+ kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+ x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE {
+ return "ReadBarrierForHeapReferenceSlowPathX86_64";
+ }
+
+ private:
+ CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+ size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
+ size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
+ for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+ if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
+ return static_cast<CpuRegister>(i);
+ }
+ }
+ // We shall never fail to find a free caller-save register, as
+ // there are more than two core caller-save registers on x86-64
+ // (meaning it is possible to find one which is different from
+ // `ref` and `obj`).
+ DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+ LOG(FATAL) << "Could not find a free caller-save register";
+ UNREACHABLE();
+ }
+
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location ref_;
+ const Location obj_;
+ const uint32_t offset_;
+ // An additional location containing an index to an array.
+ // Only used for HArrayGet and the UnsafeGetObject &
+ // UnsafeGetObjectVolatile intrinsics.
+ const Location index_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
+ public:
+ ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
+ : instruction_(instruction), out_(out), root_(root) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ LocationSummary* locations = instruction_->GetLocations();
+ DCHECK(locations->CanCall());
+ DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
+ DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString());
+
+ __ Bind(GetEntryLabel());
+ SaveLiveRegisters(codegen, locations);
+
+ InvokeRuntimeCallingConvention calling_convention;
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+ x86_64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pReadBarrierForRootSlow),
+ instruction_,
+ instruction_->GetDexPc(),
+ this);
+ CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+ x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
+
+ RestoreLiveRegisters(codegen, locations);
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
+
+ private:
+ HInstruction* const instruction_;
+ const Location out_;
+ const Location root_;
+
+ DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
+};
+
#undef __
#define __ down_cast<X86_64Assembler*>(GetAssembler())->
@@ -503,7 +730,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
Address::Absolute(invoke->GetStringInitOffset(), true));
break;
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
- callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
__ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
@@ -514,15 +741,15 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
__ Bind(&method_patches_.back().label); // Bind the label at the end of the "movl" insn.
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
- pc_rel_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
- invoke->GetDexCacheArrayOffset());
+ pc_relative_dex_cache_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+ invoke->GetDexCacheArrayOffset());
__ movq(temp.AsRegister<CpuRegister>(),
Address::Absolute(kDummy32BitOffset, false /* no_rip */));
// Bind the label at the end of the "movl" insn.
- __ Bind(&pc_rel_dex_cache_patches_.back().label);
+ __ Bind(&pc_relative_dex_cache_patches_.back().label);
break;
case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
- Location current_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodInputIndex());
+ Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
Register method_reg;
CpuRegister reg = temp.AsRegister<CpuRegister>();
if (current_method.IsRegister()) {
@@ -533,7 +760,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invo
method_reg = reg.AsRegister();
__ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
}
- // temp = temp->dex_cache_resolved_methods_;
+ // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
__ movq(reg,
Address(CpuRegister(method_reg),
ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
@@ -575,13 +802,25 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t
CpuRegister temp = temp_in.AsRegister<CpuRegister>();
size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
- LocationSummary* locations = invoke->GetLocations();
- Location receiver = locations->InAt(0);
+
+ // Use the calling convention instead of the location of the receiver, as
+ // intrinsics may have put the receiver in a different register. In the intrinsics
+ // slow path, the arguments have been moved to the right place, so here we are
+ // guaranteed that the receiver is the first register of the calling convention.
+ InvokeDexCallingConvention calling_convention;
+ Register receiver = calling_convention.GetRegisterAt(0);
+
size_t class_offset = mirror::Object::ClassOffset().SizeValue();
- // temp = object->GetClass();
- DCHECK(receiver.IsRegister());
- __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
+ // /* HeapReference<Class> */ temp = receiver->klass_
+ __ movl(temp, Address(CpuRegister(receiver), class_offset));
MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not do so in the future).
__ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetMethodAt(method_offset);
__ movq(temp, Address(temp, method_offset));
@@ -593,28 +832,27 @@ void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location t
void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
- method_patches_.size() + relative_call_patches_.size() + pc_rel_dex_cache_patches_.size();
+ method_patches_.size() +
+ relative_call_patches_.size() +
+ pc_relative_dex_cache_patches_.size();
linker_patches->reserve(size);
+ // The label points to the end of the "movl" insn but the literal offset for method
+ // patch needs to point to the embedded constant which occupies the last 4 bytes.
+ constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
for (const MethodPatchInfo<Label>& info : method_patches_) {
- // The label points to the end of the "movl" instruction but the literal offset for method
- // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
- uint32_t literal_offset = info.label.Position() - 4;
+ uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
linker_patches->push_back(LinkerPatch::MethodPatch(literal_offset,
info.target_method.dex_file,
info.target_method.dex_method_index));
}
for (const MethodPatchInfo<Label>& info : relative_call_patches_) {
- // The label points to the end of the "call" instruction but the literal offset for method
- // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
- uint32_t literal_offset = info.label.Position() - 4;
+ uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
linker_patches->push_back(LinkerPatch::RelativeCodePatch(literal_offset,
info.target_method.dex_file,
info.target_method.dex_method_index));
}
- for (const PcRelativeDexCacheAccessInfo& info : pc_rel_dex_cache_patches_) {
- // The label points to the end of the "mov" instruction but the literal offset for method
- // patch x86 needs to point to the embedded constant which occupies the last 4 bytes.
- uint32_t literal_offset = info.label.Position() - 4;
+ for (const PcRelativeDexCacheAccessInfo& info : pc_relative_dex_cache_patches_) {
+ uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(literal_offset,
&info.target_dex_file,
info.label.Position(),
@@ -673,9 +911,9 @@ static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
- const X86_64InstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options,
- OptimizingCompilerStats* stats)
+ const X86_64InstructionSetFeatures& isa_features,
+ const CompilerOptions& compiler_options,
+ OptimizingCompilerStats* stats)
: CodeGenerator(graph,
kNumberOfCpuRegisters,
kNumberOfFloatRegisters,
@@ -695,7 +933,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
constant_area_start_(0),
method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
relative_call_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
- pc_rel_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+ pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
}
@@ -729,7 +967,7 @@ Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const {
LOG(FATAL) << "Unreachable type " << type;
}
- return Location();
+ return Location::NoLocation();
}
void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
@@ -1083,26 +1321,19 @@ void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
__ j(X86_64FPCondition(cond->GetCondition()), true_label);
}
-void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr,
- HCondition* condition,
- Label* true_target,
- Label* false_target,
- Label* always_true_target) {
+void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
+ Label* true_target_in,
+ Label* false_target_in) {
+ // Generated branching requires both targets to be explicit. If either of the
+  // targets is nullptr (fallthrough), use and bind `fallthrough_target` instead.
+ Label fallthrough_target;
+ Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
+ Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
+
LocationSummary* locations = condition->GetLocations();
Location left = locations->InAt(0);
Location right = locations->InAt(1);
- // We don't want true_target as a nullptr.
- if (true_target == nullptr) {
- true_target = always_true_target;
- }
- bool falls_through = (false_target == nullptr);
-
- // FP compares don't like null false_targets.
- if (false_target == nullptr) {
- false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
- }
-
Primitive::Type type = condition->InputAt(0)->GetType();
switch (type) {
case Primitive::kPrimLong: {
@@ -1161,117 +1392,140 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HIf* if_instr,
LOG(FATAL) << "Unexpected condition type " << type;
}
- if (!falls_through) {
+ if (false_target != &fallthrough_target) {
__ jmp(false_target);
}
+
+ if (fallthrough_target.IsLinked()) {
+ __ Bind(&fallthrough_target);
+ }
+}
+
+static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
+ // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
+  // are set only strictly before `branch`. We can't use the eflags on FP
+ // conditions if they are materialized due to the complex branching.
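+  // For instance (sketch): when a materialized HCondition is immediately
+  // followed by the HIf that consumes it, the flags set by the condition's
+  // comparison are still valid at the branch, so the branch can reuse them
+  // instead of re-testing the materialized boolean value.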
+ return cond->IsCondition() &&
+ cond->GetNext() == branch &&
+ !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
}
void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
Label* true_target,
- Label* false_target,
- Label* always_true_target) {
- HInstruction* cond = instruction->InputAt(0);
- if (cond->IsIntConstant()) {
+ Label* false_target) {
+ HInstruction* cond = instruction->InputAt(condition_input_index);
+
+ if (true_target == nullptr && false_target == nullptr) {
+ // Nothing to do. The code always falls through.
+ return;
+ } else if (cond->IsIntConstant()) {
// Constant condition, statically compared against 1.
- int32_t cond_value = cond->AsIntConstant()->GetValue();
- if (cond_value == 1) {
- if (always_true_target != nullptr) {
- __ jmp(always_true_target);
+ if (cond->AsIntConstant()->IsOne()) {
+ if (true_target != nullptr) {
+ __ jmp(true_target);
}
- return;
} else {
- DCHECK_EQ(cond_value, 0);
+ DCHECK(cond->AsIntConstant()->IsZero());
+ if (false_target != nullptr) {
+ __ jmp(false_target);
+ }
}
- } else {
- bool is_materialized =
- !cond->IsCondition() || cond->AsCondition()->NeedsMaterialization();
- // Moves do not affect the eflags register, so if the condition is
- // evaluated just before the if, we don't need to evaluate it
- // again. We can't use the eflags on FP conditions if they are
- // materialized due to the complex branching.
- Primitive::Type type = cond->IsCondition() ? cond->InputAt(0)->GetType() : Primitive::kPrimInt;
- bool eflags_set = cond->IsCondition()
- && cond->AsCondition()->IsBeforeWhenDisregardMoves(instruction)
- && !Primitive::IsFloatingPointType(type);
-
- if (is_materialized) {
- if (!eflags_set) {
- // Materialized condition, compare against 0.
- Location lhs = instruction->GetLocations()->InAt(0);
- if (lhs.IsRegister()) {
- __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
- } else {
- __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()),
- Immediate(0));
- }
- __ j(kNotEqual, true_target);
+ return;
+ }
+
+ // The following code generates these patterns:
+ // (1) true_target == nullptr && false_target != nullptr
+ // - opposite condition true => branch to false_target
+ // (2) true_target != nullptr && false_target == nullptr
+ // - condition true => branch to true_target
+ // (3) true_target != nullptr && false_target != nullptr
+ // - condition true => branch to true_target
+ // - branch to false_target
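+  //
+  // For example (sketch), an HIf whose true successor is the next block ends
+  // up in case (1): `true_target` is null, so a single jump on the opposite
+  // condition to `false_target` is emitted and the true case falls through.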
+ if (IsBooleanValueOrMaterializedCondition(cond)) {
+ if (AreEflagsSetFrom(cond, instruction)) {
+ if (true_target == nullptr) {
+ __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
} else {
__ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
}
} else {
- // Condition has not been materialized, use its inputs as the
- // comparison and its condition as the branch condition.
-
- // Is this a long or FP comparison that has been folded into the HCondition?
- if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
- // Generate the comparison directly.
- GenerateCompareTestAndBranch(instruction->AsIf(), cond->AsCondition(),
- true_target, false_target, always_true_target);
- return;
+ // Materialized condition, compare against 0.
+ Location lhs = instruction->GetLocations()->InAt(condition_input_index);
+ if (lhs.IsRegister()) {
+ __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
+ } else {
+ __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
+ }
+ if (true_target == nullptr) {
+ __ j(kEqual, false_target);
+ } else {
+ __ j(kNotEqual, true_target);
}
+ }
+ } else {
+ // Condition has not been materialized, use its inputs as the
+ // comparison and its condition as the branch condition.
+ HCondition* condition = cond->AsCondition();
+
+ // If this is a long or FP comparison that has been folded into
+ // the HCondition, generate the comparison directly.
+ Primitive::Type type = condition->InputAt(0)->GetType();
+ if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
+ GenerateCompareTestAndBranch(condition, true_target, false_target);
+ return;
+ }
- Location lhs = cond->GetLocations()->InAt(0);
- Location rhs = cond->GetLocations()->InAt(1);
- if (rhs.IsRegister()) {
- __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
- } else if (rhs.IsConstant()) {
- int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
- if (constant == 0) {
- __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
- } else {
- __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
- }
+ Location lhs = condition->GetLocations()->InAt(0);
+ Location rhs = condition->GetLocations()->InAt(1);
+ if (rhs.IsRegister()) {
+ __ cmpl(lhs.AsRegister<CpuRegister>(), rhs.AsRegister<CpuRegister>());
+ } else if (rhs.IsConstant()) {
+ int32_t constant = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
+ if (constant == 0) {
+ __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
} else {
- __ cmpl(lhs.AsRegister<CpuRegister>(),
- Address(CpuRegister(RSP), rhs.GetStackIndex()));
+ __ cmpl(lhs.AsRegister<CpuRegister>(), Immediate(constant));
}
- __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
+ } else {
+ __ cmpl(lhs.AsRegister<CpuRegister>(),
+ Address(CpuRegister(RSP), rhs.GetStackIndex()));
+ }
+ if (true_target == nullptr) {
+ __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
+ } else {
+ __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
}
}
- if (false_target != nullptr) {
+
+ // If neither branch falls through (case 3), the conditional branch to `true_target`
+ // was already emitted (case 2) and we need to emit a jump to `false_target`.
+ if (true_target != nullptr && false_target != nullptr) {
__ jmp(false_target);
}
}
void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
- LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(if_instr, LocationSummary::kNoCall);
- HInstruction* cond = if_instr->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
+ if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
}
void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
- Label* true_target = codegen_->GetLabelOf(if_instr->IfTrueSuccessor());
- Label* false_target = codegen_->GetLabelOf(if_instr->IfFalseSuccessor());
- Label* always_true_target = true_target;
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfTrueSuccessor())) {
- always_true_target = nullptr;
- }
- if (codegen_->GoesToNextBlock(if_instr->GetBlock(),
- if_instr->IfFalseSuccessor())) {
- false_target = nullptr;
- }
- GenerateTestAndBranch(if_instr, true_target, false_target, always_true_target);
+ HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
+ HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
+ Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+ nullptr : codegen_->GetLabelOf(true_successor);
+ Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+ nullptr : codegen_->GetLabelOf(false_successor);
+ GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
}
void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
LocationSummary* locations = new (GetGraph()->GetArena())
LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
- HInstruction* cond = deoptimize->InputAt(0);
- if (!cond->IsCondition() || cond->AsCondition()->NeedsMaterialization()) {
+ if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
locations->SetInAt(0, Location::Any());
}
}
@@ -1280,8 +1534,10 @@ void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCode* slow_path = new (GetGraph()->GetArena())
DeoptimizationSlowPathX86_64(deoptimize);
codegen_->AddSlowPath(slow_path);
- Label* slow_path_entry = slow_path->GetEntryLabel();
- GenerateTestAndBranch(deoptimize, slow_path_entry, nullptr, slow_path_entry);
+ GenerateTestAndBranch(deoptimize,
+ /* condition_input_index */ 0,
+ slow_path->GetEntryLabel(),
+ /* false_target */ nullptr);
}
void LocationsBuilderX86_64::VisitLocal(HLocal* local) {
@@ -1819,7 +2075,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Typ
LOG(FATAL) << "Unexpected parameter type " << type;
break;
}
- return Location();
+ return Location::NoLocation();
}
void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
@@ -1890,7 +2146,6 @@ void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke)
}
codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
-
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
@@ -1903,31 +2158,41 @@ void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
- CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
+ LocationSummary* locations = invoke->GetLocations();
+ CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
uint32_t method_offset = mirror::Class::EmbeddedImTableEntryOffset(
invoke->GetImtIndex() % mirror::Class::kImtSize, kX86_64PointerSize).Uint32Value();
- LocationSummary* locations = invoke->GetLocations();
Location receiver = locations->InAt(0);
size_t class_offset = mirror::Object::ClassOffset().SizeValue();
- // Set the hidden argument.
- CpuRegister hidden_reg = invoke->GetLocations()->GetTemp(1).AsRegister<CpuRegister>();
+  // Set the hidden argument. This is safe to do here, as RAX
+ // won't be modified thereafter, before the `call` instruction.
+ DCHECK_EQ(RAX, hidden_reg.AsRegister());
codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
- // temp = object->GetClass();
if (receiver.IsStackSlot()) {
__ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
+ // /* HeapReference<Class> */ temp = temp->klass_
__ movl(temp, Address(temp, class_offset));
} else {
+ // /* HeapReference<Class> */ temp = receiver->klass_
__ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
}
codegen_->MaybeRecordImplicitNullCheck(invoke);
+ // Instead of simply (possibly) unpoisoning `temp` here, we should
+ // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not do so in the future).
__ MaybeUnpoisonHeapReference(temp);
// temp = temp->GetImtEntryAt(method_offset);
__ movq(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
- __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kX86_64WordSize).SizeValue()));
+ __ call(Address(temp,
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64WordSize).SizeValue()));
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
@@ -2562,7 +2827,7 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
// We can use a leaq or addq if the constant can fit in an immediate.
- locations->SetInAt(1, Location::RegisterOrInt32LongConstant(add->InputAt(1)));
+ locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
}
@@ -2682,7 +2947,7 @@ void LocationsBuilderX86_64::VisitSub(HSub* sub) {
}
case Primitive::kPrimLong: {
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrInt32LongConstant(sub->InputAt(1)));
+ locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
locations->SetOut(Location::SameAsFirstInput());
break;
}
@@ -3517,22 +3782,19 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCall);
InvokeRuntimeCallingConvention calling_convention;
- locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
locations->SetOut(Location::RegisterLocation(RAX));
}
void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
- InvokeRuntimeCallingConvention calling_convention;
- codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
- instruction->GetTypeIndex());
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
-
codegen_->InvokeRuntime(instruction->GetEntrypoint(),
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
DCHECK(!codegen_->IsLeafMethod());
}
@@ -3551,13 +3813,13 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
InvokeRuntimeCallingConvention calling_convention;
codegen_->Load64BitValue(CpuRegister(calling_convention.GetRegisterAt(0)),
instruction->GetTypeIndex());
-
// Note: if heap poisoning is enabled, the entry point takes care
// of poisoning the reference.
codegen_->InvokeRuntime(instruction->GetEntrypoint(),
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
DCHECK(!codegen_->IsLeafMethod());
}
@@ -3669,13 +3931,23 @@ void InstructionCodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind)
void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+ bool object_field_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_field_get_with_read_barrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister());
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps for an object field get when read barriers
+ // are enabled: we do not want the move to overwrite the object's
+ // location, as we need it to emit the read barrier.
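+    // (If `out` were allowed to share the base register, a load such as
+    // "movl out, Address(base, offset)" would clobber the object reference
+    // before the read barrier could use it; cf. the DCHECKs in
+    // ReadBarrierForHeapReferenceSlowPathX86_64 above.)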
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
}
@@ -3684,7 +3956,8 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
LocationSummary* locations = instruction->GetLocations();
- CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
+ Location base_loc = locations->InAt(0);
+ CpuRegister base = base_loc.AsRegister<CpuRegister>();
Location out = locations->Out();
bool is_volatile = field_info.IsVolatile();
Primitive::Type field_type = field_info.GetFieldType();
@@ -3744,7 +4017,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
}
if (field_type == Primitive::kPrimNot) {
- __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, base_loc, offset);
}
}
@@ -3755,14 +4028,25 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
LocationSummary* locations =
new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
Primitive::Type field_type = field_info.GetFieldType();
+ bool is_volatile = field_info.IsVolatile();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
locations->SetInAt(0, Location::RequiresRegister());
if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
- locations->SetInAt(1, Location::RequiresFpuRegister());
+ if (is_volatile) {
+ // In order to satisfy the semantics of volatile, this must be a single instruction store.
+ locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
+ } else {
+ locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
+ }
} else {
- locations->SetInAt(1, Location::RegisterOrInt32LongConstant(instruction->InputAt(1)));
+ if (is_volatile) {
+ // In order to satisfy the semantics of volatile, this must be a single instruction store.
+ locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
+ } else {
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ }
}
if (needs_write_barrier) {
// Temporary registers for the write barrier.
@@ -3790,11 +4074,13 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
}
+ bool maybe_record_implicit_null_check_done = false;
+
switch (field_type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
if (value.IsConstant()) {
- int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
__ movb(Address(base, offset), Immediate(v));
} else {
__ movb(Address(base, offset), value.AsRegister<CpuRegister>());
@@ -3805,7 +4091,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
case Primitive::kPrimShort:
case Primitive::kPrimChar: {
if (value.IsConstant()) {
- int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
+ int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
__ movw(Address(base, offset), Immediate(v));
} else {
__ movw(Address(base, offset), value.AsRegister<CpuRegister>());
@@ -3838,9 +4124,11 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
case Primitive::kPrimLong: {
if (value.IsConstant()) {
int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
- DCHECK(IsInt<32>(v));
- int32_t v_32 = v;
- __ movq(Address(base, offset), Immediate(v_32));
+ codegen_->MoveInt64ToAddress(Address(base, offset),
+ Address(base, offset + sizeof(int32_t)),
+ v,
+ instruction);
+ maybe_record_implicit_null_check_done = true;
} else {
__ movq(Address(base, offset), value.AsRegister<CpuRegister>());
}
@@ -3848,12 +4136,28 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
}
case Primitive::kPrimFloat: {
- __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ if (value.IsConstant()) {
+ int32_t v =
+ bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+ __ movl(Address(base, offset), Immediate(v));
+ } else {
+ __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ }
break;
}
case Primitive::kPrimDouble: {
- __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ if (value.IsConstant()) {
+ int64_t v =
+ bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+ codegen_->MoveInt64ToAddress(Address(base, offset),
+ Address(base, offset + sizeof(int32_t)),
+ v,
+ instruction);
+ maybe_record_implicit_null_check_done = true;
+ } else {
+ __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
+ }
break;
}
@@ -3862,7 +4166,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
UNREACHABLE();
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (!maybe_record_implicit_null_check_done) {
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ }
if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
@@ -4029,20 +4335,31 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
}
void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
+ bool object_array_get_with_read_barrier =
+ kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
LocationSummary* locations =
- new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+ new (GetGraph()->GetArena()) LocationSummary(instruction,
+ object_array_get_with_read_barrier ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
if (Primitive::IsFloatingPointType(instruction->GetType())) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
} else {
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ // The output overlaps for an object array get when read barriers
+ // are enabled: we do not want the move to overwrite the array's
+ // location, as we need it to emit the read barrier.
+ locations->SetOut(
+ Location::RequiresRegister(),
+ object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
}
void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
Location index = locations->InAt(1);
Primitive::Type type = instruction->GetType();
@@ -4097,8 +4414,9 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- static_assert(sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::HeapReference<mirror::Object> and int32_t have different sizes.");
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
if (index.IsConstant()) {
@@ -4153,8 +4471,17 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
codegen_->MaybeRecordImplicitNullCheck(instruction);
if (type == Primitive::kPrimNot) {
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- __ MaybeUnpoisonHeapReference(out);
+ static_assert(
+ sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+ "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+ uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+ Location out = locations->Out();
+ if (index.IsConstant()) {
+ uint32_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, offset);
+ } else {
+ codegen_->MaybeGenerateReadBarrier(instruction, out, out, obj_loc, data_offset, index);
+ }
}
}
@@ -4164,37 +4491,41 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
bool may_need_runtime_call = instruction->NeedsTypeCheck();
+ bool object_array_set_with_read_barrier =
+ kEmitCompilerReadBarrier && (value_type == Primitive::kPrimNot);
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,
- may_need_runtime_call ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
+ (may_need_runtime_call || object_array_set_with_read_barrier) ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(
- 1, Location::RegisterOrConstant(instruction->InputAt(1)));
- locations->SetInAt(2, Location::RequiresRegister());
- if (value_type == Primitive::kPrimLong) {
- locations->SetInAt(2, Location::RegisterOrInt32LongConstant(instruction->InputAt(2)));
- } else if (value_type == Primitive::kPrimFloat || value_type == Primitive::kPrimDouble) {
- locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+ if (Primitive::IsFloatingPointType(value_type)) {
+ locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
} else {
locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
}
if (needs_write_barrier) {
// Temporary registers for the write barrier.
- locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
+
+ // This first temporary register is possibly used for heap
+ // reference poisoning and/or read barrier emission too.
+ locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
LocationSummary* locations = instruction->GetLocations();
- CpuRegister array = locations->InAt(0).AsRegister<CpuRegister>();
+ Location array_loc = locations->InAt(0);
+ CpuRegister array = array_loc.AsRegister<CpuRegister>();
Location index = locations->InAt(1);
Location value = locations->InAt(2);
Primitive::Type value_type = instruction->GetComponentType();
- bool may_need_runtime_call = locations->CanCall();
+ bool may_need_runtime_call = instruction->NeedsTypeCheck();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -4238,6 +4569,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
Address address = index.IsConstant()
? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
: Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
+
if (!value.IsRegister()) {
// Just setting null.
DCHECK(instruction->InputAt(2)->IsNullConstant());
@@ -4266,22 +4598,62 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
__ Bind(&not_null);
}
- __ movl(temp, Address(array, class_offset));
- codegen_->MaybeRecordImplicitNullCheck(instruction);
- __ MaybeUnpoisonHeapReference(temp);
- __ movl(temp, Address(temp, component_offset));
- // No need to poison/unpoison, we're comparing two poisoned references.
- __ cmpl(temp, Address(register_value, class_offset));
- if (instruction->StaticTypeOfArrayIsObjectArray()) {
- __ j(kEqual, &do_put);
- __ MaybeUnpoisonHeapReference(temp);
- __ movl(temp, Address(temp, super_offset));
- // No need to unpoison the result, we're comparing against null.
- __ testl(temp, temp);
- __ j(kNotEqual, slow_path->GetEntryLabel());
- __ Bind(&do_put);
+ if (kEmitCompilerReadBarrier) {
+ // When read barriers are enabled, the type checking
+ // instrumentation requires two read barriers:
+ //
+ // __ movl(temp2, temp);
+ // // /* HeapReference<Class> */ temp = temp->component_type_
+ // __ movl(temp, Address(temp, component_offset));
+ // codegen_->GenerateReadBarrier(
+ // instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+ //
+ // // /* HeapReference<Class> */ temp2 = register_value->klass_
+ // __ movl(temp2, Address(register_value, class_offset));
+ // codegen_->GenerateReadBarrier(
+ // instruction, temp2_loc, temp2_loc, value, class_offset, temp_loc);
+ //
+ // __ cmpl(temp, temp2);
+ //
+ // However, the second read barrier may trash `temp`, as it
+ // is a temporary register, and as such would not be saved
+ // along with live registers before calling the runtime (nor
+ // restored afterwards). So in this case, we bail out and
+ // delegate the work to the array set slow path.
+ //
+ // TODO: Extend the register allocator to support a new
+ // "(locally) live temp" location so as to avoid always
+ // going into the slow path when read barriers are enabled.
+ __ jmp(slow_path->GetEntryLabel());
} else {
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ // /* HeapReference<Class> */ temp = array->klass_
+ __ movl(temp, Address(array, class_offset));
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ __ MaybeUnpoisonHeapReference(temp);
+
+ // /* HeapReference<Class> */ temp = temp->component_type_
+ __ movl(temp, Address(temp, component_offset));
+ // If heap poisoning is enabled, no need to unpoison `temp`
+ // nor the object reference in `register_value->klass`, as
+ // we are comparing two poisoned references.
+ __ cmpl(temp, Address(register_value, class_offset));
+
+ if (instruction->StaticTypeOfArrayIsObjectArray()) {
+ __ j(kEqual, &do_put);
+ // If heap poisoning is enabled, the `temp` reference has
+ // not been unpoisoned yet; unpoison it now.
+ __ MaybeUnpoisonHeapReference(temp);
+
+ // /* HeapReference<Class> */ temp = temp->super_class_
+ __ movl(temp, Address(temp, super_offset));
+ // If heap poisoning is enabled, no need to unpoison
+ // `temp`, as we are comparing against null below.
+ __ testl(temp, temp);
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ Bind(&do_put);
+ } else {
+ __ j(kNotEqual, slow_path->GetEntryLabel());
+ }
}
}
@@ -4307,6 +4679,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
break;
}
+
case Primitive::kPrimInt: {
uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
Address address = index.IsConstant()
@@ -4330,13 +4703,15 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
: Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
if (value.IsRegister()) {
__ movq(address, value.AsRegister<CpuRegister>());
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
} else {
int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
- DCHECK(IsInt<32>(v));
- int32_t v_32 = v;
- __ movq(address, Immediate(v_32));
+ Address address_high = index.IsConstant()
+ ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+ offset + sizeof(int32_t))
+ : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+ codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
}
- codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -4345,8 +4720,14 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
Address address = index.IsConstant()
? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + offset)
: Address(array, index.AsRegister<CpuRegister>(), TIMES_4, offset);
- DCHECK(value.IsFpuRegister());
- __ movss(address, value.AsFpuRegister<XmmRegister>());
+ if (value.IsFpuRegister()) {
+ __ movss(address, value.AsFpuRegister<XmmRegister>());
+ } else {
+ DCHECK(value.IsConstant());
+ int32_t v =
+ bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
+ __ movl(address, Immediate(v));
+ }
codegen_->MaybeRecordImplicitNullCheck(instruction);
break;
}
@@ -4356,9 +4737,18 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
Address address = index.IsConstant()
? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + offset)
: Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset);
- DCHECK(value.IsFpuRegister());
- __ movsd(address, value.AsFpuRegister<XmmRegister>());
- codegen_->MaybeRecordImplicitNullCheck(instruction);
+ if (value.IsFpuRegister()) {
+ __ movsd(address, value.AsFpuRegister<XmmRegister>());
+ codegen_->MaybeRecordImplicitNullCheck(instruction);
+ } else {
+ int64_t v =
+ bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
+ Address address_high = index.IsConstant()
+ ? Address(array, (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) +
+ offset + sizeof(int32_t))
+ : Address(array, index.AsRegister<CpuRegister>(), TIMES_8, offset + sizeof(int32_t));
+ codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
+ }
break;
}
@@ -4739,7 +5129,8 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
CodeGenerator::CreateLoadClassLocationSummary(
cls,
Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
- Location::RegisterLocation(RAX));
+ Location::RegisterLocation(RAX),
+ /* code_generator_supports_read_barrier */ true);
}
void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
@@ -4750,31 +5141,58 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) {
cls,
cls->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
return;
}
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ Location out_loc = locations->Out();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
+
if (cls->IsReferrersClass()) {
DCHECK(!cls->CanCallRuntime());
DCHECK(!cls->MustGenerateClinitCheck());
- __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
+ uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+ __ leaq(out, Address(current_method, declaring_class_offset));
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ __ movl(out, Address(current_method, declaring_class_offset));
+ }
} else {
- DCHECK(cls->CanCallRuntime());
- __ movq(out, Address(
- current_method, ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
- __ movl(out, Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())));
- // TODO: We will need a read barrier here.
-
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
- codegen_->AddSlowPath(slow_path);
- __ testl(out, out);
- __ j(kEqual, slow_path->GetEntryLabel());
- if (cls->MustGenerateClinitCheck()) {
- GenerateClassInitializationCheck(slow_path, out);
+ // /* GcRoot<mirror::Class>[] */ out =
+ // current_method.ptr_sized_fields_->dex_cache_resolved_types_
+ __ movq(out, Address(current_method,
+ ArtMethod::DexCacheResolvedTypesOffset(kX86_64PointerSize).Int32Value()));
+
+ size_t cache_offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &out[type_index]
+ __ leaq(out, Address(out, cache_offset));
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(cls, out_loc, out_loc);
} else {
- __ Bind(slow_path->GetExitLabel());
+ // /* GcRoot<mirror::Class> */ out = out[type_index]
+ __ movl(out, Address(out, cache_offset));
+ }
+
+ if (!cls->IsInDexCache() || cls->MustGenerateClinitCheck()) {
+ DCHECK(cls->CanCallRuntime());
+ SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
+ cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ codegen_->AddSlowPath(slow_path);
+ if (!cls->IsInDexCache()) {
+ __ testl(out, out);
+ __ j(kEqual, slow_path->GetEntryLabel());
+ }
+ if (cls->MustGenerateClinitCheck()) {
+ GenerateClassInitializationCheck(slow_path, out);
+ } else {
+ __ Bind(slow_path->GetExitLabel());
+ }
}
}
}
@@ -4809,12 +5227,35 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
codegen_->AddSlowPath(slow_path);
LocationSummary* locations = load->GetLocations();
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ Location out_loc = locations->Out();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
- __ movl(out, Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()));
- __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Int32Value()));
- __ movl(out, Address(out, CodeGenerator::GetCacheOffset(load->GetStringIndex())));
- // TODO: We will need a read barrier here.
+
+ uint32_t declaring_class_offset = ArtMethod::DeclaringClassOffset().Int32Value();
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::Class>* */ out = &(current_method->declaring_class_)
+ __ leaq(out, Address(current_method, declaring_class_offset));
+ // /* mirror::Class* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+ __ movl(out, Address(current_method, declaring_class_offset));
+ }
+
+ // /* GcRoot<mirror::String>[] */ out = out->dex_cache_strings_
+ __ movq(out, Address(out, mirror::Class::DexCacheStringsOffset().Uint32Value()));
+
+ size_t cache_offset = CodeGenerator::GetCacheOffset(load->GetStringIndex());
+ if (kEmitCompilerReadBarrier) {
+ // /* GcRoot<mirror::String>* */ out = &out[string_index]
+ __ leaq(out, Address(out, cache_offset));
+ // /* mirror::String* */ out = out->Read()
+ codegen_->GenerateReadBarrierForRoot(load, out_loc, out_loc);
+ } else {
+ // /* GcRoot<mirror::String> */ out = out[string_index]
+ __ movl(out, Address(out, cache_offset));
+ }
+
__ testl(out, out);
__ j(kEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -4854,44 +5295,49 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
instruction,
instruction->GetDexPc(),
nullptr);
+ CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
- switch (instruction->GetTypeCheckKind()) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = LocationSummary::kNoCall;
+ call_kind =
+ kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
break;
+ case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- call_kind = LocationSummary::kCall;
- break;
- case TypeCheckKind::kArrayCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
+
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
- if (call_kind != LocationSummary::kCall) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
- // Note that TypeCheckSlowPathX86_64 uses this register too.
- locations->SetOut(Location::RequiresRegister());
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
- locations->SetOut(Location::RegisterLocation(RAX));
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::Any());
+ // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
+ locations->SetOut(Location::RequiresRegister());
+ // When read barriers are enabled, we need a temporary register for
+ // some cases.
+ if (kEmitCompilerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ locations->AddTemp(Location::RequiresRegister());
}
}
void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
- CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
Location cls = locations->InAt(1);
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ Location out_loc = locations->Out();
+ CpuRegister out = out_loc.AsRegister<CpuRegister>();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -4906,15 +5352,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
__ j(kEqual, &zero);
}
- // In case of an interface/unresolved check, we put the object class into the object register.
- // This is safe, as the register is caller-save, and the object must be in another
- // register if it survives the runtime call.
- CpuRegister target = (instruction->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) ||
- (instruction->GetTypeCheckKind() == TypeCheckKind::kUnresolvedCheck)
- ? obj
- : out;
- __ movl(target, Address(obj, class_offset));
- __ MaybeUnpoisonHeapReference(target);
+ // /* HeapReference<Class> */ out = obj->klass_
+ __ movl(out, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, class_offset);
switch (instruction->GetTypeCheckKind()) {
case TypeCheckKind::kExactCheck: {
@@ -4936,13 +5376,23 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
NearLabel loop, success;
__ Bind(&loop);
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+ __ movl(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->super_class_
__ movl(out, Address(out, super_offset));
- __ MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -4959,6 +5409,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
NearLabel loop, success;
@@ -4970,8 +5421,17 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
__ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
}
__ j(kEqual, &success);
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+ __ movl(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->super_class_
__ movl(out, Address(out, super_offset));
- __ MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, super_offset);
__ testl(out, out);
__ j(kNotEqual, &loop);
// If `out` is null, we use it for the result, and jump to `done`.
@@ -4983,6 +5443,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
NearLabel exact_check;
@@ -4993,9 +5454,18 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
__ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
}
__ j(kEqual, &exact_check);
- // Otherwise, we need to check that the object's class is a non primitive array.
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ Location temp_loc = kEmitCompilerReadBarrier ? locations->GetTemp(0) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `out` into `temp` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
+ __ movl(temp, out);
+ }
+ // /* HeapReference<Class> */ out = out->component_type_
__ movl(out, Address(out, component_offset));
- __ MaybeUnpoisonHeapReference(out);
+ codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, temp_loc, component_offset);
__ testl(out, out);
// If `out` is null, we use it for the result, and jump to `done`.
__ j(kEqual, &done);
@@ -5006,6 +5476,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
__ jmp(&done);
break;
}
+
case TypeCheckKind::kArrayCheck: {
if (cls.IsRegister()) {
__ cmpl(out, cls.AsRegister<CpuRegister>());
@@ -5014,8 +5485,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
__ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
}
DCHECK(locations->OnlyCallsOnSlowPath());
- slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
- instruction, /* is_fatal */ false);
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
+ /* is_fatal */ false);
codegen_->AddSlowPath(slow_path);
__ j(kNotEqual, slow_path->GetEntryLabel());
__ movl(out, Immediate(1));
@@ -5024,13 +5495,25 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
case TypeCheckKind::kUnresolvedCheck:
- case TypeCheckKind::kInterfaceCheck:
- default: {
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pInstanceofNonTrivial),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on the slow path, but we always go
+ // into the slow path for the unresolved & interface check
+ // cases.
+ //
+ // We cannot directly call the InstanceofNonTrivial runtime
+ // entry point without resorting to a type checking slow path
+ // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers to the inputs of this
+ // HInstanceOf instruction (following the runtime calling
+ // convention), which might be cluttered by the potential first
+ // read barrier emission at the beginning of this method.
+ DCHECK(locations->OnlyCallsOnSlowPath());
+ slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
+ /* is_fatal */ false);
+ codegen_->AddSlowPath(slow_path);
+ __ jmp(slow_path->GetEntryLabel());
if (zero.IsLinked()) {
__ jmp(&done);
}
@@ -5055,58 +5538,60 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-
- switch (instruction->GetTypeCheckKind()) {
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kAbstractClassCheck:
case TypeCheckKind::kClassHierarchyCheck:
case TypeCheckKind::kArrayObjectCheck:
- call_kind = throws_into_catch
- ? LocationSummary::kCallOnSlowPath
- : LocationSummary::kNoCall;
+ call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path.
break;
+ case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- call_kind = LocationSummary::kCall;
- break;
- case TypeCheckKind::kArrayCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
}
-
- LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
- instruction, call_kind);
- if (call_kind != LocationSummary::kCall) {
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
- // Note that TypeCheckSlowPathX86_64 uses this register too.
+ LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::Any());
+ // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
+ locations->AddTemp(Location::RequiresRegister());
+ // When read barriers are enabled, we need an additional temporary
+ // register for some cases.
+ if (kEmitCompilerReadBarrier &&
+ (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
locations->AddTemp(Location::RequiresRegister());
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
}
}
void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
- CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
+ Location obj_loc = locations->InAt(0);
+ CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
Location cls = locations->InAt(1);
- CpuRegister temp = locations->WillCall()
- ? CpuRegister(kNoRegister)
- : locations->GetTemp(0).AsRegister<CpuRegister>();
-
+ Location temp_loc = locations->GetTemp(0);
+ CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
- SlowPathCode* slow_path = nullptr;
- if (!locations->WillCall()) {
- slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(
- instruction, !locations->CanCall());
- codegen_->AddSlowPath(slow_path);
- }
+ TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+ bool is_type_check_slow_path_fatal =
+ (type_check_kind == TypeCheckKind::kExactCheck ||
+ type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+ type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+ type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+ !instruction->CanThrowIntoCatchBlock();
+ SlowPathCode* type_check_slow_path =
+ new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
+ is_type_check_slow_path_fatal);
+ codegen_->AddSlowPath(type_check_slow_path);
NearLabel done;
// Avoid null check if we know obj is not null.
@@ -5115,15 +5600,11 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
__ j(kEqual, &done);
}
- if (locations->WillCall()) {
- __ movl(obj, Address(obj, class_offset));
- __ MaybeUnpoisonHeapReference(obj);
- } else {
- __ movl(temp, Address(obj, class_offset));
- __ MaybeUnpoisonHeapReference(temp);
- }
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
- switch (instruction->GetTypeCheckKind()) {
+ switch (type_check_kind) {
case TypeCheckKind::kExactCheck:
case TypeCheckKind::kArrayCheck: {
if (cls.IsRegister()) {
@@ -5134,19 +5615,44 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
}
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kAbstractClassCheck: {
// If the class is abstract, we eagerly fetch the super class of the
// object to avoid doing a comparison we know will fail.
- NearLabel loop;
+ NearLabel loop, compare_classes;
__ Bind(&loop);
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+ __ movl(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->super_class_
__ movl(temp, Address(temp, super_offset));
- __ MaybeUnpoisonHeapReference(temp);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+ // If the class reference currently in `temp` is not null, jump
+ // to the `compare_classes` label to compare it with the checked
+ // class.
__ testl(temp, temp);
- // Jump to the slow path to throw the exception.
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, &compare_classes);
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+      // But before going into the slow path, move the object's class
+      // back into `temp`, as it has been overwritten in the meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ jmp(type_check_slow_path->GetEntryLabel());
+
+ __ Bind(&compare_classes);
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<CpuRegister>());
} else {
@@ -5156,6 +5662,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
__ j(kNotEqual, &loop);
break;
}
+
case TypeCheckKind::kClassHierarchyCheck: {
// Walk over the class hierarchy to find a match.
NearLabel loop;
@@ -5167,16 +5674,39 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
__ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
}
__ j(kEqual, &done);
+
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+ __ movl(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->super_class_
__ movl(temp, Address(temp, super_offset));
- __ MaybeUnpoisonHeapReference(temp);
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, temp2_loc, super_offset);
+
+ // If the class reference currently in `temp` is not null, jump
+ // back at the beginning of the loop.
__ testl(temp, temp);
__ j(kNotEqual, &loop);
- // Jump to the slow path to throw the exception.
- __ jmp(slow_path->GetEntryLabel());
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+      // But before going into the slow path, move the object's class
+      // back into `temp`, as it has been overwritten in the meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ jmp(type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kArrayObjectCheck: {
// Do an exact check.
+ NearLabel check_non_primitive_component_type;
if (cls.IsRegister()) {
__ cmpl(temp, cls.AsRegister<CpuRegister>());
} else {
@@ -5184,29 +5714,67 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
__ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
}
__ j(kEqual, &done);
- // Otherwise, we need to check that the object's class is a non primitive array.
+
+ // Otherwise, we need to check that the object's class is a non-primitive array.
+ Location temp2_loc =
+ kEmitCompilerReadBarrier ? locations->GetTemp(1) : Location::NoLocation();
+ if (kEmitCompilerReadBarrier) {
+ // Save the value of `temp` into `temp2` before overwriting it
+ // in the following move operation, as we will need it for the
+ // read barrier below.
+ CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
+ __ movl(temp2, temp);
+ }
+ // /* HeapReference<Class> */ temp = temp->component_type_
__ movl(temp, Address(temp, component_offset));
- __ MaybeUnpoisonHeapReference(temp);
+ codegen_->MaybeGenerateReadBarrier(
+ instruction, temp_loc, temp_loc, temp2_loc, component_offset);
+
+ // If the component type is not null (i.e. the object is indeed
+ // an array), jump to label `check_non_primitive_component_type`
+ // to further check that this component type is not a primitive
+ // type.
__ testl(temp, temp);
- __ j(kEqual, slow_path->GetEntryLabel());
+ __ j(kNotEqual, &check_non_primitive_component_type);
+ // Otherwise, jump to the slow path to throw the exception.
+ //
+      // But before going into the slow path, move the object's class
+      // back into `temp`, as it has been overwritten in the meantime.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ jmp(type_check_slow_path->GetEntryLabel());
+
+ __ Bind(&check_non_primitive_component_type);
__ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
- __ j(kNotEqual, slow_path->GetEntryLabel());
+ __ j(kEqual, &done);
+ // Same comment as above regarding `temp` and the slow path.
+ // /* HeapReference<Class> */ temp = obj->klass_
+ __ movl(temp, Address(obj, class_offset));
+ codegen_->MaybeGenerateReadBarrier(instruction, temp_loc, temp_loc, obj_loc, class_offset);
+ __ jmp(type_check_slow_path->GetEntryLabel());
break;
}
+
case TypeCheckKind::kUnresolvedCheck:
case TypeCheckKind::kInterfaceCheck:
- default:
- codegen_->InvokeRuntime(QUICK_ENTRY_POINT(pCheckCast),
- instruction,
- instruction->GetDexPc(),
- nullptr);
+ // We always go into the type check slow path for the unresolved &
+ // interface check cases.
+ //
+ // We cannot directly call the CheckCast runtime entry point
+ // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require
+      // assigning fixed registers to the inputs of this HCheckCast
+ // instruction (following the runtime calling convention), which
+ // might be cluttered by the potential first read barrier
+ // emission at the beginning of this method.
+ __ jmp(type_check_slow_path->GetEntryLabel());
break;
}
__ Bind(&done);
- if (slow_path != nullptr) {
- __ Bind(slow_path->GetExitLabel());
- }
+ __ Bind(type_check_slow_path->GetExitLabel());
}
void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
@@ -5222,6 +5790,11 @@ void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* in
instruction,
instruction->GetDexPc(),
nullptr);
+ if (instruction->IsEnter()) {
+ CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+ } else {
+ CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+ }
}
void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
@@ -5339,6 +5912,82 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in
}
}
+void CodeGeneratorX86_64::GenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+ // If heap poisoning is enabled, the unpoisoning of the loaded
+ // reference will be carried out by the runtime within the slow
+ // path.
+ //
+ // Note that `ref` currently does not get unpoisoned (when heap
+ // poisoning is enabled), which is alright as the `ref` argument is
+ // not used by the artReadBarrierSlow entry point.
+ //
+ // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+ SlowPathCode* slow_path = new (GetGraph()->GetArena())
+ ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
+ AddSlowPath(slow_path);
+
+ // TODO: When read barrier has a fast path, add it here.
+ /* Currently the read barrier call is inserted after the original load.
+ * However, if we have a fast path, we need to perform the load of obj.LockWord *before* the
+ * original load. This load-load ordering is required by the read barrier.
+ * The fast path/slow path (for Baker's algorithm) should look like:
+ *
+ * bool isGray = obj.LockWord & kReadBarrierMask;
+ * lfence; // load fence or artificial data dependence to prevent load-load reordering
+ * ref = obj.field; // this is the original load
+ * if (isGray) {
+ * ref = Mark(ref); // ideally the slow path just does Mark(ref)
+ * }
+ */
+
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorX86_64::MaybeGenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index) {
+ if (kEmitCompilerReadBarrier) {
+ // If heap poisoning is enabled, unpoisoning will be taken care of
+ // by the runtime within the slow path.
+ GenerateReadBarrier(instruction, out, ref, obj, offset, index);
+ } else if (kPoisonHeapReferences) {
+ __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
+ }
+}
+
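For orientation, here is an illustrative call-site sketch (editor's example, not part of the patch); it mirrors the pattern already used in this file, e.g. in VisitInstanceOf, with `field_offset`, `out_loc` and `obj_loc` as placeholder names:

    // /* HeapReference<Object> */ out = obj->field_
    __ movl(out, Address(obj, field_offset));
    // With read barriers enabled this emits a slow-path call;
    // otherwise it merely unpoisons `out` when heap poisoning is on.
    codegen_->MaybeGenerateReadBarrier(instruction, out_loc, out_loc, obj_loc, field_offset);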
+void CodeGeneratorX86_64::GenerateReadBarrierForRoot(HInstruction* instruction,
+ Location out,
+ Location root) {
+ DCHECK(kEmitCompilerReadBarrier);
+
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special here.
+ SlowPathCode* slow_path =
+ new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
+ AddSlowPath(slow_path);
+
+ // TODO: Implement a fast path for ReadBarrierForRoot, performing
+ // the following operation (for Baker's algorithm):
+ //
+ // if (thread.tls32_.is_gc_marking) {
+ // root = Mark(root);
+ // }
+
+ __ jmp(slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
+}
+
void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
// Nothing to do, this should be removed during prepare for register allocator.
LOG(FATAL) << "Unreachable";
@@ -5564,6 +6213,24 @@ Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
return Address::RIP(table_fixup);
}
+void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
+ const Address& addr_high,
+ int64_t v,
+ HInstruction* instruction) {
+ if (IsInt<32>(v)) {
+ int32_t v_32 = v;
+ __ movq(addr_low, Immediate(v_32));
+ MaybeRecordImplicitNullCheck(instruction);
+ } else {
+    // The constant does not fit in a sign-extended 32-bit immediate;
+    // store it in two 32-bit halves.
+ int32_t low_v = Low32Bits(v);
+ int32_t high_v = High32Bits(v);
+ __ movl(addr_low, Immediate(low_v));
+ MaybeRecordImplicitNullCheck(instruction);
+ __ movl(addr_high, Immediate(high_v));
+ }
+}
+
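A rough illustration of the split above (editor's sketch, not part of the patch): the branch depends only on whether the constant is representable as a sign-extended 32-bit immediate, and in both branches the implicit null check is recorded after the first store, since that is the access that can fault on a null array.

    int64_t a = INT64_C(-1);                  // IsInt<32>(a): single movq of a 32-bit immediate.
    int64_t b = INT64_C(0x0000000112345678);  // !IsInt<32>(b): two movl stores.
    // Low32Bits(b)  == 0x12345678  -> written to addr_low
    // High32Bits(b) == 0x00000001  -> written to addr_high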
#undef __
} // namespace x86_64
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index fc485f5bb6..145b1f33b4 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -217,14 +217,12 @@ class InstructionCodeGeneratorX86_64 : public HGraphVisitor {
void PushOntoFPStack(Location source, uint32_t temp_offset,
uint32_t stack_adjustment, bool is_float);
void GenerateTestAndBranch(HInstruction* instruction,
+ size_t condition_input_index,
Label* true_target,
- Label* false_target,
- Label* always_true_target);
- void GenerateCompareTestAndBranch(HIf* if_inst,
- HCondition* condition,
+ Label* false_target);
+ void GenerateCompareTestAndBranch(HCondition* condition,
Label* true_target,
- Label* false_target,
- Label* always_true_target);
+ Label* false_target);
void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
@@ -352,6 +350,51 @@ class CodeGeneratorX86_64 : public CodeGenerator {
return isa_features_;
}
+ // Generate a read barrier for a heap reference within `instruction`.
+ //
+ // A read barrier for an object reference read from the heap is
+ // implemented as a call to the artReadBarrierSlow runtime entry
+ // point, which is passed the values in locations `ref`, `obj`, and
+ // `offset`:
+ //
+ // mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+ // mirror::Object* obj,
+ // uint32_t offset);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierSlow.
+ //
+  // When `index` is provided (i.e., when it is different from
+ // Location::NoLocation()), the offset value passed to
+ // artReadBarrierSlow is adjusted to take `index` into account.
+ void GenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
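As a hedged sketch (editor's note, not part of the patch), for the array accesses in the code generator above this adjustment amounts to adding the scaled index to `offset`, with the 32-bit compressed heap references used throughout this file:

    // Assumed effective offset passed to artReadBarrierSlow:
    //   offset + (index << TIMES_4)   // i.e. data_offset + index * sizeof(int32_t)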
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference; otherwise, if heap poisoning is enabled, unpoison the
+  // reference in `out`.
+ void MaybeGenerateReadBarrier(HInstruction* instruction,
+ Location out,
+ Location ref,
+ Location obj,
+ uint32_t offset,
+ Location index = Location::NoLocation());
+
+ // Generate a read barrier for a GC root within `instruction`.
+ //
+ // A read barrier for an object reference GC root is implemented as
+ // a call to the artReadBarrierForRootSlow runtime entry point,
+ // which is passed the value in location `root`:
+ //
+ // mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+ //
+ // The `out` location contains the value returned by
+ // artReadBarrierForRootSlow.
+ void GenerateReadBarrierForRoot(HInstruction* instruction, Location out, Location root);
+
int ConstantAreaStart() const {
return constant_area_start_;
}
@@ -368,6 +411,12 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
void Store64BitValueToStack(Location dest, int64_t value);
+ // Assign a 64 bit constant to an address.
+ void MoveInt64ToAddress(const Address& addr_low,
+ const Address& addr_high,
+ int64_t v,
+ HInstruction* instruction);
+
private:
struct PcRelativeDexCacheAccessInfo {
PcRelativeDexCacheAccessInfo(const DexFile& dex_file, uint32_t element_off)
@@ -395,7 +444,7 @@ class CodeGeneratorX86_64 : public CodeGenerator {
ArenaDeque<MethodPatchInfo<Label>> method_patches_;
ArenaDeque<MethodPatchInfo<Label>> relative_call_patches_;
// PC-relative DexCache access info.
- ArenaDeque<PcRelativeDexCacheAccessInfo> pc_rel_dex_cache_patches_;
+ ArenaDeque<PcRelativeDexCacheAccessInfo> pc_relative_dex_cache_patches_;
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// We will fix this up in the linker later to have the right value.
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index e1a8c9cc0f..af8b8b562a 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -17,6 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
#define ART_COMPILER_OPTIMIZING_COMMON_ARM64_H_
+#include "code_generator.h"
#include "locations.h"
#include "nodes.h"
#include "utils/arm64/assembler_arm64.h"
@@ -255,6 +256,67 @@ static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers,
return true;
}
+static inline vixl::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+ switch (op_kind) {
+ case HArm64DataProcWithShifterOp::kASR: return vixl::ASR;
+ case HArm64DataProcWithShifterOp::kLSL: return vixl::LSL;
+ case HArm64DataProcWithShifterOp::kLSR: return vixl::LSR;
+ default:
+ LOG(FATAL) << "Unexpected op kind " << op_kind;
+ UNREACHABLE();
+ return vixl::NO_SHIFT;
+ }
+}
+
+static inline vixl::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+ switch (op_kind) {
+ case HArm64DataProcWithShifterOp::kUXTB: return vixl::UXTB;
+ case HArm64DataProcWithShifterOp::kUXTH: return vixl::UXTH;
+ case HArm64DataProcWithShifterOp::kUXTW: return vixl::UXTW;
+ case HArm64DataProcWithShifterOp::kSXTB: return vixl::SXTB;
+ case HArm64DataProcWithShifterOp::kSXTH: return vixl::SXTH;
+ case HArm64DataProcWithShifterOp::kSXTW: return vixl::SXTW;
+ default:
+ LOG(FATAL) << "Unexpected op kind " << op_kind;
+ UNREACHABLE();
+ return vixl::NO_EXTEND;
+ }
+}
+
+static inline bool CanFitInShifterOperand(HInstruction* instruction) {
+ if (instruction->IsTypeConversion()) {
+ HTypeConversion* conversion = instruction->AsTypeConversion();
+ Primitive::Type result_type = conversion->GetResultType();
+ Primitive::Type input_type = conversion->GetInputType();
+    // We do not expect the input and result types to be the same.
+ return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) &&
+ (result_type != input_type);
+ } else {
+ return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) ||
+ (instruction->IsShr() && instruction->AsShr()->InputAt(1)->IsIntConstant()) ||
+ (instruction->IsUShr() && instruction->AsUShr()->InputAt(1)->IsIntConstant());
+ }
+}
+
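To make the accepted shapes concrete, a hedged sketch (the Java expressions below are illustrative only, not taken from the patch):

    // a + (b << 2)       -> HShl with a constant shift amount: can be merged
    //                       into the shifter operand of the HAdd.
    // a + (b << n)       -> shift amount is not a constant: not mergeable.
    // a + (int) longVal  -> integral HTypeConversion with distinct input and
    //                       result types: mergeable as an extended register.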
+static inline bool HasShifterOperand(HInstruction* instr) {
+ // `neg` instructions are an alias of `sub` using the zero register as the
+ // first register input.
+ bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() ||
+ instr->IsOr() || instr->IsSub() || instr->IsXor();
+ return res;
+}
+
+static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
+ DCHECK(HasShifterOperand(instruction));
+ // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg`
+ // does *not* support extension. This is because the `extended register` form
+ // of the `sub` instruction interprets the left register with code 31 as the
+ // stack pointer and not the zero register. (So does the `immediate` form.) In
+  // the other form, `shifted register`, the register with code 31 is interpreted
+ // as the zero register.
+ return instruction->IsAdd() || instruction->IsSub();
+}
+
} // namespace helpers
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
new file mode 100644
index 0000000000..b459d24d7c
--- /dev/null
+++ b/compiler/optimizing/common_dominator.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+#define ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
+
+#include "nodes.h"
+
+namespace art {
+
+// Helper class for finding common dominators of two or more blocks in a graph.
+// The domination information of a graph must not be modified while there is
+// a CommonDominator object, as its internal state could become invalid.
+class CommonDominator {
+ public:
+ // Convenience function to find the common dominator of 2 blocks.
+ static HBasicBlock* ForPair(HBasicBlock* block1, HBasicBlock* block2) {
+ CommonDominator finder(block1);
+ finder.Update(block2);
+ return finder.Get();
+ }
+
+ // Create a finder starting with a given block.
+ explicit CommonDominator(HBasicBlock* block)
+ : dominator_(block), chain_length_(ChainLength(block)) {
+ DCHECK(block != nullptr);
+ }
+
+ // Update the common dominator with another block.
+ void Update(HBasicBlock* block) {
+ DCHECK(block != nullptr);
+ HBasicBlock* block2 = dominator_;
+ DCHECK(block2 != nullptr);
+ if (block == block2) {
+ return;
+ }
+ size_t chain_length = ChainLength(block);
+ size_t chain_length2 = chain_length_;
+    // Equalize the chain lengths.
+ for ( ; chain_length > chain_length2; --chain_length) {
+ block = block->GetDominator();
+ DCHECK(block != nullptr);
+ }
+ for ( ; chain_length2 > chain_length; --chain_length2) {
+ block2 = block2->GetDominator();
+ DCHECK(block2 != nullptr);
+ }
+ // Now run up the chain until we hit the common dominator.
+ while (block != block2) {
+ --chain_length;
+ block = block->GetDominator();
+ DCHECK(block != nullptr);
+ block2 = block2->GetDominator();
+ DCHECK(block2 != nullptr);
+ }
+ dominator_ = block;
+ chain_length_ = chain_length;
+ }
+
+ HBasicBlock* Get() const {
+ return dominator_;
+ }
+
+ private:
+ static size_t ChainLength(HBasicBlock* block) {
+ size_t result = 0;
+ while (block != nullptr) {
+ ++result;
+ block = block->GetDominator();
+ }
+ return result;
+ }
+
+ HBasicBlock* dominator_;
+ size_t chain_length_;
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_COMMON_DOMINATOR_H_
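A usage sketch for the class above (editor's example, not part of the patch), folding the dominators of all predecessors of a block; it assumes the graph's dominance information is current, as required by the class comment:

    CommonDominator finder(block->GetPredecessors()[0]);
    for (size_t i = 1; i < block->GetPredecessors().size(); ++i) {
      finder.Update(block->GetPredecessors()[i]);
    }
    HBasicBlock* common = finder.Get();
    // For exactly two blocks, CommonDominator::ForPair(b1, b2) is equivalent.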
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 9754043f32..02e5dab3d4 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -123,20 +123,21 @@ void HDeadCodeElimination::RemoveDeadBlocks() {
}
// If we removed at least one block, we need to recompute the full
- // dominator tree.
+ // dominator tree and try block membership.
if (removed_one_or_more_blocks) {
graph_->ClearDominanceInformation();
graph_->ComputeDominanceInformation();
+ graph_->ComputeTryBlockInformation();
}
// Connect successive blocks created by dead branches. Order does not matter.
for (HReversePostOrderIterator it(*graph_); !it.Done();) {
HBasicBlock* block = it.Current();
- if (block->IsEntryBlock() || block->GetSuccessors().size() != 1u) {
+ if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
it.Advance();
continue;
}
- HBasicBlock* successor = block->GetSuccessors()[0];
+ HBasicBlock* successor = block->GetSingleSuccessor();
if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
it.Advance();
continue;
@@ -176,10 +177,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() {
}
void HDeadCodeElimination::Run() {
- if (!graph_->HasTryCatch()) {
- // TODO: Update dead block elimination and enable for try/catch.
- RemoveDeadBlocks();
- }
+ RemoveDeadBlocks();
SsaRedundantPhiElimination(graph_).Run();
RemoveDeadInstructions();
}
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
new file mode 100644
index 0000000000..65820630f8
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.cc
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_cache_array_fixups_arm.h"
+
+#include "base/arena_containers.h"
+#include "utils/dex_cache_arrays_layout-inl.h"
+
+namespace art {
+namespace arm {
+
+/**
+ * Finds instructions that need the dex cache arrays base as an input.
+ */
+class DexCacheArrayFixupsVisitor : public HGraphVisitor {
+ public:
+ explicit DexCacheArrayFixupsVisitor(HGraph* graph)
+ : HGraphVisitor(graph),
+ dex_cache_array_bases_(std::less<const DexFile*>(),
+ // Attribute memory use to code generator.
+ graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
+
+ void MoveBasesIfNeeded() {
+ for (const auto& entry : dex_cache_array_bases_) {
+ // Bring the base closer to the first use (previously, it was in the
+ // entry block) and relieve some pressure on the register allocator
+ // while avoiding recalculation of the base in a loop.
+ HArmDexCacheArraysBase* base = entry.second;
+ base->MoveBeforeFirstUserAndOutOfLoops();
+ }
+ }
+
+ private:
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+ // If this is an invoke with PC-relative access to the dex cache methods array,
+ // we need to add the dex cache arrays base as the special input.
+ if (invoke->HasPcRelativeDexCache()) {
+ // Initialize base for target method dex file if needed.
+ MethodReference target_method = invoke->GetTargetMethod();
+ HArmDexCacheArraysBase* base = GetOrCreateDexCacheArrayBase(*target_method.dex_file);
+ // Update the element offset in base.
+ DexCacheArraysLayout layout(kArmPointerSize, target_method.dex_file);
+ base->UpdateElementOffset(layout.MethodOffset(target_method.dex_method_index));
+ // Add the special argument base to the method.
+ DCHECK(!invoke->HasCurrentMethodInput());
+ invoke->AddSpecialInput(base);
+ }
+ }
+
+ HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
+ // Ensure we only initialize the pointer once for each dex file.
+ auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
+ if (lb != dex_cache_array_bases_.end() &&
+ !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
+ return lb->second;
+ }
+
+    // Insert the base at the start of the entry block; it gets moved to a
+    // better position later in MoveBasesIfNeeded().
+ HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
+ HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+ entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
+ dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
+ return base;
+ }
+
+ using DexCacheArraysBaseMap =
+ ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
+ DexCacheArraysBaseMap dex_cache_array_bases_;
+};
+
+void DexCacheArrayFixups::Run() {
+ DexCacheArrayFixupsVisitor visitor(graph_);
+ visitor.VisitInsertionOrder();
+ visitor.MoveBasesIfNeeded();
+}
+
+} // namespace arm
+} // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
new file mode 100644
index 0000000000..015f910328
--- /dev/null
+++ b/compiler/optimizing/dex_cache_array_fixups_arm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+namespace arm {
+
+class DexCacheArrayFixups : public HOptimization {
+ public:
+ DexCacheArrayFixups(HGraph* graph, OptimizingCompilerStats* stats)
+ : HOptimization(graph, "dex_cache_array_fixups_arm", stats) {}
+
+ void Run() OVERRIDE;
+};
+
+} // namespace arm
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index 3de96b5d84..c16b872466 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -163,12 +163,12 @@ void GraphChecker::VisitBoundsCheck(HBoundsCheck* check) {
}
void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
- // Ensure that all exception handlers are catch blocks and that handlers
- // are not listed multiple times.
+ ArrayRef<HBasicBlock* const> handlers = try_boundary->GetExceptionHandlers();
+
+ // Ensure that all exception handlers are catch blocks.
// Note that a normal-flow successor may be a catch block before CFG
// simplification. We only test normal-flow successors in SsaChecker.
- for (HExceptionHandlerIterator it(*try_boundary); !it.Done(); it.Advance()) {
- HBasicBlock* handler = it.Current();
+ for (HBasicBlock* handler : handlers) {
if (!handler->IsCatchBlock()) {
AddError(StringPrintf("Block %d with %s:%d has exceptional successor %d which "
"is not a catch block.",
@@ -177,9 +177,13 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
try_boundary->GetId(),
handler->GetBlockId()));
}
- if (current_block_->HasSuccessor(handler, it.CurrentSuccessorIndex() + 1)) {
- AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.",
- handler->GetBlockId(),
+ }
+
+ // Ensure that handlers are not listed multiple times.
+ for (size_t i = 0, e = handlers.size(); i < e; ++i) {
+ if (ContainsElement(handlers, handlers[i], i + 1)) {
+ AddError(StringPrintf("Exception handler block %d of %s:%d is listed multiple times.",
+ handlers[i]->GetBlockId(),
try_boundary->DebugName(),
try_boundary->GetId()));
}
@@ -188,6 +192,21 @@ void GraphChecker::VisitTryBoundary(HTryBoundary* try_boundary) {
VisitInstruction(try_boundary);
}
+void GraphChecker::VisitLoadException(HLoadException* load) {
+ // Ensure that LoadException is the first instruction in a catch block.
+ if (!load->GetBlock()->IsCatchBlock()) {
+ AddError(StringPrintf("%s:%d is in a non-catch block %d.",
+ load->DebugName(),
+ load->GetId(),
+ load->GetBlock()->GetBlockId()));
+ } else if (load->GetBlock()->GetFirstInstruction() != load) {
+ AddError(StringPrintf("%s:%d is not the first instruction in catch block %d.",
+ load->DebugName(),
+ load->GetId(),
+ load->GetBlock()->GetBlockId()));
+ }
+}
+
void GraphChecker::VisitInstruction(HInstruction* instruction) {
if (seen_ids_.IsBitSet(instruction->GetId())) {
AddError(StringPrintf("Instruction id %d is duplicate in graph.",
@@ -242,10 +261,11 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) {
}
size_t use_index = use_it.Current()->GetIndex();
if ((use_index >= use->InputCount()) || (use->InputAt(use_index) != instruction)) {
- AddError(StringPrintf("User %s:%d of instruction %d has a wrong "
+ AddError(StringPrintf("User %s:%d of instruction %s:%d has a wrong "
"UseListNode index.",
use->DebugName(),
use->GetId(),
+ instruction->DebugName(),
instruction->GetId()));
}
}
@@ -355,17 +375,14 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
// Ensure that catch blocks are not normal successors, and normal blocks are
// never exceptional successors.
- const size_t num_normal_successors = block->NumberOfNormalSuccessors();
- for (size_t j = 0; j < num_normal_successors; ++j) {
- HBasicBlock* successor = block->GetSuccessors()[j];
+ for (HBasicBlock* successor : block->GetNormalSuccessors()) {
if (successor->IsCatchBlock()) {
AddError(StringPrintf("Catch block %d is a normal successor of block %d.",
successor->GetBlockId(),
block->GetBlockId()));
}
}
- for (size_t j = num_normal_successors, e = block->GetSuccessors().size(); j < e; ++j) {
- HBasicBlock* successor = block->GetSuccessors()[j];
+ for (HBasicBlock* successor : block->GetExceptionalSuccessors()) {
if (!successor->IsCatchBlock()) {
AddError(StringPrintf("Normal block %d is an exceptional successor of block %d.",
successor->GetBlockId(),
@@ -377,10 +394,14 @@ void SSAChecker::VisitBasicBlock(HBasicBlock* block) {
// block with multiple successors to a block with multiple
// predecessors). Exceptional edges are synthesized and hence
// not accounted for.
- if (block->NumberOfNormalSuccessors() > 1) {
- for (size_t j = 0, e = block->NumberOfNormalSuccessors(); j < e; ++j) {
- HBasicBlock* successor = block->GetSuccessors()[j];
- if (successor->GetPredecessors().size() > 1) {
+ if (block->GetSuccessors().size() > 1) {
+ for (HBasicBlock* successor : block->GetNormalSuccessors()) {
+ if (successor->IsExitBlock() &&
+ block->IsSingleTryBoundary() &&
+ block->GetPredecessors().size() == 1u &&
+ block->GetSinglePredecessor()->GetLastInstruction()->IsThrow()) {
+ // Allowed critical edge Throw->TryBoundary->Exit.
+ } else if (successor->GetPredecessors().size() > 1) {
AddError(StringPrintf("Critical edge between blocks %d and %d.",
block->GetBlockId(),
successor->GetBlockId()));
@@ -445,12 +466,18 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
int id = loop_header->GetBlockId();
HLoopInformation* loop_information = loop_header->GetLoopInformation();
- // Ensure the pre-header block is first in the list of
- // predecessors of a loop header.
+ // Ensure the pre-header block is first in the list of predecessors of a loop
+ // header and that the header block is its only successor.
if (!loop_header->IsLoopPreHeaderFirstPredecessor()) {
AddError(StringPrintf(
"Loop pre-header is not the first predecessor of the loop header %d.",
id));
+ } else if (loop_information->GetPreHeader()->GetSuccessors().size() != 1) {
+ AddError(StringPrintf(
+ "Loop pre-header %d of loop defined by header %d has %zu successors.",
+ loop_information->GetPreHeader()->GetBlockId(),
+ id,
+ loop_information->GetPreHeader()->GetSuccessors().size()));
}
// Ensure the loop header has only one incoming branch and the remaining
@@ -493,6 +520,13 @@ void SSAChecker::CheckLoop(HBasicBlock* loop_header) {
"Loop defined by header %d has an invalid back edge %d.",
id,
back_edge_id));
+ } else if (back_edge->GetLoopInformation() != loop_information) {
+ AddError(StringPrintf(
+ "Back edge %d of loop defined by header %d belongs to nested loop "
+ "with header %d.",
+ back_edge_id,
+ id,
+ back_edge->GetLoopInformation()->GetHeader()->GetBlockId()));
}
}
}
@@ -531,10 +565,14 @@ void SSAChecker::VisitInstruction(HInstruction* instruction) {
!use_it.Done(); use_it.Advance()) {
HInstruction* use = use_it.Current()->GetUser();
if (!use->IsPhi() && !instruction->StrictlyDominates(use)) {
- AddError(StringPrintf("Instruction %d in block %d does not dominate "
- "use %d in block %d.",
- instruction->GetId(), current_block_->GetBlockId(),
- use->GetId(), use->GetBlock()->GetBlockId()));
+ AddError(StringPrintf("Instruction %s:%d in block %d does not dominate "
+ "use %s:%d in block %d.",
+ instruction->DebugName(),
+ instruction->GetId(),
+ current_block_->GetBlockId(),
+ use->DebugName(),
+ use->GetId(),
+ use->GetBlock()->GetBlockId()));
}
}
@@ -697,26 +735,31 @@ void SSAChecker::VisitPhi(HPhi* phi) {
}
}
- // Test phi equivalents. There should not be two of the same type and they
- // should only be created for constants which were untyped in DEX.
- for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- HPhi* other_phi = phi_it.Current()->AsPhi();
- if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
- if (phi->GetType() == other_phi->GetType()) {
- std::stringstream type_str;
- type_str << phi->GetType();
- AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
- phi->GetId(),
- phi->GetRegNumber(),
- type_str.str().c_str()));
- } else {
- ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
- if (!IsConstantEquivalent(phi, other_phi, &visited)) {
- AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
- "are not equivalents of constants.",
+ // Test phi equivalents. There should not be two of the same type and they should only be
+ // created for constants which were untyped in DEX. Note that this test can be skipped for
+ // a synthetic phi (indicated by lack of a virtual register).
+ if (phi->GetRegNumber() != kNoRegNumber) {
+ for (HInstructionIterator phi_it(phi->GetBlock()->GetPhis());
+ !phi_it.Done();
+ phi_it.Advance()) {
+ HPhi* other_phi = phi_it.Current()->AsPhi();
+ if (phi != other_phi && phi->GetRegNumber() == other_phi->GetRegNumber()) {
+ if (phi->GetType() == other_phi->GetType()) {
+ std::stringstream type_str;
+ type_str << phi->GetType();
+ AddError(StringPrintf("Equivalent phi (%d) found for VReg %d with type: %s.",
phi->GetId(),
- other_phi->GetId(),
- phi->GetRegNumber()));
+ phi->GetRegNumber(),
+ type_str.str().c_str()));
+ } else {
+ ArenaBitVector visited(GetGraph()->GetArena(), 0, /* expandable */ true);
+ if (!IsConstantEquivalent(phi, other_phi, &visited)) {
+ AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they "
+ "are not equivalents of constants.",
+ phi->GetId(),
+ other_phi->GetId(),
+ phi->GetRegNumber()));
+ }
}
}
}
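
The duplicate-handler check above scans only the tail of the handler list, starting at position i + 1, so each duplicated handler is reported exactly once rather than once per occurrence. Below is a minimal standalone sketch of that pattern; the local ContainsElement helper is an assumption standing in for ART's utility of the same name, taken here to search from a given start index.

#include <cstddef>
#include <iostream>
#include <vector>

// Stand-in for ART's ContainsElement(container, value, start): returns true
// if `value` occurs at or after index `start`.
template <typename T>
bool ContainsElement(const std::vector<T>& v, const T& value, size_t start) {
  for (size_t i = start; i < v.size(); ++i) {
    if (v[i] == value) return true;
  }
  return false;
}

int main() {
  // Block ids of the exception handlers attached to one TryBoundary.
  std::vector<int> handlers = {4, 7, 4};
  for (size_t i = 0, e = handlers.size(); i < e; ++i) {
    // Only look at the tail so a duplicate pair is reported once.
    if (ContainsElement(handlers, handlers[i], i + 1)) {
      std::cout << "handler block " << handlers[i] << " is listed multiple times\n";
    }
  }
  return 0;
}
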
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index abf3659d91..d5ddbabc8c 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -50,6 +50,9 @@ class GraphChecker : public HGraphDelegateVisitor {
// Check successors of blocks ending in TryBoundary.
void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
+ // Check that LoadException is the first instruction in a catch block.
+ void VisitLoadException(HLoadException* load) OVERRIDE;
+
// Check that HCheckCast and HInstanceOf have HLoadClass as second input.
void VisitCheckCast(HCheckCast* check) OVERRIDE;
void VisitInstanceOf(HInstanceOf* check) OVERRIDE;
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 4111671a9b..e9fdb84d1e 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -24,6 +24,7 @@
#include "code_generator.h"
#include "dead_code_elimination.h"
#include "disassembler.h"
+#include "inliner.h"
#include "licm.h"
#include "nodes.h"
#include "optimization.h"
@@ -252,8 +253,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
void PrintSuccessors(HBasicBlock* block) {
AddIndent();
output_ << "successors";
- for (size_t i = 0; i < block->NumberOfNormalSuccessors(); ++i) {
- HBasicBlock* successor = block->GetSuccessors()[i];
+ for (HBasicBlock* successor : block->GetNormalSuccessors()) {
output_ << " \"B" << successor->GetBlockId() << "\" ";
}
output_<< std::endl;
@@ -262,8 +262,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
void PrintExceptionHandlers(HBasicBlock* block) {
AddIndent();
output_ << "xhandlers";
- for (size_t i = block->NumberOfNormalSuccessors(); i < block->GetSuccessors().size(); ++i) {
- HBasicBlock* handler = block->GetSuccessors()[i];
+ for (HBasicBlock* handler : block->GetExceptionalSuccessors()) {
output_ << " \"B" << handler->GetBlockId() << "\" ";
}
if (block->IsExitBlock() &&
@@ -394,9 +393,15 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
VisitInvoke(invoke);
- StartAttributeStream("recursive") << std::boolalpha
- << invoke->IsRecursive()
- << std::noboolalpha;
+ StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind();
+ StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
+ if (invoke->IsStatic()) {
+ StartAttributeStream("clinit_check") << invoke->GetClinitCheckRequirement();
+ }
+ }
+
+ void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+ VisitInvoke(invoke);
StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
}
@@ -420,13 +425,21 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
}
- bool IsPass(const char* name) {
- return strcmp(pass_name_, name) == 0;
+#ifdef ART_ENABLE_CODEGEN_arm64
+ void VisitArm64DataProcWithShifterOp(HArm64DataProcWithShifterOp* instruction) OVERRIDE {
+ StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
+ if (HArm64DataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
+ StartAttributeStream("shift") << instruction->GetShiftAmount();
+ }
+ }
+
+ void VisitArm64MultiplyAccumulate(HArm64MultiplyAccumulate* instruction) OVERRIDE {
+ StartAttributeStream("kind") << instruction->GetOpKind();
}
+#endif
- bool IsReferenceTypePropagationPass() {
- return strstr(pass_name_, ReferenceTypePropagation::kReferenceTypePropagationPassName)
- != nullptr;
+ bool IsPass(const char* name) {
+ return strcmp(pass_name_, name) == 0;
}
void PrintInstruction(HInstruction* instruction) {
@@ -492,7 +505,8 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
} else {
StartAttributeStream("loop") << "B" << info->GetHeader()->GetBlockId();
}
- } else if (IsReferenceTypePropagationPass()
+ } else if ((IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName)
+ || IsPass(HInliner::kInlinerPassName))
&& (instruction->GetType() == Primitive::kPrimNot)) {
ReferenceTypeInfo info = instruction->IsLoadClass()
? instruction->AsLoadClass()->GetLoadedClassRTI()
@@ -505,6 +519,18 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
} else if (instruction->IsLoadClass()) {
StartAttributeStream("klass") << "unresolved";
+ } else if (instruction->IsNullConstant()) {
+ // The NullConstant may be added to the graph during other passes that happen between
+ // ReferenceTypePropagation and Inliner (e.g. InstructionSimplifier). If the inliner
+ // doesn't run or doesn't inline anything, the NullConstant remains untyped.
+ // So we should check NullConstants for validity only after reference type propagation.
+ //
+      // Note: The infrastructure to properly type NullConstants everywhere is too complex to add
+      // for the benefit it brings.
+ StartAttributeStream("klass") << "not_set";
+ DCHECK(!is_after_pass_
+ || !IsPass(ReferenceTypePropagation::kReferenceTypePropagationPassName))
+ << " Expected a valid rti after reference type propagation";
} else {
DCHECK(!is_after_pass_)
<< "Expected a valid rti after reference type propagation";
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index c36de84064..4af111b784 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -377,9 +377,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
HInstruction* current = block->GetFirstInstruction();
while (current != nullptr) {
- set->Kill(current->GetSideEffects());
// Save the next instruction in case `current` is removed from the graph.
HInstruction* next = current->GetNext();
+    // Do not kill the set with the side effects of the instruction just yet: if
+    // the instruction is GVN'ed (replaced and removed), no kill is needed.
if (current->CanBeMoved()) {
if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
// For commutative ops, (x op y) will be treated the same as (y op x)
@@ -395,8 +396,11 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
current->ReplaceWith(existing);
current->GetBlock()->RemoveInstruction(current);
} else {
+ set->Kill(current->GetSideEffects());
set->Add(current);
}
+ } else {
+ set->Kill(current->GetSideEffects());
}
current = next;
}
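
The reordering in this hunk defers the side-effect kill until it is known that the instruction will be kept: an instruction that is GVN'ed is replaced and removed, so there is no need to invalidate value-set entries on its behalf, which keeps more candidates available for later matches. A self-contained sketch of the resulting control flow (lookup first, kill only when kept), with a plain map and toy bitmask effects standing in for ART's ValueSet and SideEffects:

#include <cstdint>
#include <iostream>
#include <iterator>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// Toy instruction: an expression string, the memory it writes (its side
// effects) and the memory it reads (its dependencies), both as bitmasks.
struct Instr {
  std::string expr;
  uint32_t writes;
  uint32_t reads;
  bool movable;  // stands in for HInstruction::CanBeMoved()
};

int main() {
  // Maps an expression to the index of the instruction that first computed it,
  // plus the memory that value depends on (so stores can invalidate it).
  std::unordered_map<std::string, std::pair<size_t, uint32_t>> set;

  auto kill = [&set](uint32_t writes) {
    for (auto it = set.begin(); it != set.end();) {
      it = (it->second.second & writes) ? set.erase(it) : std::next(it);
    }
  };

  std::vector<Instr> block = {
      {"a.f", 0x0, 0x1, true},       // load
      {"a.f", 0x0, 0x1, true},       // same load: GVN'ed, so no kill happens
      {"a.f = x", 0x1, 0x0, false},  // store: kills the recorded load of a.f
      {"a.f", 0x0, 0x1, true},       // must not reuse the value from before the store
  };

  for (size_t i = 0; i < block.size(); ++i) {
    const Instr& current = block[i];
    if (current.movable) {
      auto it = set.find(current.expr);
      if (it != set.end()) {
        // Equivalent found: `current` is replaced and removed, so its side
        // effects never happen and the set is left untouched.
        std::cout << "instr " << i << " replaced by instr " << it->second.first << "\n";
        continue;
      }
      kill(current.writes);                     // kept: apply side effects ...
      set[current.expr] = {i, current.reads};   // ... then record the new value
    } else {
      kill(current.writes);                     // not movable: only apply side effects
    }
  }
  return 0;
}
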
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index b7262f6b29..5de94f43c9 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -69,10 +69,13 @@ class InductionVarAnalysisTest : public testing::Test {
entry_ = new (&allocator_) HBasicBlock(graph_);
graph_->AddBlock(entry_);
BuildForLoop(0, n);
+ return_ = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(return_);
exit_ = new (&allocator_) HBasicBlock(graph_);
graph_->AddBlock(exit_);
entry_->AddSuccessor(loop_preheader_[0]);
- loop_header_[0]->AddSuccessor(exit_);
+ loop_header_[0]->AddSuccessor(return_);
+ return_->AddSuccessor(exit_);
graph_->SetEntryBlock(entry_);
graph_->SetExitBlock(exit_);
@@ -91,6 +94,7 @@ class InductionVarAnalysisTest : public testing::Test {
entry_->AddInstruction(new (&allocator_) HStoreLocal(tmp_, constant100_));
dum_ = new (&allocator_) HLocal(n + 2);
entry_->AddInstruction(dum_);
+ return_->AddInstruction(new (&allocator_) HReturnVoid());
exit_->AddInstruction(new (&allocator_) HExit());
// Provide loop instructions.
@@ -177,6 +181,7 @@ class InductionVarAnalysisTest : public testing::Test {
// Fixed basic blocks and instructions.
HBasicBlock* entry_;
+ HBasicBlock* return_;
HBasicBlock* exit_;
HInstruction* parameter_; // "this"
HInstruction* constant0_;
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 5530d261d2..2ac1e152e1 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -75,10 +75,12 @@ static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) {
return v;
}
-static HInstruction* Insert(HBasicBlock* preheader, HInstruction* instruction) {
- DCHECK(preheader != nullptr);
+/** Helper method to insert an instruction. */
+static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
+ DCHECK(block != nullptr);
+ DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId();
DCHECK(instruction != nullptr);
- preheader->InsertInstructionBefore(instruction, preheader->GetLastInstruction());
+ block->InsertInstructionBefore(instruction, block->GetLastInstruction());
return instruction;
}
@@ -91,48 +93,98 @@ InductionVarRange::InductionVarRange(HInductionVarAnalysis* induction_analysis)
DCHECK(induction_analysis != nullptr);
}
-InductionVarRange::Value InductionVarRange::GetMinInduction(HInstruction* context,
- HInstruction* instruction) {
- return GetInduction(context, instruction, /* is_min */ true);
-}
-
-InductionVarRange::Value InductionVarRange::GetMaxInduction(HInstruction* context,
- HInstruction* instruction) {
- return SimplifyMax(GetInduction(context, instruction, /* is_min */ false));
+void InductionVarRange::GetInductionRange(HInstruction* context,
+ HInstruction* instruction,
+ /*out*/Value* min_val,
+ /*out*/Value* max_val,
+ /*out*/bool* needs_finite_test) {
+ HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop
+ if (loop != nullptr) {
+ // Set up loop information.
+ HBasicBlock* header = loop->GetHeader();
+ bool in_body = context->GetBlock() != header;
+ HInductionVarAnalysis::InductionInfo* info =
+ induction_analysis_->LookupInfo(loop, instruction);
+ HInductionVarAnalysis::InductionInfo* trip =
+ induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
+ // Find range.
+ *min_val = GetVal(info, trip, in_body, /* is_min */ true);
+ *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
+ *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+ } else {
+ // No loop to analyze.
+ *min_val = Value();
+ *max_val = Value();
+ *needs_finite_test = false;
+ }
}
bool InductionVarRange::CanGenerateCode(HInstruction* context,
HInstruction* instruction,
- /*out*/bool* top_test) {
- return GenerateCode(context, instruction, nullptr, nullptr, nullptr, nullptr, top_test);
+ /*out*/bool* needs_finite_test,
+ /*out*/bool* needs_taken_test) {
+ return GenerateCode(context,
+ instruction,
+ nullptr, nullptr, nullptr, nullptr, nullptr, // nothing generated yet
+ needs_finite_test,
+ needs_taken_test);
}
-bool InductionVarRange::GenerateCode(HInstruction* context,
- HInstruction* instruction,
- HGraph* graph,
- HBasicBlock* block,
- /*out*/HInstruction** lower,
- /*out*/HInstruction** upper) {
- return GenerateCode(context, instruction, graph, block, lower, upper, nullptr);
+void InductionVarRange::GenerateRangeCode(HInstruction* context,
+ HInstruction* instruction,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/HInstruction** lower,
+ /*out*/HInstruction** upper) {
+ bool b1, b2; // unused
+ if (!GenerateCode(context, instruction, graph, block, lower, upper, nullptr, &b1, &b2)) {
+ LOG(FATAL) << "Failed precondition: GenerateCode()";
+ }
+}
+
+void InductionVarRange::GenerateTakenTest(HInstruction* context,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/HInstruction** taken_test) {
+ bool b1, b2; // unused
+ if (!GenerateCode(context, context, graph, block, nullptr, nullptr, taken_test, &b1, &b2)) {
+ LOG(FATAL) << "Failed precondition: GenerateCode()";
+ }
}
//
// Private class methods.
//
-InductionVarRange::Value InductionVarRange::GetInduction(HInstruction* context,
- HInstruction* instruction,
- bool is_min) {
- HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop
- if (loop != nullptr) {
- HBasicBlock* header = loop->GetHeader();
- bool in_body = context->GetBlock() != header;
- return GetVal(induction_analysis_->LookupInfo(loop, instruction),
- induction_analysis_->LookupInfo(loop, header->GetLastInstruction()),
- in_body,
- is_min);
+bool InductionVarRange::NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
+ if (info != nullptr) {
+ if (info->induction_class == HInductionVarAnalysis::kLinear) {
+ return true;
+ } else if (info->induction_class == HInductionVarAnalysis::kWrapAround) {
+ return NeedsTripCount(info->op_b);
+ }
}
- return Value();
+ return false;
+}
+
+bool InductionVarRange::IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+ if (trip != nullptr) {
+ if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
+ return trip->operation == HInductionVarAnalysis::kTripCountInBody ||
+ trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe;
+ }
+ }
+ return false;
+}
+
+bool InductionVarRange::IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+ if (trip != nullptr) {
+ if (trip->induction_class == HInductionVarAnalysis::kInvariant) {
+ return trip->operation == HInductionVarAnalysis::kTripCountInBodyUnsafe ||
+ trip->operation == HInductionVarAnalysis::kTripCountInLoopUnsafe;
+ }
+ }
+ return false;
}
InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction,
@@ -184,11 +236,13 @@ InductionVarRange::Value InductionVarRange::GetVal(HInductionVarAnalysis::Induct
case HInductionVarAnalysis::kFetch:
return GetFetch(info->fetch, trip, in_body, is_min);
case HInductionVarAnalysis::kTripCountInLoop:
+ case HInductionVarAnalysis::kTripCountInLoopUnsafe:
if (!in_body && !is_min) { // one extra!
return GetVal(info->op_a, trip, in_body, is_min);
}
FALLTHROUGH_INTENDED;
case HInductionVarAnalysis::kTripCountInBody:
+ case HInductionVarAnalysis::kTripCountInBodyUnsafe:
if (is_min) {
return Value(0);
} else if (in_body) {
@@ -356,25 +410,46 @@ bool InductionVarRange::GenerateCode(HInstruction* context,
HBasicBlock* block,
/*out*/HInstruction** lower,
/*out*/HInstruction** upper,
- /*out*/bool* top_test) {
+ /*out*/HInstruction** taken_test,
+ /*out*/bool* needs_finite_test,
+ /*out*/bool* needs_taken_test) {
HLoopInformation* loop = context->GetBlock()->GetLoopInformation(); // closest enveloping loop
if (loop != nullptr) {
+ // Set up loop information.
HBasicBlock* header = loop->GetHeader();
bool in_body = context->GetBlock() != header;
- HInductionVarAnalysis::InductionInfo* info = induction_analysis_->LookupInfo(loop, instruction);
+ HInductionVarAnalysis::InductionInfo* info =
+ induction_analysis_->LookupInfo(loop, instruction);
+ if (info == nullptr) {
+ return false; // nothing to analyze
+ }
HInductionVarAnalysis::InductionInfo* trip =
induction_analysis_->LookupInfo(loop, header->GetLastInstruction());
- if (info != nullptr && trip != nullptr) {
- if (top_test != nullptr) {
- *top_test = trip->operation != HInductionVarAnalysis::kTripCountInLoop;
+ // Determine what tests are needed. A finite test is needed if the evaluation code uses the
+    // trip-count and the loop may be unsafe (because in such cases, the index could "overshoot"
+ // the computed range). A taken test is needed for any unknown trip-count, even if evaluation
+ // code does not use the trip-count explicitly (since there could be an implicit relation
+ // between e.g. an invariant subscript and a not-taken condition).
+ *needs_finite_test = NeedsTripCount(info) && IsUnsafeTripCount(trip);
+ *needs_taken_test = IsBodyTripCount(trip);
+ // Code generation for taken test: generate the code when requested or otherwise analyze
+ // if code generation is feasible when taken test is needed.
+ if (taken_test != nullptr) {
+ return GenerateCode(
+ trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false);
+ } else if (*needs_taken_test) {
+ if (!GenerateCode(
+ trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) {
+ return false;
}
- return
+ }
+ // Code generation for lower and upper.
+ return
// Success on lower if invariant (not set), or code can be generated.
((info->induction_class == HInductionVarAnalysis::kInvariant) ||
GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) &&
// And success on upper.
GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false);
- }
}
return false;
}
@@ -387,19 +462,38 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
bool in_body,
bool is_min) {
if (info != nullptr) {
+ // Handle current operation.
Primitive::Type type = Primitive::kPrimInt;
HInstruction* opa = nullptr;
HInstruction* opb = nullptr;
- int32_t value = 0;
switch (info->induction_class) {
case HInductionVarAnalysis::kInvariant:
// Invariants.
switch (info->operation) {
case HInductionVarAnalysis::kAdd:
+ case HInductionVarAnalysis::kLT:
+ case HInductionVarAnalysis::kLE:
+ case HInductionVarAnalysis::kGT:
+ case HInductionVarAnalysis::kGE:
if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
if (graph != nullptr) {
- *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+ HInstruction* operation = nullptr;
+ switch (info->operation) {
+ case HInductionVarAnalysis::kAdd:
+ operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+ case HInductionVarAnalysis::kLT:
+ operation = new (graph->GetArena()) HLessThan(opa, opb); break;
+ case HInductionVarAnalysis::kLE:
+ operation = new (graph->GetArena()) HLessThanOrEqual(opa, opb); break;
+ case HInductionVarAnalysis::kGT:
+ operation = new (graph->GetArena()) HGreaterThan(opa, opb); break;
+ case HInductionVarAnalysis::kGE:
+ operation = new (graph->GetArena()) HGreaterThanOrEqual(opa, opb); break;
+ default:
+ LOG(FATAL) << "unknown operation";
+ }
+ *result = Insert(block, operation);
}
return true;
}
@@ -422,16 +516,21 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
}
break;
case HInductionVarAnalysis::kFetch:
- if (graph != nullptr) {
- *result = info->fetch; // already in HIR
+ if (info->fetch->GetType() == type) {
+ if (graph != nullptr) {
+ *result = info->fetch; // already in HIR
+ }
+ return true;
}
- return true;
+ break;
case HInductionVarAnalysis::kTripCountInLoop:
+ case HInductionVarAnalysis::kTripCountInLoopUnsafe:
if (!in_body && !is_min) { // one extra!
return GenerateCode(info->op_a, trip, graph, block, result, in_body, is_min);
}
FALLTHROUGH_INTENDED;
case HInductionVarAnalysis::kTripCountInBody:
+ case HInductionVarAnalysis::kTripCountInBodyUnsafe:
if (is_min) {
if (graph != nullptr) {
*result = graph->GetIntConstant(0);
@@ -452,23 +551,44 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
break;
}
break;
- case HInductionVarAnalysis::kLinear:
+ case HInductionVarAnalysis::kLinear: {
// Linear induction a * i + b, for normalized 0 <= i < TC. Restrict to unit stride only
// to avoid arithmetic wrap-around situations that are hard to guard against.
- if (GetConstant(info->op_a, &value)) {
- if (value == 1 || value == -1) {
- const bool is_min_a = value == 1 ? is_min : !is_min;
+ int32_t stride_value = 0;
+ if (GetConstant(info->op_a, &stride_value)) {
+ if (stride_value == 1 || stride_value == -1) {
+ const bool is_min_a = stride_value == 1 ? is_min : !is_min;
if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) &&
GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) {
if (graph != nullptr) {
- *result = Insert(block, new (graph->GetArena()) HAdd(type, opa, opb));
+ HInstruction* oper;
+ if (stride_value == 1) {
+ oper = new (graph->GetArena()) HAdd(type, opa, opb);
+ } else {
+ oper = new (graph->GetArena()) HSub(type, opb, opa);
+ }
+ *result = Insert(block, oper);
}
return true;
}
}
}
break;
- default: // TODO(ajcbik): add more cases
+ }
+ case HInductionVarAnalysis::kWrapAround:
+ case HInductionVarAnalysis::kPeriodic: {
+ // Wrap-around and periodic inductions are restricted to constants only, so that extreme
+ // values are easy to test at runtime without complications of arithmetic wrap-around.
+ Value extreme = GetVal(info, trip, in_body, is_min);
+ if (extreme.is_known && extreme.a_constant == 0) {
+ if (graph != nullptr) {
+ *result = graph->GetIntConstant(extreme.b_constant);
+ }
+ return true;
+ }
+ break;
+ }
+ default:
break;
}
}
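
For a linear induction a * i + b with a known trip count, the minimum and maximum over the loop body reduce to evaluating the expression at the first and last normalized iteration (0 and trip_count - 1) and ordering the two results by the sign of the stride. A standalone arithmetic sketch, independent of the InductionInfo structures, whose outputs match the GetMinMaxLinear expectations in the updated test:

#include <algorithm>
#include <cstdint>
#include <iostream>

// Range of the linear induction a * i + b over the normalized body iterations
// 0 <= i <= trip_count - 1, for a constant, known trip count.
struct Range { int64_t min; int64_t max; };

Range LinearRange(int64_t a, int64_t b, int64_t trip_count) {
  int64_t at_first = b;                        // i == 0
  int64_t at_last = a * (trip_count - 1) + b;  // i == trip_count - 1
  return {std::min(at_first, at_last), std::max(at_first, at_last)};
}

int main() {
  Range up = LinearRange(10, 20, 100);     // expected [20, 1010]
  Range down = LinearRange(-10, 20, 100);  // expected [-970, 20]
  std::cout << up.min << ".." << up.max << "\n";
  std::cout << down.min << ".." << down.max << "\n";
  return 0;
}
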
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 7fa5a26dce..7984871b08 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -57,29 +57,33 @@ class InductionVarRange {
explicit InductionVarRange(HInductionVarAnalysis* induction);
/**
- * Given a context denoted by the first instruction, returns a,
- * possibly conservative, lower bound on the instruction's value.
+ * Given a context denoted by the first instruction, returns a possibly conservative
+ * lower and upper bound on the instruction's value in the output parameters min_val
+   * and max_val, respectively. The needs_finite_test flag denotes whether an additional finite-test
+ * is needed to protect the range evaluation inside its loop.
*/
- Value GetMinInduction(HInstruction* context, HInstruction* instruction);
+ void GetInductionRange(HInstruction* context,
+ HInstruction* instruction,
+ /*out*/Value* min_val,
+ /*out*/Value* max_val,
+ /*out*/bool* needs_finite_test);
/**
- * Given a context denoted by the first instruction, returns a,
- * possibly conservative, upper bound on the instruction's value.
+ * Returns true if range analysis is able to generate code for the lower and upper
+   * bound expressions on the instruction in the given context. The needs_finite_test
+   * and needs_taken_test flags denote whether an additional finite-test and/or taken-test
+ * are needed to protect the range evaluation inside its loop.
*/
- Value GetMaxInduction(HInstruction* context, HInstruction* instruction);
-
- /**
- * Returns true if range analysis is able to generate code for the lower and upper bound
- * expressions on the instruction in the given context. Output parameter top_test denotes
- * whether a top test is needed to protect the trip-count expression evaluation.
- */
- bool CanGenerateCode(HInstruction* context, HInstruction* instruction, /*out*/bool* top_test);
+ bool CanGenerateCode(HInstruction* context,
+ HInstruction* instruction,
+ /*out*/bool* needs_finite_test,
+ /*out*/bool* needs_taken_test);
/**
* Generates the actual code in the HIR for the lower and upper bound expressions on the
* instruction in the given context. Code for the lower and upper bound expression are
- * generated in given block and graph and are returned in lower and upper, respectively.
- * For a loop invariant, lower is not set.
+   * generated in the given block and graph and are returned in the output parameters lower and
+ * upper, respectively. For a loop invariant, lower is not set.
*
* For example, given expression x+i with range [0, 5] for i, calling this method
* will generate the following sequence:
@@ -87,20 +91,35 @@ class InductionVarRange {
* block:
* lower: add x, 0
* upper: add x, 5
+ *
+ * Precondition: CanGenerateCode() returns true.
*/
- bool GenerateCode(HInstruction* context,
- HInstruction* instruction,
- HGraph* graph,
- HBasicBlock* block,
- /*out*/HInstruction** lower,
- /*out*/HInstruction** upper);
+ void GenerateRangeCode(HInstruction* context,
+ HInstruction* instruction,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/HInstruction** lower,
+ /*out*/HInstruction** upper);
+
+ /**
+   * Generates an explicit taken-test for the loop in the given context. Code is generated in
+   * the given block and graph. The taken-test is returned in the output parameter taken_test.
+ *
+ * Precondition: CanGenerateCode() returns true and needs_taken_test is set.
+ */
+ void GenerateTakenTest(HInstruction* context,
+ HGraph* graph,
+ HBasicBlock* block,
+ /*out*/HInstruction** taken_test);
private:
//
// Private helper methods.
//
- Value GetInduction(HInstruction* context, HInstruction* instruction, bool is_min);
+ static bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info);
+ static bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip);
+ static bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip);
static Value GetFetch(HInstruction* instruction,
HInductionVarAnalysis::InductionInfo* trip,
@@ -130,8 +149,8 @@ class InductionVarRange {
static Value MergeVal(Value v1, Value v2, bool is_min);
/**
- * Generates code for lower/upper expression in the HIR. Returns true on success.
- * With graph == nullptr, the method can be used to determine if code generation
+ * Generates code for lower/upper/taken-test in the HIR. Returns true on success.
+   * When the output parameters are nullptr, the method can be used to determine whether
+   * code generation would be successful without generating actual code yet.
*/
bool GenerateCode(HInstruction* context,
@@ -140,7 +159,9 @@ class InductionVarRange {
HBasicBlock* block,
/*out*/HInstruction** lower,
/*out*/HInstruction** upper,
- bool* top_test);
+ /*out*/HInstruction** taken_test,
+ /*out*/bool* needs_finite_test,
+ /*out*/bool* needs_taken_test);
static bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
HInductionVarAnalysis::InductionInfo* trip,
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index ce8926ad72..c2ba157ed8 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -46,6 +46,10 @@ class InductionVarRangeTest : public testing::Test {
EXPECT_EQ(v1.is_known, v2.is_known);
}
+ //
+ // Construction methods.
+ //
+
/** Constructs bare minimum graph. */
void BuildGraph() {
graph_->SetNumberOfVRegs(1);
@@ -58,7 +62,7 @@ class InductionVarRangeTest : public testing::Test {
}
/** Constructs loop with given upper bound. */
- void BuildLoop(HInstruction* upper) {
+ void BuildLoop(int32_t lower, HInstruction* upper, int32_t stride) {
// Control flow.
loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
graph_->AddBlock(loop_preheader_);
@@ -66,29 +70,37 @@ class InductionVarRangeTest : public testing::Test {
graph_->AddBlock(loop_header);
HBasicBlock* loop_body = new (&allocator_) HBasicBlock(graph_);
graph_->AddBlock(loop_body);
+ HBasicBlock* return_block = new (&allocator_) HBasicBlock(graph_);
+ graph_->AddBlock(return_block);
entry_block_->AddSuccessor(loop_preheader_);
loop_preheader_->AddSuccessor(loop_header);
loop_header->AddSuccessor(loop_body);
- loop_header->AddSuccessor(exit_block_);
+ loop_header->AddSuccessor(return_block);
loop_body->AddSuccessor(loop_header);
+ return_block->AddSuccessor(exit_block_);
// Instructions.
HLocal* induc = new (&allocator_) HLocal(0);
entry_block_->AddInstruction(induc);
loop_preheader_->AddInstruction(
- new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(0))); // i = 0
+ new (&allocator_) HStoreLocal(induc, graph_->GetIntConstant(lower))); // i = l
loop_preheader_->AddInstruction(new (&allocator_) HGoto());
HInstruction* load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
loop_header->AddInstruction(load);
- condition_ = new (&allocator_) HLessThan(load, upper);
+ if (stride > 0) {
+ condition_ = new (&allocator_) HLessThan(load, upper); // i < u
+ } else {
+ condition_ = new (&allocator_) HGreaterThan(load, upper); // i > u
+ }
loop_header->AddInstruction(condition_);
- loop_header->AddInstruction(new (&allocator_) HIf(condition_)); // i < u
+ loop_header->AddInstruction(new (&allocator_) HIf(condition_));
load = new (&allocator_) HLoadLocal(induc, Primitive::kPrimInt);
loop_body->AddInstruction(load);
- increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(1));
+ increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, load, graph_->GetIntConstant(stride));
loop_body->AddInstruction(increment_);
- loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_)); // i++
+ loop_body->AddInstruction(new (&allocator_) HStoreLocal(induc, increment_)); // i += s
loop_body->AddInstruction(new (&allocator_) HGoto());
- exit_block_->AddInstruction(new (&allocator_) HReturnVoid());
+ return_block->AddInstruction(new (&allocator_) HReturnVoid());
+ exit_block_->AddInstruction(new (&allocator_) HExit());
}
/** Performs induction variable analysis. */
@@ -124,8 +136,20 @@ class InductionVarRangeTest : public testing::Test {
}
/** Constructs a trip-count. */
- HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc) {
- return iva_->CreateTripCount(HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr);
+ HInductionVarAnalysis::InductionInfo* CreateTripCount(int32_t tc, bool in_loop, bool safe) {
+ if (in_loop && safe) {
+ return iva_->CreateTripCount(
+ HInductionVarAnalysis::kTripCountInLoop, CreateConst(tc), nullptr);
+ } else if (in_loop) {
+ return iva_->CreateTripCount(
+ HInductionVarAnalysis::kTripCountInLoopUnsafe, CreateConst(tc), nullptr);
+ } else if (safe) {
+ return iva_->CreateTripCount(
+ HInductionVarAnalysis::kTripCountInBody, CreateConst(tc), nullptr);
+ } else {
+ return iva_->CreateTripCount(
+ HInductionVarAnalysis::kTripCountInBodyUnsafe, CreateConst(tc), nullptr);
+ }
}
/** Constructs a linear a * i + b induction. */
@@ -139,16 +163,34 @@ class InductionVarRangeTest : public testing::Test {
HInductionVarAnalysis::kPeriodic, CreateConst(lo), CreateConst(hi));
}
+  /** Constructs a wrap-around induction consisting of a constant, followed by the given info. */
+ HInductionVarAnalysis::InductionInfo* CreateWrapAround(
+ int32_t initial,
+ HInductionVarAnalysis::InductionInfo* info) {
+ return iva_->CreateInduction(HInductionVarAnalysis::kWrapAround, CreateConst(initial), info);
+ }
+
/** Constructs a wrap-around induction consisting of a constant, followed by a range. */
HInductionVarAnalysis::InductionInfo* CreateWrapAround(int32_t initial, int32_t lo, int32_t hi) {
- return iva_->CreateInduction(
- HInductionVarAnalysis::kWrapAround, CreateConst(initial), CreateRange(lo, hi));
+ return CreateWrapAround(initial, CreateRange(lo, hi));
}
//
// Relay methods.
//
+ bool NeedsTripCount(HInductionVarAnalysis::InductionInfo* info) {
+ return InductionVarRange::NeedsTripCount(info);
+ }
+
+ bool IsBodyTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+ return InductionVarRange::IsBodyTripCount(trip);
+ }
+
+ bool IsUnsafeTripCount(HInductionVarAnalysis::InductionInfo* trip) {
+ return InductionVarRange::IsUnsafeTripCount(trip);
+ }
+
Value GetMin(HInductionVarAnalysis::InductionInfo* info,
HInductionVarAnalysis::InductionInfo* induc) {
return InductionVarRange::GetVal(info, induc, /* in_body */ true, /* is_min */ true);
@@ -202,6 +244,26 @@ class InductionVarRangeTest : public testing::Test {
// Tests on static methods.
//
+TEST_F(InductionVarRangeTest, TripCountProperties) {
+ EXPECT_FALSE(NeedsTripCount(nullptr));
+ EXPECT_FALSE(NeedsTripCount(CreateConst(1)));
+ EXPECT_TRUE(NeedsTripCount(CreateLinear(1, 1)));
+ EXPECT_FALSE(NeedsTripCount(CreateWrapAround(1, 2, 3)));
+ EXPECT_TRUE(NeedsTripCount(CreateWrapAround(1, CreateLinear(1, 1))));
+
+ EXPECT_FALSE(IsBodyTripCount(nullptr));
+ EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, true)));
+ EXPECT_FALSE(IsBodyTripCount(CreateTripCount(100, true, false)));
+ EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, true)));
+ EXPECT_TRUE(IsBodyTripCount(CreateTripCount(100, false, false)));
+
+ EXPECT_FALSE(IsUnsafeTripCount(nullptr));
+ EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, true, true)));
+ EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, true, false)));
+ EXPECT_FALSE(IsUnsafeTripCount(CreateTripCount(100, false, true)));
+ EXPECT_TRUE(IsUnsafeTripCount(CreateTripCount(100, false, false)));
+}
+
TEST_F(InductionVarRangeTest, GetMinMaxNull) {
ExpectEqual(Value(), GetMin(nullptr, nullptr));
ExpectEqual(Value(), GetMax(nullptr, nullptr));
@@ -279,10 +341,10 @@ TEST_F(InductionVarRangeTest, GetMinMaxFetch) {
}
TEST_F(InductionVarRangeTest, GetMinMaxLinear) {
- ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100)));
- ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100)));
- ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100)));
- ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100)));
+ ExpectEqual(Value(20), GetMin(CreateLinear(10, 20), CreateTripCount(100, true, true)));
+ ExpectEqual(Value(1010), GetMax(CreateLinear(10, 20), CreateTripCount(100, true, true)));
+ ExpectEqual(Value(-970), GetMin(CreateLinear(-10, 20), CreateTripCount(100, true, true)));
+ ExpectEqual(Value(20), GetMax(CreateLinear(-10, 20), CreateTripCount(100, true, true)));
}
TEST_F(InductionVarRangeTest, GetMinMaxWrapAround) {
@@ -398,61 +460,98 @@ TEST_F(InductionVarRangeTest, MaxValue) {
// Tests on instance methods.
//
-TEST_F(InductionVarRangeTest, FindRangeConstantTripCount) {
- BuildLoop(graph_->GetIntConstant(1000));
+TEST_F(InductionVarRangeTest, ConstantTripCountUp) {
+ BuildLoop(0, graph_->GetIntConstant(1000), 1);
PerformInductionVarAnalysis();
InductionVarRange range(iva_);
+ Value v1, v2;
+ bool needs_finite_test = true;
+
// In context of header: known.
- ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
- ExpectEqual(Value(1000), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+ range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(0), v1);
+ ExpectEqual(Value(1000), v2);
// In context of loop-body: known.
- ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
- ExpectEqual(Value(999), range.GetMaxInduction(increment_, condition_->InputAt(0)));
- ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
- ExpectEqual(Value(1000), range.GetMaxInduction(increment_, increment_));
+ range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(0), v1);
+ ExpectEqual(Value(999), v2);
+ range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(1), v1);
+ ExpectEqual(Value(1000), v2);
}
-TEST_F(InductionVarRangeTest, FindRangeSymbolicTripCount) {
- HInstruction* parameter = new (&allocator_) HParameterValue(
- graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
- entry_block_->AddInstruction(parameter);
- BuildLoop(parameter);
+TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
+ BuildLoop(1000, graph_->GetIntConstant(0), -1);
PerformInductionVarAnalysis();
InductionVarRange range(iva_);
- // In context of header: full range unknown.
- ExpectEqual(Value(0), range.GetMinInduction(condition_, condition_->InputAt(0)));
- ExpectEqual(Value(), range.GetMaxInduction(condition_, condition_->InputAt(0)));
+ Value v1, v2;
+ bool needs_finite_test = true;
+
+ // In context of header: known.
+ range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(0), v1);
+ ExpectEqual(Value(1000), v2);
// In context of loop-body: known.
- ExpectEqual(Value(0), range.GetMinInduction(increment_, condition_->InputAt(0)));
- ExpectEqual(Value(parameter, 1, -1), range.GetMaxInduction(increment_, condition_->InputAt(0)));
- ExpectEqual(Value(1), range.GetMinInduction(increment_, increment_));
- ExpectEqual(Value(parameter, 1, 0), range.GetMaxInduction(increment_, increment_));
+ range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(1), v1);
+ ExpectEqual(Value(1000), v2);
+ range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(0), v1);
+ ExpectEqual(Value(999), v2);
}
-TEST_F(InductionVarRangeTest, CodeGeneration) {
+TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
HInstruction* parameter = new (&allocator_) HParameterValue(
graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
entry_block_->AddInstruction(parameter);
- BuildLoop(parameter);
+ BuildLoop(0, parameter, 1);
PerformInductionVarAnalysis();
InductionVarRange range(iva_);
+ Value v1, v2;
+ bool needs_finite_test = true;
+ bool needs_taken_test = true;
+
+ // In context of header: upper unknown.
+ range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(0), v1);
+ ExpectEqual(Value(), v2);
+
+ // In context of loop-body: known.
+ range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(0), v1);
+ ExpectEqual(Value(parameter, 1, -1), v2);
+ range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(1), v1);
+ ExpectEqual(Value(parameter, 1, 0), v2);
+
HInstruction* lower = nullptr;
HInstruction* upper = nullptr;
- bool top_test = false;
+ HInstruction* taken = nullptr;
// Can generate code in context of loop-body only.
- EXPECT_FALSE(range.CanGenerateCode(condition_, condition_->InputAt(0), &top_test));
- ASSERT_TRUE(range.CanGenerateCode(increment_, condition_->InputAt(0), &top_test));
- EXPECT_TRUE(top_test);
+ EXPECT_FALSE(range.CanGenerateCode(
+ condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+ ASSERT_TRUE(range.CanGenerateCode(
+ increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+ EXPECT_FALSE(needs_finite_test);
+ EXPECT_TRUE(needs_taken_test);
// Generates code.
- EXPECT_TRUE(range.GenerateCode(
- increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper));
+ range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
// Verify lower is 0+0.
ASSERT_TRUE(lower != nullptr);
@@ -462,7 +561,7 @@ TEST_F(InductionVarRangeTest, CodeGeneration) {
ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue());
- // Verify upper is (V-1)+0
+ // Verify upper is (V-1)+0.
ASSERT_TRUE(upper != nullptr);
ASSERT_TRUE(upper->IsAdd());
ASSERT_TRUE(upper->InputAt(0)->IsSub());
@@ -471,6 +570,91 @@ TEST_F(InductionVarRangeTest, CodeGeneration) {
EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue());
ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+
+ // Verify taken-test is 0<V.
+ range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+ ASSERT_TRUE(taken != nullptr);
+ ASSERT_TRUE(taken->IsLessThan());
+ ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
+ EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue());
+ EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
+}
+
+TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
+ HInstruction* parameter = new (&allocator_) HParameterValue(
+ graph_->GetDexFile(), 0, 0, Primitive::kPrimInt);
+ entry_block_->AddInstruction(parameter);
+ BuildLoop(1000, parameter, -1);
+ PerformInductionVarAnalysis();
+ InductionVarRange range(iva_);
+
+ Value v1, v2;
+ bool needs_finite_test = true;
+ bool needs_taken_test = true;
+
+ // In context of header: lower unknown.
+ range.GetInductionRange(condition_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(), v1);
+ ExpectEqual(Value(1000), v2);
+
+ // In context of loop-body: known.
+ range.GetInductionRange(increment_, condition_->InputAt(0), &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(parameter, 1, 1), v1);
+ ExpectEqual(Value(1000), v2);
+ range.GetInductionRange(increment_, increment_, &v1, &v2, &needs_finite_test);
+ EXPECT_FALSE(needs_finite_test);
+ ExpectEqual(Value(parameter, 1, 0), v1);
+ ExpectEqual(Value(999), v2);
+
+ HInstruction* lower = nullptr;
+ HInstruction* upper = nullptr;
+ HInstruction* taken = nullptr;
+
+ // Can generate code in context of loop-body only.
+ EXPECT_FALSE(range.CanGenerateCode(
+ condition_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+ ASSERT_TRUE(range.CanGenerateCode(
+ increment_, condition_->InputAt(0), &needs_finite_test, &needs_taken_test));
+ EXPECT_FALSE(needs_finite_test);
+ EXPECT_TRUE(needs_taken_test);
+
+ // Generates code.
+ range.GenerateRangeCode(increment_, condition_->InputAt(0), graph_, loop_preheader_, &lower, &upper);
+
+ // Verify lower is 1000-(-(V-1000)-1).
+ ASSERT_TRUE(lower != nullptr);
+ ASSERT_TRUE(lower->IsSub());
+ ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
+ EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+ lower = lower->InputAt(1);
+ ASSERT_TRUE(lower->IsSub());
+ ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
+ EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
+ lower = lower->InputAt(0);
+ ASSERT_TRUE(lower->IsNeg());
+ lower = lower->InputAt(0);
+ ASSERT_TRUE(lower->IsSub());
+ EXPECT_TRUE(lower->InputAt(0)->IsParameterValue());
+ ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
+ EXPECT_EQ(1000, lower->InputAt(1)->AsIntConstant()->GetValue());
+
+ // Verify upper is 1000-0.
+ ASSERT_TRUE(upper != nullptr);
+ ASSERT_TRUE(upper->IsSub());
+ ASSERT_TRUE(upper->InputAt(0)->IsIntConstant());
+ EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue());
+ ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
+ EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+
+ // Verify taken-test is 1000>V.
+ range.GenerateTakenTest(increment_, graph_, loop_preheader_, &taken);
+ ASSERT_TRUE(taken != nullptr);
+ ASSERT_TRUE(taken->IsGreaterThan());
+ ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
+ EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue());
+ EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
}
} // namespace art
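
The expression trees verified in SymbolicTripCountDown look involved, but they simplify to the expected body range of a loop counting down from 1000 while i > V: the generated lower bound 1000 - (-(V - 1000) - 1) equals V + 1 and the upper bound 1000 - 0 equals 1000. A small standalone check of that arithmetic (sample values of V only, not part of the patch):

#include <cassert>
#include <iostream>

int main() {
  // For `for (int i = 1000; i > V; --i)` the body values of i are V+1 .. 1000.
  for (int V = -5; V < 1000; V += 7) {
    int lower = 1000 - (-(V - 1000) - 1);  // form generated by GenerateRangeCode
    int upper = 1000 - 0;
    assert(lower == V + 1);
    assert(upper == 1000);
  }
  std::cout << "generated bounds match the expected body range\n";
  return 0;
}
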
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 353881e47a..6d93be37a7 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -148,7 +148,7 @@ static ArtMethod* FindVirtualOrInterfaceTarget(HInvoke* invoke, ArtMethod* resol
// the target method. Since we check above the exact type of the receiver,
// the only reason this can happen is an IncompatibleClassChangeError.
return nullptr;
- } else if (resolved_method->IsAbstract()) {
+ } else if (!resolved_method->IsInvokable()) {
// The information we had on the receiver was not enough to find
// the target method. Since we check above the exact type of the receiver,
// the only reason this can happen is an IncompatibleClassChangeError.
@@ -192,6 +192,10 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
// We can query the dex cache directly. The verifier has populated it already.
ArtMethod* resolved_method;
if (invoke_instruction->IsInvokeStaticOrDirect()) {
+ if (invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit()) {
+ VLOG(compiler) << "Not inlining a String.<init> method";
+ return false;
+ }
MethodReference ref = invoke_instruction->AsInvokeStaticOrDirect()->GetTargetMethod();
mirror::DexCache* const dex_cache = (&caller_dex_file == ref.dex_file)
? caller_compilation_unit_.GetDexCache().Get()
@@ -406,8 +410,8 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
&type_propagation,
&sharpening,
&simplify,
- &dce,
&fold,
+ &dce,
};
for (size_t i = 0; i < arraysize(optimizations); ++i) {
@@ -534,6 +538,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
ReferenceTypeInfo::Create(obj_handle, false /* is_exact */));
}
+ // Check the integrity of reference types and run another type propagation if needed.
if ((return_replacement != nullptr)
&& (return_replacement->GetType() == Primitive::kPrimNot)) {
if (!return_replacement->GetReferenceTypeInfo().IsValid()) {
@@ -544,10 +549,20 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
DCHECK(return_replacement->IsPhi());
size_t pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
ReferenceTypeInfo::TypeHandle return_handle =
- handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
+ handles_->NewHandle(resolved_method->GetReturnType(true /* resolve */, pointer_size));
return_replacement->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
return_handle, return_handle->CannotBeAssignedFromOtherTypes() /* is_exact */));
}
+
+    // If the return type is a refinement of the declared type, run the type propagation again.
+ ReferenceTypeInfo return_rti = return_replacement->GetReferenceTypeInfo();
+ ReferenceTypeInfo invoke_rti = invoke_instruction->GetReferenceTypeInfo();
+ if (invoke_rti.IsStrictSupertypeOf(return_rti)
+ || (return_rti.IsExact() && !invoke_rti.IsExact())
+ || !return_replacement->CanBeNull()) {
+ ReferenceTypePropagation rtp_fixup(graph_, handles_);
+ rtp_fixup.Run();
+ }
}
return true;
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index b97dc1a511..2f3df7fc68 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -169,16 +169,6 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
// src
instruction->ReplaceWith(input_other);
instruction->GetBlock()->RemoveInstruction(instruction);
- } else if (instruction->IsShl() && input_cst->IsOne()) {
- // Replace Shl looking like
- // SHL dst, src, 1
- // with
- // ADD dst, src, src
- HAdd *add = new(GetGraph()->GetArena()) HAdd(instruction->GetType(),
- input_other,
- input_other);
- instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add);
- RecordSimplification();
}
}
}
@@ -372,9 +362,8 @@ void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
block->RemoveInstruction(equal);
RecordSimplification();
} else if (input_const->AsIntConstant()->IsZero()) {
- // Replace (bool_value == false) with !bool_value
- block->ReplaceAndRemoveInstructionWith(
- equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+ equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal));
+ block->RemoveInstruction(equal);
RecordSimplification();
} else {
// Replace (bool_value == integer_not_zero_nor_one_constant) with false
@@ -399,9 +388,8 @@ void InstructionSimplifierVisitor::VisitNotEqual(HNotEqual* not_equal) {
// We are comparing the boolean to a constant which is of type int and can
// be any constant.
if (input_const->AsIntConstant()->IsOne()) {
- // Replace (bool_value != true) with !bool_value
- block->ReplaceAndRemoveInstructionWith(
- not_equal, new (block->GetGraph()->GetArena()) HBooleanNot(input_value));
+ not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal));
+ block->RemoveInstruction(not_equal);
RecordSimplification();
} else if (input_const->AsIntConstant()->IsZero()) {
// Replace (bool_value != false) with bool_value
@@ -796,6 +784,34 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) {
HShl* shl = new(allocator) HShl(type, input_other, shift);
block->ReplaceAndRemoveInstructionWith(instruction, shl);
RecordSimplification();
+ } else if (IsPowerOfTwo(factor - 1)) {
+ // Transform code looking like
+ // MUL dst, src, (2^n + 1)
+ // into
+ // SHL tmp, src, n
+ // ADD dst, src, tmp
+ HShl* shl = new (allocator) HShl(type,
+ input_other,
+ GetGraph()->GetIntConstant(WhichPowerOf2(factor - 1)));
+ HAdd* add = new (allocator) HAdd(type, input_other, shl);
+
+ block->InsertInstructionBefore(shl, instruction);
+ block->ReplaceAndRemoveInstructionWith(instruction, add);
+ RecordSimplification();
+ } else if (IsPowerOfTwo(factor + 1)) {
+ // Transform code looking like
+ // MUL dst, src, (2^n - 1)
+ // into
+ // SHL tmp, src, n
+ // SUB dst, tmp, src
+ HShl* shl = new (allocator) HShl(type,
+ input_other,
+ GetGraph()->GetIntConstant(WhichPowerOf2(factor + 1)));
+ HSub* sub = new (allocator) HSub(type, shl, input_other);
+
+ block->InsertInstructionBefore(shl, instruction);
+ block->ReplaceAndRemoveInstructionWith(instruction, sub);
+ RecordSimplification();
}
}
}
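
Both new multiplication cases rest on the identities x * (2^n + 1) == (x << n) + x and x * (2^n - 1) == (x << n) - x. A standalone check for n == 3 (factors 9 and 7); non-negative inputs are used so the C++ shifts stay well-defined, while the HIR transform itself operates on two's-complement ints where the identities hold for all values:

#include <cassert>
#include <cstdint>
#include <iostream>

int main() {
  for (int32_t src = 0; src <= 1000; ++src) {
    // factor == 2^n + 1 (here 9, n == 3): MUL becomes SHL + ADD.
    assert(src * 9 == (src << 3) + src);
    // factor == 2^n - 1 (here 7, n == 3): MUL becomes SHL + SUB.
    assert(src * 7 == (src << 3) - src);
  }
  std::cout << "strength-reduced forms match plain multiplication\n";
  return 0;
}
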
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index eb79f469eb..6a34b13320 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -16,11 +16,16 @@
#include "instruction_simplifier_arm64.h"
+#include "common_arm64.h"
#include "mirror/array-inl.h"
namespace art {
namespace arm64 {
+using helpers::CanFitInShifterOperand;
+using helpers::HasShifterOperand;
+using helpers::ShifterOperandSupportsExtension;
+
void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstruction* access,
HInstruction* array,
HInstruction* index,
@@ -62,6 +67,169 @@ void InstructionSimplifierArm64Visitor::TryExtractArrayAccessAddress(HInstructio
RecordSimplification();
}
+bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use,
+ HInstruction* bitfield_op,
+ bool do_merge) {
+ DCHECK(HasShifterOperand(use));
+ DCHECK(use->IsBinaryOperation() || use->IsNeg());
+ DCHECK(CanFitInShifterOperand(bitfield_op));
+ DCHECK(!bitfield_op->HasEnvironmentUses());
+
+ Primitive::Type type = use->GetType();
+ if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) {
+ return false;
+ }
+
+ HInstruction* left;
+ HInstruction* right;
+ if (use->IsBinaryOperation()) {
+ left = use->InputAt(0);
+ right = use->InputAt(1);
+ } else {
+ DCHECK(use->IsNeg());
+ right = use->AsNeg()->InputAt(0);
+ left = GetGraph()->GetConstant(right->GetType(), 0);
+ }
+ DCHECK(left == bitfield_op || right == bitfield_op);
+
+ if (left == right) {
+ // TODO: Handle special transformations in this situation?
+ // For example should we transform `(x << 1) + (x << 1)` into `(x << 2)`?
+ // Or should this be part of a separate transformation logic?
+ return false;
+ }
+
+ bool is_commutative = use->IsBinaryOperation() && use->AsBinaryOperation()->IsCommutative();
+ HInstruction* other_input;
+ if (bitfield_op == right) {
+ other_input = left;
+ } else {
+ if (is_commutative) {
+ other_input = right;
+ } else {
+ return false;
+ }
+ }
+
+ HArm64DataProcWithShifterOp::OpKind op_kind;
+ int shift_amount = 0;
+ HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount);
+
+ if (HArm64DataProcWithShifterOp::IsExtensionOp(op_kind) &&
+ !ShifterOperandSupportsExtension(use)) {
+ return false;
+ }
+
+ if (do_merge) {
+ HArm64DataProcWithShifterOp* alu_with_op =
+ new (GetGraph()->GetArena()) HArm64DataProcWithShifterOp(use,
+ other_input,
+ bitfield_op->InputAt(0),
+ op_kind,
+ shift_amount,
+ use->GetDexPc());
+ use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op);
+ if (bitfield_op->GetUses().IsEmpty()) {
+ bitfield_op->GetBlock()->RemoveInstruction(bitfield_op);
+ }
+ RecordSimplification();
+ }
+
+ return true;
+}
+
+// Merge a bitfield move instruction into its uses if it can be merged in all of them.
+bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruction* bitfield_op) {
+ DCHECK(CanFitInShifterOperand(bitfield_op));
+
+ if (bitfield_op->HasEnvironmentUses()) {
+ return false;
+ }
+
+ const HUseList<HInstruction*>& uses = bitfield_op->GetUses();
+
+ // Check whether we can merge the instruction in all its users' shifter operand.
+ for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
+ HInstruction* use = it_use.Current()->GetUser();
+ if (!HasShifterOperand(use)) {
+ return false;
+ }
+ if (!CanMergeIntoShifterOperand(use, bitfield_op)) {
+ return false;
+ }
+ }
+
+ // Merge the instruction into its uses.
+ for (HUseIterator<HInstruction*> it_use(uses); !it_use.Done(); it_use.Advance()) {
+ HInstruction* use = it_use.Current()->GetUser();
+ bool merged = MergeIntoShifterOperand(use, bitfield_op);
+ DCHECK(merged);
+ }
+
+ return true;
+}
+
+bool InstructionSimplifierArm64Visitor::TrySimpleMultiplyAccumulatePatterns(
+ HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) {
+ DCHECK(Primitive::IsIntOrLongType(mul->GetType()));
+ DCHECK(input_binop->IsAdd() || input_binop->IsSub());
+ DCHECK_NE(input_binop, input_other);
+ if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
+ return false;
+ }
+
+ // Try to interpret patterns like
+ // a * (b <+/-> 1)
+ // as
+ // (a * b) <+/-> a
+ HInstruction* input_a = input_other;
+ HInstruction* input_b = nullptr; // Set to a non-null value if we found a pattern to optimize.
+ HInstruction::InstructionKind op_kind;
+
+ if (input_binop->IsAdd()) {
+ if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
+ // Interpret
+ // a * (b + 1)
+ // as
+ // (a * b) + a
+ input_b = input_binop->GetLeastConstantLeft();
+ op_kind = HInstruction::kAdd;
+ }
+ } else {
+ DCHECK(input_binop->IsSub());
+ if (input_binop->GetRight()->IsConstant() &&
+ input_binop->GetRight()->AsConstant()->IsMinusOne()) {
+ // Interpret
+ // a * (b - (-1))
+ // as
+ // a + (a * b)
+ input_b = input_binop->GetLeft();
+ op_kind = HInstruction::kAdd;
+ } else if (input_binop->GetLeft()->IsConstant() &&
+ input_binop->GetLeft()->AsConstant()->IsOne()) {
+ // Interpret
+ // a * (1 - b)
+ // as
+ // a - (a * b)
+ input_b = input_binop->GetRight();
+ op_kind = HInstruction::kSub;
+ }
+ }
+
+ if (input_b == nullptr) {
+ // We did not find a pattern we can optimize.
+ return false;
+ }
+
+ HArm64MultiplyAccumulate* mulacc = new(GetGraph()->GetArena()) HArm64MultiplyAccumulate(
+ mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());
+
+ mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
+ input_binop->GetBlock()->RemoveInstruction(input_binop);
+
+  // The pattern was matched and `mul` was replaced, so report success; otherwise
+  // the caller could try to simplify the already-removed HMul a second time.
+  return true;
+}
+
void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
TryExtractArrayAccessAddress(instruction,
instruction->GetArray(),
@@ -76,5 +244,110 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) {
Primitive::ComponentSize(instruction->GetComponentType()));
}
+void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) {
+ Primitive::Type type = instruction->GetType();
+ if (!Primitive::IsIntOrLongType(type)) {
+ return;
+ }
+
+ HInstruction* use = instruction->HasNonEnvironmentUses()
+ ? instruction->GetUses().GetFirst()->GetUser()
+ : nullptr;
+
+ if (instruction->HasOnlyOneNonEnvironmentUse() && (use->IsAdd() || use->IsSub())) {
+ // Replace code looking like
+ // MUL tmp, x, y
+ // SUB dst, acc, tmp
+ // with
+ // MULSUB dst, acc, x, y
+ // Note that we do not want to (unconditionally) perform the merge when the
+ // multiplication has multiple uses and it can be merged in all of them.
+ // Multiple uses could happen on the same control-flow path, and we would
+ // then increase the amount of work. In the future we could try to evaluate
+ // whether all uses are on different control-flow paths (using dominance and
+ // reverse-dominance information) and only perform the merge when they are.
+ HInstruction* accumulator = nullptr;
+ HBinaryOperation* binop = use->AsBinaryOperation();
+ HInstruction* binop_left = binop->GetLeft();
+ HInstruction* binop_right = binop->GetRight();
+ // Be careful after GVN. This should not happen since the `HMul` has only
+ // one use.
+ DCHECK_NE(binop_left, binop_right);
+ if (binop_right == instruction) {
+ accumulator = binop_left;
+ } else if (use->IsAdd()) {
+ DCHECK_EQ(binop_left, instruction);
+ accumulator = binop_right;
+ }
+
+ if (accumulator != nullptr) {
+ HArm64MultiplyAccumulate* mulacc =
+ new (GetGraph()->GetArena()) HArm64MultiplyAccumulate(type,
+ binop->GetKind(),
+ accumulator,
+ instruction->GetLeft(),
+ instruction->GetRight());
+
+ binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
+ DCHECK(!instruction->HasUses());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ RecordSimplification();
+ return;
+ }
+ }
+
+ // Use multiply accumulate instruction for a few simple patterns.
+ // We prefer not applying the following transformations if the left and
+ // right inputs perform the same operation.
+ // We rely on GVN having squashed the inputs if appropriate. However the
+ // results are still correct even if that did not happen.
+ if (instruction->GetLeft() == instruction->GetRight()) {
+ return;
+ }
+
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* right = instruction->GetRight();
+ if ((right->IsAdd() || right->IsSub()) &&
+ TrySimpleMultiplyAccumulatePatterns(instruction, right->AsBinaryOperation(), left)) {
+ return;
+ }
+ if ((left->IsAdd() || left->IsSub()) &&
+ TrySimpleMultiplyAccumulatePatterns(instruction, left->AsBinaryOperation(), right)) {
+ return;
+ }
+}
+
+void InstructionSimplifierArm64Visitor::VisitShl(HShl* instruction) {
+ if (instruction->InputAt(1)->IsConstant()) {
+ TryMergeIntoUsersShifterOperand(instruction);
+ }
+}
+
+void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) {
+ if (instruction->InputAt(1)->IsConstant()) {
+ TryMergeIntoUsersShifterOperand(instruction);
+ }
+}
+
+void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) {
+ Primitive::Type result_type = instruction->GetResultType();
+ Primitive::Type input_type = instruction->GetInputType();
+
+ if (input_type == result_type) {
+ // We let the arch-independent code handle this.
+ return;
+ }
+
+ if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) {
+ TryMergeIntoUsersShifterOperand(instruction);
+ }
+}
+
+void InstructionSimplifierArm64Visitor::VisitUShr(HUShr* instruction) {
+ if (instruction->InputAt(1)->IsConstant()) {
+ TryMergeIntoUsersShifterOperand(instruction);
+ }
+}
+
} // namespace arm64
} // namespace art
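For reference, the rewrites added to the ARM64 simplifier above correspond to the following source-level shapes. This is a plain C++ sketch, not the generated HIR; the A64 mnemonics in the comments are the instructions the new HArm64 nodes are expected to lower to:

    #include <cstdint>

    // VisitMul: a single-use HMul folded into the HAdd/HSub that consumes it.
    int64_t MulAdd(int64_t acc, int64_t x, int64_t y) {
      return acc + x * y;          // expected lowering: MADD dst, x, y, acc
    }
    int64_t MulSub(int64_t acc, int64_t x, int64_t y) {
      return acc - x * y;          // expected lowering: MSUB dst, x, y, acc
    }

    // TryMergeIntoShifterOperand: a shift (or extension) folded into the
    // second operand of the ALU instruction that uses it.
    int64_t AddShifted(int64_t a, int64_t b) {
      return a + (b << 2);         // expected lowering: ADD dst, a, b, LSL #2
    }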
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index 4b697dba0e..b7f490bb8c 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -39,9 +39,30 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
HInstruction* array,
HInstruction* index,
int access_size);
+ bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
+ bool TryMergeIntoShifterOperand(HInstruction* use,
+ HInstruction* bitfield_op,
+ bool do_merge);
+ bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+ return TryMergeIntoShifterOperand(use, bitfield_op, false);
+ }
+ bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
+ DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
+ return TryMergeIntoShifterOperand(use, bitfield_op, true);
+ }
+
+ bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
+ HBinaryOperation* input_binop,
+ HInstruction* input_other);
+ // HInstruction visitors, sorted alphabetically.
void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
void VisitArraySet(HArraySet* instruction) OVERRIDE;
+ void VisitMul(HMul* instruction) OVERRIDE;
+ void VisitShl(HShl* instruction) OVERRIDE;
+ void VisitShr(HShr* instruction) OVERRIDE;
+ void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
+ void VisitUShr(HUShr* instruction) OVERRIDE;
OptimizingCompilerStats* stats_;
};
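The CanMergeIntoShifterOperand/MergeIntoShifterOperand pair declared above is a dry-run/commit split over the same analysis, driven by the do_merge flag. A minimal sketch of that pattern with hypothetical names (CanApply/Apply stand in for TryMergeIntoShifterOperand(use, op, false/true)); it is not ART API:

    #include <vector>

    template <typename Node, typename Pred, typename Action>
    bool TransformAllUsersOrNone(const std::vector<Node*>& users,
                                 Pred can_apply, Action apply) {
      for (Node* user : users) {
        if (!can_apply(user)) {
          return false;   // One user cannot take the merged form: change nothing.
        }
      }
      for (Node* user : users) {
        apply(user);      // Guaranteed to succeed; the dry run validated every user.
      }
      return true;
    }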
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index dbe75249be..834081188b 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -89,10 +89,7 @@ static Primitive::Type GetType(uint64_t data, bool is_op_size) {
}
}
-static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) {
- if (instruction_set == kMips) {
- return Intrinsics::kNone;
- }
+static Intrinsics GetIntrinsic(InlineMethod method) {
switch (method.opcode) {
// Floating-point conversions.
case kIntrinsicDoubleCvt:
@@ -387,7 +384,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke, const DexFile
// InvokeStaticOrDirect.
InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
InvokeType invoke_type = invoke->IsInvokeStaticOrDirect() ?
- invoke->AsInvokeStaticOrDirect()->GetInvokeType() :
+ invoke->AsInvokeStaticOrDirect()->GetOptimizedInvokeType() :
invoke->IsInvokeVirtual() ? kVirtual : kSuper;
switch (intrinsic_type) {
case kStatic:
@@ -431,7 +428,7 @@ void IntrinsicsRecognizer::Run() {
DexFileMethodInliner* inliner = driver_->GetMethodInlinerMap()->GetMethodInliner(&dex_file);
DCHECK(inliner != nullptr);
if (inliner->IsIntrinsic(invoke->GetDexMethodIndex(), &method)) {
- Intrinsics intrinsic = GetIntrinsic(method, graph_->GetInstructionSet());
+ Intrinsics intrinsic = GetIntrinsic(method);
if (intrinsic != Intrinsics::kNone) {
if (!CheckInvokeType(intrinsic, invoke, dex_file)) {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 0a5acc3e64..d2017da221 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -44,7 +44,23 @@ using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitor
bool IntrinsicLocationsBuilderARM::TryDispatch(HInvoke* invoke) {
Dispatch(invoke);
LocationSummary* res = invoke->GetLocations();
- return res != nullptr && res->Intrinsified();
+ if (res == nullptr) {
+ return false;
+ }
+ if (kEmitCompilerReadBarrier && res->CanCall()) {
+ // Generating an intrinsic for this HInvoke may produce an
+ // IntrinsicSlowPathARM slow path. Currently this approach
+ // does not work when using read barriers, as the emitted
+ // calling sequence will make use of another slow path
+ // (ReadBarrierForRootSlowPathARM for HInvokeStaticOrDirect,
+ // ReadBarrierSlowPathARM for HInvokeVirtual). So we bail
+ // out in this case.
+ //
+ // TODO: Find a way to have intrinsics work with read barriers.
+ invoke->SetLocations(nullptr);
+ return false;
+ }
+ return res->Intrinsified();
}
#define __ assembler->
@@ -662,20 +678,23 @@ static void GenUnsafeGet(HInvoke* invoke,
(type == Primitive::kPrimLong) ||
(type == Primitive::kPrimNot));
ArmAssembler* assembler = codegen->GetAssembler();
- Register base = locations->InAt(1).AsRegister<Register>(); // Object pointer.
- Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); // Long offset, lo part only.
+ Location base_loc = locations->InAt(1);
+ Register base = base_loc.AsRegister<Register>(); // Object pointer.
+ Location offset_loc = locations->InAt(2);
+ Register offset = offset_loc.AsRegisterPairLow<Register>(); // Long offset, lo part only.
+ Location trg_loc = locations->Out();
if (type == Primitive::kPrimLong) {
- Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
__ add(IP, base, ShifterOperand(offset));
if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
- Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
+ Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
__ ldrexd(trg_lo, trg_hi, IP);
} else {
__ ldrd(trg_lo, Address(IP));
}
} else {
- Register trg = locations->Out().AsRegister<Register>();
+ Register trg = trg_loc.AsRegister<Register>();
__ ldr(trg, Address(base, offset));
}
@@ -684,14 +703,18 @@ static void GenUnsafeGet(HInvoke* invoke,
}
if (type == Primitive::kPrimNot) {
- Register trg = locations->Out().AsRegister<Register>();
- __ MaybeUnpoisonHeapReference(trg);
+ codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
}
}
static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+ invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ can_call ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -936,6 +959,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ Bind(&loop_head);
__ ldrex(tmp_lo, tmp_ptr);
+ // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
__ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
@@ -964,7 +988,11 @@ void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
// The UnsafeCASObject intrinsic does not always work when heap
// poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
// off temporarily as a quick fix.
+ //
// TODO(rpl): Fix it and turn it back on.
+ //
+ // TODO(rpl): Also, we should investigate whether we need a read
+ // barrier in the generated code.
if (kPoisonHeapReferences) {
return;
}
@@ -1400,6 +1428,10 @@ static void CheckPosition(ArmAssembler* assembler,
}
}
+// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
+// Note that this code path is not used (yet) because we do not
+// intrinsify methods that can go into the IntrinsicSlowPathARM
+// slow path.
void IntrinsicCodeGeneratorARM::VisitSystemArrayCopy(HInvoke* invoke) {
ArmAssembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
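The same can_call computation recurs in each back end's CreateIntIntIntToIntLocations (ARM here, and ARM64/x86/x86-64 below). A shared helper is not part of this patch; if one were factored out, it would amount to the following sketch (assumes the ART intrinsics headers):

    // Only the Unsafe object getters may need the read-barrier slow path.
    static bool UnsafeGetNeedsReadBarrierSlowPath(HInvoke* invoke) {
      return kEmitCompilerReadBarrier &&
             (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
              invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
    }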
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 059abf090d..b04dcceb05 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -143,7 +143,23 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) {
Dispatch(invoke);
LocationSummary* res = invoke->GetLocations();
- return res != nullptr && res->Intrinsified();
+ if (res == nullptr) {
+ return false;
+ }
+ if (kEmitCompilerReadBarrier && res->CanCall()) {
+ // Generating an intrinsic for this HInvoke may produce an
+ // IntrinsicSlowPathARM64 slow path. Currently this approach
+ // does not work when using read barriers, as the emitted
+ // calling sequence will make use of another slow path
+ // (ReadBarrierForRootSlowPathARM64 for HInvokeStaticOrDirect,
+ // ReadBarrierSlowPathARM64 for HInvokeVirtual). So we bail
+ // out in this case.
+ //
+ // TODO: Find a way to have intrinsics work with read barriers.
+ invoke->SetLocations(nullptr);
+ return false;
+ }
+ return res->Intrinsified();
}
#define __ masm->
@@ -818,9 +834,12 @@ static void GenUnsafeGet(HInvoke* invoke,
(type == Primitive::kPrimLong) ||
(type == Primitive::kPrimNot));
vixl::MacroAssembler* masm = codegen->GetAssembler()->vixl_masm_;
- Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
- Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
- Register trg = RegisterFrom(locations->Out(), type);
+ Location base_loc = locations->InAt(1);
+ Register base = WRegisterFrom(base_loc); // Object pointer.
+ Location offset_loc = locations->InAt(2);
+ Register offset = XRegisterFrom(offset_loc); // Long offset.
+ Location trg_loc = locations->Out();
+ Register trg = RegisterFrom(trg_loc, type);
bool use_acquire_release = codegen->GetInstructionSetFeatures().PreferAcquireRelease();
MemOperand mem_op(base.X(), offset);
@@ -837,13 +856,18 @@ static void GenUnsafeGet(HInvoke* invoke,
if (type == Primitive::kPrimNot) {
DCHECK(trg.IsW());
- codegen->GetAssembler()->MaybeUnpoisonHeapReference(trg);
+ codegen->MaybeGenerateReadBarrier(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
}
}
static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+ invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ can_call ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -1057,6 +1081,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
if (use_acquire_release) {
__ Bind(&loop_head);
__ Ldaxr(tmp_value, MemOperand(tmp_ptr));
+ // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+ // Note that this code is not (yet) used when read barriers are
+ // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
__ Cmp(tmp_value, expected);
__ B(&exit_loop, ne);
__ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1065,6 +1092,9 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
__ Dmb(InnerShareable, BarrierWrites);
__ Bind(&loop_head);
__ Ldxr(tmp_value, MemOperand(tmp_ptr));
+ // TODO: Do we need a read barrier here when `type == Primitive::kPrimNot`?
+ // Note that this code is not (yet) used when read barriers are
+ // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
__ Cmp(tmp_value, expected);
__ B(&exit_loop, ne);
__ Stxr(tmp_32, value, MemOperand(tmp_ptr));
@@ -1090,7 +1120,11 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
// The UnsafeCASObject intrinsic does not always work when heap
// poisoning is enabled (it breaks run-test 004-UnsafeTest); turn it
// off temporarily as a quick fix.
+ //
// TODO(rpl): Fix it and turn it back on.
+ //
+ // TODO(rpl): Also, we should investigate whether we need a read
+ // barrier in the generated code.
if (kPoisonHeapReferences) {
return;
}
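As a behavioural reference for the GenCas loops annotated above (both the Ldaxr/Stlxr and the Dmb + Ldxr/Stxr variants), the intrinsic implements a strong compare-and-swap. A plain C++ model using the GCC/Clang builtin, not the emitted code:

    #include <cstdint>

    // Stores new_value and returns true only if *ptr still holds expected.
    bool CasModel(int32_t* ptr, int32_t expected, int32_t new_value) {
      return __atomic_compare_exchange_n(ptr, &expected, new_value,
                                         /*weak=*/ false,
                                         __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }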
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 5efcf4eadf..326844526e 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -138,6 +138,323 @@ bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) {
#define __ assembler->
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+ FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+
+ if (is64bit) {
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ __ Mfc1(out_lo, in);
+ __ Mfhc1(out_hi, in);
+ } else {
+ Register out = locations->Out().AsRegister<Register>();
+
+ __ Mfc1(out, in);
+ }
+}
+
+// long java.lang.Double.doubleToRawLongBits(double)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
+}
+
+// int java.lang.Float.floatToRawIntBits(float)
+void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+ MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+ if (is64bit) {
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+
+ __ Mtc1(in_lo, out);
+ __ Mthc1(in_hi, out);
+ } else {
+ Register in = locations->InAt(0).AsRegister<Register>();
+
+ __ Mtc1(in, out);
+ }
+}
+
+// double java.lang.Double.longBitsToDouble(long)
+void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
+}
+
+// float java.lang.Float.intBitsToFloat(int)
+void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+ MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ LocationSummary* locations = new (arena) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenReverseBytes(LocationSummary* locations,
+ Primitive::Type type,
+ MipsAssembler* assembler,
+ bool isR2OrNewer) {
+ DCHECK(type == Primitive::kPrimShort ||
+ type == Primitive::kPrimInt ||
+ type == Primitive::kPrimLong);
+
+ if (type == Primitive::kPrimShort) {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (isR2OrNewer) {
+ __ Wsbh(out, in);
+ __ Seh(out, out);
+ } else {
+ __ Sll(TMP, in, 24);
+ __ Sra(TMP, TMP, 16);
+ __ Sll(out, in, 16);
+ __ Srl(out, out, 24);
+ __ Or(out, out, TMP);
+ }
+ } else if (type == Primitive::kPrimInt) {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (isR2OrNewer) {
+ __ Rotr(out, in, 16);
+ __ Wsbh(out, out);
+ } else {
+ // MIPS32r1
+ // __ Rotr(out, in, 16);
+ __ Sll(TMP, in, 16);
+ __ Srl(out, in, 16);
+ __ Or(out, out, TMP);
+ // __ Wsbh(out, out);
+ __ LoadConst32(AT, 0x00FF00FF);
+ __ And(TMP, out, AT);
+ __ Sll(TMP, TMP, 8);
+ __ Srl(out, out, 8);
+ __ And(out, out, AT);
+ __ Or(out, out, TMP);
+ }
+ } else if (type == Primitive::kPrimLong) {
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ if (isR2OrNewer) {
+ __ Rotr(AT, in_hi, 16);
+ __ Rotr(TMP, in_lo, 16);
+ __ Wsbh(out_lo, AT);
+ __ Wsbh(out_hi, TMP);
+ } else {
+      // CreateIntToIntLocations() marked the output with kNoOutputOverlap,
+      // so out_lo/out_hi may be allocated to the same registers as
+      // in_lo/in_hi. Be very careful not to write to out_lo/out_hi
+      // until we're completely done reading from in_lo/in_hi.
+ // __ Rotr(TMP, in_lo, 16);
+ __ Sll(TMP, in_lo, 16);
+ __ Srl(AT, in_lo, 16);
+ __ Or(TMP, TMP, AT); // Hold in TMP until it's safe
+ // to write to out_hi.
+ // __ Rotr(out_lo, in_hi, 16);
+ __ Sll(AT, in_hi, 16);
+ __ Srl(out_lo, in_hi, 16); // Here we are finally done reading
+ // from in_lo/in_hi so it's okay to
+ // write to out_lo/out_hi.
+ __ Or(out_lo, out_lo, AT);
+ // __ Wsbh(out_hi, out_hi);
+ __ LoadConst32(AT, 0x00FF00FF);
+ __ And(out_hi, TMP, AT);
+ __ Sll(out_hi, out_hi, 8);
+ __ Srl(TMP, TMP, 8);
+ __ And(TMP, TMP, AT);
+ __ Or(out_hi, out_hi, TMP);
+ // __ Wsbh(out_lo, out_lo);
+ __ And(TMP, out_lo, AT); // AT already holds the correct mask value
+ __ Sll(TMP, TMP, 8);
+ __ Srl(out_lo, out_lo, 8);
+ __ And(out_lo, out_lo, AT);
+ __ Or(out_lo, out_lo, TMP);
+ }
+ }
+}
+
+// int java.lang.Integer.reverseBytes(int)
+void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(),
+ Primitive::kPrimInt,
+ GetAssembler(),
+ codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+}
+
+// long java.lang.Long.reverseBytes(long)
+void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(),
+ Primitive::kPrimLong,
+ GetAssembler(),
+ codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+}
+
+// short java.lang.Short.reverseBytes(short)
+void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+ CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) {
+ GenReverseBytes(invoke->GetLocations(),
+ Primitive::kPrimShort,
+ GetAssembler(),
+ codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2());
+}
+
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister());
+
+ // Temporary registers to store lengths of strings and for calculations.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) {
+ MipsAssembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register str = locations->InAt(0).AsRegister<Register>();
+ Register arg = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+ Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+ Register temp3 = locations->GetTemp(2).AsRegister<Register>();
+
+ MipsLabel loop;
+ MipsLabel end;
+ MipsLabel return_true;
+ MipsLabel return_false;
+
+ // Get offsets of count, value, and class fields within a string object.
+ const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+ const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+ const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this" and the register
+  // containing the pointer to "anObject" are the same register, then
+  // "this" and "anObject" are the same object and we can
+  // short-circuit the logic to a true result.
+ if (str == arg) {
+ __ LoadConst32(out, 1);
+ return;
+ }
+
+ // Check if input is null, return false if it is.
+ __ Beqz(arg, &return_false);
+
+ // Reference equality check, return true if same reference.
+ __ Beq(str, arg, &return_true);
+
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ Lw(temp1, str, class_offset);
+ __ Lw(temp2, arg, class_offset);
+ __ Bne(temp1, temp2, &return_false);
+
+ // Load lengths of this and argument strings.
+ __ Lw(temp1, str, count_offset);
+ __ Lw(temp2, arg, count_offset);
+ // Check if lengths are equal, return false if they're not.
+ __ Bne(temp1, temp2, &return_false);
+ // Return true if both strings are empty.
+ __ Beqz(temp1, &return_true);
+
+ // Don't overwrite input registers
+ __ Move(TMP, str);
+ __ Move(temp3, arg);
+
+ // Assertions that must hold in order to compare strings 2 characters at a time.
+ DCHECK_ALIGNED(value_offset, 4);
+ static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
+
+ // Loop to compare strings 2 characters at a time starting at the beginning of the string.
+ // Ok to do this because strings are zero-padded.
+ __ Bind(&loop);
+ __ Lw(out, TMP, value_offset);
+ __ Lw(temp2, temp3, value_offset);
+ __ Bne(out, temp2, &return_false);
+ __ Addiu(TMP, TMP, 4);
+ __ Addiu(temp3, temp3, 4);
+ __ Addiu(temp1, temp1, -2);
+ __ Bgtz(temp1, &loop);
+
+ // Return true and exit the function.
+ // If loop does not result in returning false, we return true.
+ __ Bind(&return_true);
+ __ LoadConst32(out, 1);
+ __ B(&end);
+
+ // Return false and exit the function.
+ __ Bind(&return_false);
+ __ LoadConst32(out, 0);
+ __ Bind(&end);
+}
+
// Unimplemented intrinsics.
#define UNIMPLEMENTED_INTRINSIC(Name) \
@@ -148,15 +465,8 @@ void IntrinsicCodeGeneratorMIPS::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED)
UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(LongReverse)
-UNIMPLEMENTED_INTRINSIC(ShortReverseBytes)
-UNIMPLEMENTED_INTRINSIC(IntegerReverseBytes)
-UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
UNIMPLEMENTED_INTRINSIC(LongNumberOfLeadingZeros)
UNIMPLEMENTED_INTRINSIC(IntegerNumberOfLeadingZeros)
-UNIMPLEMENTED_INTRINSIC(FloatIntBitsToFloat)
-UNIMPLEMENTED_INTRINSIC(DoubleLongBitsToDouble)
-UNIMPLEMENTED_INTRINSIC(FloatFloatToRawIntBits)
-UNIMPLEMENTED_INTRINSIC(DoubleDoubleToRawLongBits)
UNIMPLEMENTED_INTRINSIC(MathAbsDouble)
UNIMPLEMENTED_INTRINSIC(MathAbsFloat)
UNIMPLEMENTED_INTRINSIC(MathAbsInt)
@@ -204,7 +514,6 @@ UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(StringCharAt)
UNIMPLEMENTED_INTRINSIC(StringCompareTo)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes)
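The MIPS32r1 fallback in GenReverseBytes emulates ROTR and WSBH with shifts and the 0x00FF00FF mask. The same computation in plain C++, for checking against the instruction sequence above (illustration only):

    #include <cassert>
    #include <cstdint>

    uint32_t ReverseBytes32(uint32_t in) {
      uint32_t rot = (in << 16) | (in >> 16);   // Sll/Srl/Or: emulated Rotr(in, 16)
      uint32_t hi  = (rot & 0x00FF00FFu) << 8;  // And/Sll: even bytes move up
      uint32_t lo  = (rot >> 8) & 0x00FF00FFu;  // Srl/And: odd bytes move down
      return hi | lo;                           // Or: emulated Wsbh
    }

    int main() {
      assert(ReverseBytes32(0xAABBCCDDu) == 0xDDCCBBAAu);
      return 0;
    }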
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 05c7eb02d9..ecee11dea6 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -101,11 +101,10 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 {
if (invoke_->IsInvokeStaticOrDirect()) {
codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
Location::RegisterLocation(A0));
- codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
} else {
- UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
- UNREACHABLE();
+ codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0));
}
+ codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
// Copy the result back to the expected output.
Location out = invoke_->GetLocations()->Out();
@@ -116,7 +115,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 {
}
RestoreLiveRegisters(codegen, invoke_->GetLocations());
- __ B(GetExitLabel());
+ __ Bc(GetExitLabel());
}
const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; }
@@ -807,7 +806,7 @@ static void GenRoundingMode(LocationSummary* locations,
DCHECK_NE(in, out);
- Label done;
+ Mips64Label done;
// double floor/ceil(double in) {
// if in.isNaN || in.isInfinite || in.isZero {
@@ -1257,7 +1256,7 @@ static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGenerat
// } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
// result = tmp_value != 0;
- Label loop_head, exit_loop;
+ Mips64Label loop_head, exit_loop;
__ Daddu(TMP, base, offset);
__ Sync(0);
__ Bind(&loop_head);
@@ -1392,6 +1391,108 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) {
__ Bind(slow_path->GetExitLabel());
}
+// boolean java.lang.String.equals(Object anObject)
+void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) {
+ LocationSummary* locations = new (arena_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister());
+
+ // Temporary registers to store lengths of strings and for calculations.
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) {
+ Mips64Assembler* assembler = GetAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ GpuRegister str = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister arg = locations->InAt(1).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+ GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+ GpuRegister temp2 = locations->GetTemp(1).AsRegister<GpuRegister>();
+ GpuRegister temp3 = locations->GetTemp(2).AsRegister<GpuRegister>();
+
+ Mips64Label loop;
+ Mips64Label end;
+ Mips64Label return_true;
+ Mips64Label return_false;
+
+ // Get offsets of count, value, and class fields within a string object.
+ const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+ const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+ const int32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+
+ // Note that the null check must have been done earlier.
+ DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // If the register containing the pointer to "this" and the register
+  // containing the pointer to "anObject" are the same register, then
+  // "this" and "anObject" are the same object and we can
+  // short-circuit the logic to a true result.
+ if (str == arg) {
+ __ LoadConst64(out, 1);
+ return;
+ }
+
+ // Check if input is null, return false if it is.
+ __ Beqzc(arg, &return_false);
+
+ // Reference equality check, return true if same reference.
+ __ Beqc(str, arg, &return_true);
+
+ // Instanceof check for the argument by comparing class fields.
+ // All string objects must have the same type since String cannot be subclassed.
+ // Receiver must be a string object, so its class field is equal to all strings' class fields.
+ // If the argument is a string object, its class field must be equal to receiver's class field.
+ __ Lw(temp1, str, class_offset);
+ __ Lw(temp2, arg, class_offset);
+ __ Bnec(temp1, temp2, &return_false);
+
+ // Load lengths of this and argument strings.
+ __ Lw(temp1, str, count_offset);
+ __ Lw(temp2, arg, count_offset);
+ // Check if lengths are equal, return false if they're not.
+ __ Bnec(temp1, temp2, &return_false);
+ // Return true if both strings are empty.
+ __ Beqzc(temp1, &return_true);
+
+ // Don't overwrite input registers
+ __ Move(TMP, str);
+ __ Move(temp3, arg);
+
+ // Assertions that must hold in order to compare strings 4 characters at a time.
+ DCHECK_ALIGNED(value_offset, 8);
+ static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
+
+ // Loop to compare strings 4 characters at a time starting at the beginning of the string.
+ // Ok to do this because strings are zero-padded to be 8-byte aligned.
+ __ Bind(&loop);
+ __ Ld(out, TMP, value_offset);
+ __ Ld(temp2, temp3, value_offset);
+ __ Bnec(out, temp2, &return_false);
+ __ Daddiu(TMP, TMP, 8);
+ __ Daddiu(temp3, temp3, 8);
+ __ Addiu(temp1, temp1, -4);
+ __ Bgtzc(temp1, &loop);
+
+ // Return true and exit the function.
+ // If loop does not result in returning false, we return true.
+ __ Bind(&return_true);
+ __ LoadConst64(out, 1);
+ __ Bc(&end);
+
+ // Return false and exit the function.
+ __ Bind(&return_false);
+ __ LoadConst64(out, 0);
+ __ Bind(&end);
+}
+
static void GenerateStringIndexOf(HInvoke* invoke,
Mips64Assembler* assembler,
CodeGeneratorMIPS64* codegen,
@@ -1413,7 +1514,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
// full slow-path down and branch unconditionally.
slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke);
codegen->AddSlowPath(slow_path);
- __ B(slow_path->GetEntryLabel());
+ __ Bc(slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
return;
}
@@ -1587,8 +1688,6 @@ void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSE
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
-UNIMPLEMENTED_INTRINSIC(StringEquals)
-
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
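Both new String.equals intrinsics compare the character data one word at a time (two chars per 4-byte load on MIPS32, four chars per 8-byte load on MIPS64), relying on the value array being zero padded up to object alignment. A rough C++ model of the MIPS64 loop, assuming 8-byte-aligned, zero-padded data as on the managed heap; it is not the emitted code:

    #include <cstdint>

    bool CharsEqualWordwise(const uint16_t* a, const uint16_t* b, int32_t char_count) {
      // Safe only because both arrays are zero padded to an 8-byte boundary.
      const uint64_t* wa = reinterpret_cast<const uint64_t*>(a);
      const uint64_t* wb = reinterpret_cast<const uint64_t*>(b);
      for (int32_t remaining = char_count; remaining > 0; remaining -= 4) {
        if (*wa++ != *wb++) {
          return false;   // Ld/Ld/Bnec in the generated loop.
        }
      }
      return true;        // Falls through to the return_true label.
    }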
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 040bf6a45e..371588fc47 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -55,7 +55,23 @@ ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
Dispatch(invoke);
LocationSummary* res = invoke->GetLocations();
- return res != nullptr && res->Intrinsified();
+ if (res == nullptr) {
+ return false;
+ }
+ if (kEmitCompilerReadBarrier && res->CanCall()) {
+ // Generating an intrinsic for this HInvoke may produce an
+ // IntrinsicSlowPathX86 slow path. Currently this approach
+ // does not work when using read barriers, as the emitted
+ // calling sequence will make use of another slow path
+ // (ReadBarrierForRootSlowPathX86 for HInvokeStaticOrDirect,
+ // ReadBarrierSlowPathX86 for HInvokeVirtual). So we bail
+ // out in this case.
+ //
+ // TODO: Find a way to have intrinsics work with read barriers.
+ invoke->SetLocations(nullptr);
+ return false;
+ }
+ return res->Intrinsified();
}
static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
@@ -1571,26 +1587,32 @@ void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
}
-static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
- bool is_volatile, X86Assembler* assembler) {
- Register base = locations->InAt(1).AsRegister<Register>();
- Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
- Location output = locations->Out();
+static void GenUnsafeGet(HInvoke* invoke,
+ Primitive::Type type,
+ bool is_volatile,
+ CodeGeneratorX86* codegen) {
+ X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+ Location base_loc = locations->InAt(1);
+ Register base = base_loc.AsRegister<Register>();
+ Location offset_loc = locations->InAt(2);
+ Register offset = offset_loc.AsRegisterPairLow<Register>();
+ Location output_loc = locations->Out();
switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimNot: {
- Register output_reg = output.AsRegister<Register>();
- __ movl(output_reg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ Register output = output_loc.AsRegister<Register>();
+ __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
if (type == Primitive::kPrimNot) {
- __ MaybeUnpoisonHeapReference(output_reg);
+ codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
}
break;
}
case Primitive::kPrimLong: {
- Register output_lo = output.AsRegisterPairLow<Register>();
- Register output_hi = output.AsRegisterPairHigh<Register>();
+ Register output_lo = output_loc.AsRegisterPairLow<Register>();
+ Register output_hi = output_loc.AsRegisterPairHigh<Register>();
if (is_volatile) {
// Need to use a XMM to read atomically.
XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
@@ -1613,8 +1635,13 @@ static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
bool is_long, bool is_volatile) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+ invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ can_call ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -1653,22 +1680,22 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke)
void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
}
@@ -1890,13 +1917,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
__ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
- // locked cmpxchg has full barrier semantics, and we don't need
+ // LOCK CMPXCHG has full barrier semantics, and we don't need
// scheduling barriers at this time.
// Convert ZF into the boolean result.
__ setb(kZero, out.AsRegister<Register>());
__ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
+ // In the case of the `UnsafeCASObject` intrinsic, accessing an
+ // object in the heap with LOCK CMPXCHG does not require a read
+ // barrier, as we do not keep a reference to this heap location.
+ // However, if heap poisoning is enabled, we need to unpoison the
+ // values that were poisoned earlier.
if (kPoisonHeapReferences) {
if (base_equals_value) {
// `value` has been moved to a temporary register, no need to
@@ -1929,8 +1961,8 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code
LOG(FATAL) << "Unexpected CAS type " << type;
}
- // locked cmpxchg has full barrier semantics, and we don't need
- // scheduling barriers at this time.
+ // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
+ // don't need scheduling barriers at this time.
// Convert ZF into the boolean result.
__ setb(kZero, out.AsRegister<Register>());
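The setb/movzxb pair after LOCK CMPXCHG simply materialises the zero flag as the intrinsic's boolean result; roughly the following, as a model only:

    #include <cstdint>

    // LOCK CMPXCHG sets ZF iff the exchange happened.
    int32_t CasResultFromFlags(bool exchange_happened) {
      uint8_t flag_byte = exchange_happened ? 1 : 0;  // setb(kZero, out)
      return static_cast<int32_t>(flag_byte);         // movzxb(out, out)
    }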
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 14c65c9aaf..2d9f01b821 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -50,8 +50,24 @@ ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
Dispatch(invoke);
- const LocationSummary* res = invoke->GetLocations();
- return res != nullptr && res->Intrinsified();
+ LocationSummary* res = invoke->GetLocations();
+ if (res == nullptr) {
+ return false;
+ }
+ if (kEmitCompilerReadBarrier && res->CanCall()) {
+ // Generating an intrinsic for this HInvoke may produce an
+ // IntrinsicSlowPathX86_64 slow path. Currently this approach
+ // does not work when using read barriers, as the emitted
+ // calling sequence will make use of another slow path
+ // (ReadBarrierForRootSlowPathX86_64 for HInvokeStaticOrDirect,
+ // ReadBarrierSlowPathX86_64 for HInvokeVirtual). So we bail
+ // out in this case.
+ //
+ // TODO: Find a way to have intrinsics work with read barriers.
+ invoke->SetLocations(nullptr);
+ return false;
+ }
+ return res->Intrinsified();
}
static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
@@ -917,6 +933,10 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
}
+// TODO: Implement read barriers in the SystemArrayCopy intrinsic.
+// Note that this code path is not used (yet) because we do not
+// intrinsify methods that can go into the IntrinsicSlowPathX86_64
+// slow path.
void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
X86_64Assembler* assembler = GetAssembler();
LocationSummary* locations = invoke->GetLocations();
@@ -1605,7 +1625,7 @@ static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke)
LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
+ locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
}
static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
@@ -1698,23 +1718,30 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}
-static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
- bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
- CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
- CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
- CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
+static void GenUnsafeGet(HInvoke* invoke,
+ Primitive::Type type,
+ bool is_volatile ATTRIBUTE_UNUSED,
+ CodeGeneratorX86_64* codegen) {
+ X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
+ LocationSummary* locations = invoke->GetLocations();
+ Location base_loc = locations->InAt(1);
+ CpuRegister base = base_loc.AsRegister<CpuRegister>();
+ Location offset_loc = locations->InAt(2);
+ CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
+ Location output_loc = locations->Out();
+  CpuRegister output = output_loc.AsRegister<CpuRegister>();
switch (type) {
case Primitive::kPrimInt:
case Primitive::kPrimNot:
- __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
if (type == Primitive::kPrimNot) {
- __ MaybeUnpoisonHeapReference(trg);
+ codegen->MaybeGenerateReadBarrier(invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
}
break;
case Primitive::kPrimLong:
- __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
+ __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
break;
default:
@@ -1724,8 +1751,13 @@ static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
}
static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+ bool can_call = kEmitCompilerReadBarrier &&
+ (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+ invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
LocationSummary* locations = new (arena) LocationSummary(invoke,
- LocationSummary::kNoCall,
+ can_call ?
+ LocationSummary::kCallOnSlowPath :
+ LocationSummary::kNoCall,
kIntrinsified);
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -1754,22 +1786,22 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invo
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimLong, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
+ GenUnsafeGet(invoke, Primitive::kPrimNot, true, codegen_);
}
@@ -1961,13 +1993,18 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
__ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
- // locked cmpxchg has full barrier semantics, and we don't need
+ // LOCK CMPXCHG has full barrier semantics, and we don't need
// scheduling barriers at this time.
// Convert ZF into the boolean result.
__ setcc(kZero, out);
__ movzxb(out, out);
+ // In the case of the `UnsafeCASObject` intrinsic, accessing an
+ // object in the heap with LOCK CMPXCHG does not require a read
+ // barrier, as we do not keep a reference to this heap location.
+ // However, if heap poisoning is enabled, we need to unpoison the
+ // values that were poisoned earlier.
if (kPoisonHeapReferences) {
if (base_equals_value) {
// `value_reg` has been moved to a temporary register, no need
@@ -1992,7 +2029,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c
LOG(FATAL) << "Unexpected CAS type " << type;
}
- // locked cmpxchg has full barrier semantics, and we don't need
+ // LOCK CMPXCHG has full barrier semantics, and we don't need
// scheduling barriers at this time.
// Convert ZF into the boolean result.
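My reading of the RegisterOrInt32LongConstant -> RegisterOrInt32Constant switch in CreateIntIntToVoidLocations above is the usual x86-64 encoding rule: apart from MOV r64, imm64, instructions take at most a sign-extended 32-bit immediate, so a constant only qualifies as an immediate operand if it survives that round trip. Treat the rationale as an assumption; the check itself is just:

    #include <cstdint>

    // Can value be encoded as a sign-extended imm32?
    bool FitsInSignExtendedImm32(int64_t value) {
      return value == static_cast<int64_t>(static_cast<int32_t>(value));
    }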
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 47457dec7d..2bb769a430 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -42,12 +42,14 @@ class LICMTest : public testing::Test {
loop_preheader_ = new (&allocator_) HBasicBlock(graph_);
loop_header_ = new (&allocator_) HBasicBlock(graph_);
loop_body_ = new (&allocator_) HBasicBlock(graph_);
+ return_ = new (&allocator_) HBasicBlock(graph_);
exit_ = new (&allocator_) HBasicBlock(graph_);
graph_->AddBlock(entry_);
graph_->AddBlock(loop_preheader_);
graph_->AddBlock(loop_header_);
graph_->AddBlock(loop_body_);
+ graph_->AddBlock(return_);
graph_->AddBlock(exit_);
graph_->SetEntryBlock(entry_);
@@ -57,8 +59,9 @@ class LICMTest : public testing::Test {
entry_->AddSuccessor(loop_preheader_);
loop_preheader_->AddSuccessor(loop_header_);
loop_header_->AddSuccessor(loop_body_);
- loop_header_->AddSuccessor(exit_);
+ loop_header_->AddSuccessor(return_);
loop_body_->AddSuccessor(loop_header_);
+ return_->AddSuccessor(exit_);
// Provide boiler-plate instructions.
parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), 0, 0, Primitive::kPrimNot);
@@ -89,6 +92,7 @@ class LICMTest : public testing::Test {
HBasicBlock* loop_preheader_;
HBasicBlock* loop_header_;
HBasicBlock* loop_body_;
+ HBasicBlock* return_;
HBasicBlock* exit_;
HInstruction* parameter_; // "this"
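The extra return_ block gives the LICM test graph a dedicated normal-exit path, so exit_ is no longer a direct successor of the loop header. The resulting shape is roughly:

    entry -> loop_preheader -> loop_header -> loop_body
                                  |    ^__________|
                                  v
                              return_ -> exit_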
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 6fbb6823d6..5b89cfef5a 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -119,19 +119,10 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> {
: ref_info_(ref_info),
offset_(offset),
index_(index),
- declaring_class_def_index_(declaring_class_def_index),
- may_become_unknown_(true) {
+ declaring_class_def_index_(declaring_class_def_index) {
DCHECK(ref_info != nullptr);
DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
(offset != kInvalidFieldOffset && index == nullptr));
-
- if (ref_info->IsSingletonAndNotReturned()) {
- // We try to track stores to singletons that aren't returned to eliminate the stores
- // since values in singleton's fields cannot be killed due to aliasing. Those values
- // can still be killed due to merging values since we don't build phi for merging heap
- // values. SetMayBecomeUnknown(true) may be called later once such merge becomes possible.
- may_become_unknown_ = false;
- }
}
ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
@@ -148,21 +139,11 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> {
return index_ != nullptr;
}
- // Returns true if this heap location's value may become unknown after it's
- // set to a value, due to merge of values, or killed due to aliasing.
- bool MayBecomeUnknown() const {
- return may_become_unknown_;
- }
- void SetMayBecomeUnknown(bool val) {
- may_become_unknown_ = val;
- }
-
private:
ReferenceInfo* const ref_info_; // reference for instance/static field or array access.
const size_t offset_; // offset of static/instance field.
HInstruction* const index_; // index of an array element.
const int16_t declaring_class_def_index_; // declaring class's def's dex index.
- bool may_become_unknown_; // value may become kUnknownHeapValue.
DISALLOW_COPY_AND_ASSIGN(HeapLocation);
};
@@ -381,26 +362,13 @@ class HeapLocationCollector : public HGraphVisitor {
return heap_locations_[heap_location_idx];
}
- void VisitFieldAccess(HInstruction* field_access,
- HInstruction* ref,
- const FieldInfo& field_info,
- bool is_store) {
+ void VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
if (field_info.IsVolatile()) {
has_volatile_ = true;
}
const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
const size_t offset = field_info.GetFieldOffset().SizeValue();
- HeapLocation* location = GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
- // A store of a value may be eliminated if all future loads for that value can be eliminated.
- // For a value that's stored into a singleton field, the value will not be killed due
- // to aliasing. However if the value is set in a block that doesn't post dominate the definition,
- // the value may be killed due to merging later. Before we have post dominating info, we check
- // if the store is in the same block as the definition just to be conservative.
- if (is_store &&
- location->GetReferenceInfo()->IsSingletonAndNotReturned() &&
- field_access->GetBlock() != ref->GetBlock()) {
- location->SetMayBecomeUnknown(true);
- }
+ GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
}
void VisitArrayAccess(HInstruction* array, HInstruction* index) {
@@ -409,20 +377,20 @@ class HeapLocationCollector : public HGraphVisitor {
}
void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
- VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+ VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
}
void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
- VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+ VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
has_heap_stores_ = true;
}
void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
- VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), false);
+ VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
}
void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
- VisitFieldAccess(instruction, instruction->InputAt(0), instruction->GetFieldInfo(), true);
+ VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
has_heap_stores_ = true;
}
@@ -464,9 +432,14 @@ class HeapLocationCollector : public HGraphVisitor {
};
// An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
+// A heap location can be set to kUnknownHeapValue when:
+// - it holds the initial, not-yet-set value, or
+// - its value is killed due to aliasing, merging, invocation, or loop side effects.
static HInstruction* const kUnknownHeapValue =
reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-1));
+
// Default heap value after an allocation.
+// A heap location can be set to that value right after an allocation.
static HInstruction* const kDefaultHeapValue =
reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-2));
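
As an illustrative aside (not code from this change): the two markers above are ordinary HInstruction* pointers formed from the all-ones and all-ones-minus-one bit patterns, so they can never collide with a real instruction. A minimal standalone sketch of the same sentinel-pointer idiom, using a hypothetical stand-in Insn type:

    #include <cassert>
    #include <cstdint>

    struct Insn {};  // stand-in for HInstruction

    // Sentinels live at impossible addresses, so they never compare equal to a real node.
    static Insn* const kUnknown = reinterpret_cast<Insn*>(static_cast<uintptr_t>(-1));
    static Insn* const kDefault = reinterpret_cast<Insn*>(static_cast<uintptr_t>(-2));

    int main() {
      Insn real;
      Insn* value = kDefault;   // heap location right after an allocation
      assert(value != &real);   // never aliases a real instruction
      value = kUnknown;         // killed by aliasing/merging/invocation
      assert(value != kDefault && value != &real);
      return 0;
    }
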
@@ -484,29 +457,17 @@ class LSEVisitor : public HGraphVisitor {
kUnknownHeapValue,
graph->GetArena()->Adapter(kArenaAllocLSE)),
graph->GetArena()->Adapter(kArenaAllocLSE)),
- removed_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
- substitute_instructions_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+ removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+ substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+ possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)),
singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
}
void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
- int block_id = block->GetBlockId();
- ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+ // Populate the heap_values array for this block.
// TODO: try to reuse the heap_values array from one predecessor if possible.
if (block->IsLoopHeader()) {
- // We do a single pass in reverse post order. For loops, use the side effects as a hint
- // to see if the heap values should be killed.
- if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
- // Leave all values as kUnknownHeapValue.
- } else {
- // Inherit the values from pre-header.
- HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
- ArenaVector<HInstruction*>& pre_header_heap_values =
- heap_values_for_[pre_header->GetBlockId()];
- for (size_t i = 0; i < heap_values.size(); i++) {
- heap_values[i] = pre_header_heap_values[i];
- }
- }
+ HandleLoopSideEffects(block);
} else {
MergePredecessorValues(block);
}
@@ -515,23 +476,34 @@ class LSEVisitor : public HGraphVisitor {
// Remove recorded instructions that should be eliminated.
void RemoveInstructions() {
- size_t size = removed_instructions_.size();
- DCHECK_EQ(size, substitute_instructions_.size());
+ size_t size = removed_loads_.size();
+ DCHECK_EQ(size, substitute_instructions_for_loads_.size());
for (size_t i = 0; i < size; i++) {
- HInstruction* instruction = removed_instructions_[i];
- DCHECK(instruction != nullptr);
- HInstruction* substitute = substitute_instructions_[i];
- if (substitute != nullptr) {
- // Keep tracing substitute till one that's not removed.
- HInstruction* sub_sub = FindSubstitute(substitute);
- while (sub_sub != substitute) {
- substitute = sub_sub;
- sub_sub = FindSubstitute(substitute);
- }
- instruction->ReplaceWith(substitute);
+ HInstruction* load = removed_loads_[i];
+ DCHECK(load != nullptr);
+ DCHECK(load->IsInstanceFieldGet() ||
+ load->IsStaticFieldGet() ||
+ load->IsArrayGet());
+ HInstruction* substitute = substitute_instructions_for_loads_[i];
+ DCHECK(substitute != nullptr);
+ // Keep tracing substitute till one that's not removed.
+ HInstruction* sub_sub = FindSubstitute(substitute);
+ while (sub_sub != substitute) {
+ substitute = sub_sub;
+ sub_sub = FindSubstitute(substitute);
}
- instruction->GetBlock()->RemoveInstruction(instruction);
+ load->ReplaceWith(substitute);
+ load->GetBlock()->RemoveInstruction(load);
}
+
+ // At this point, stores in possibly_removed_stores_ can be safely removed.
+ size = possibly_removed_stores_.size();
+ for (size_t i = 0; i < size; i++) {
+ HInstruction* store = possibly_removed_stores_[i];
+ DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
+ store->GetBlock()->RemoveInstruction(store);
+ }
+
// TODO: remove unnecessary allocations.
// Eliminate instructions in singleton_new_instances_ that:
// - don't have uses,
@@ -541,6 +513,52 @@ class LSEVisitor : public HGraphVisitor {
}
private:
+ // If heap_values[index] is an instance field store, we need to keep the store.
+ // This is necessary if a heap value is killed due to merging or loop side
+ // effects (which are essentially merging as well), since a later load from
+ // the location won't be eliminated.
+ void KeepIfIsStore(HInstruction* heap_value) {
+ if (heap_value == kDefaultHeapValue ||
+ heap_value == kUnknownHeapValue ||
+ !heap_value->IsInstanceFieldSet()) {
+ return;
+ }
+ auto idx = std::find(possibly_removed_stores_.begin(),
+ possibly_removed_stores_.end(), heap_value);
+ if (idx != possibly_removed_stores_.end()) {
+ // Make sure the store is kept.
+ possibly_removed_stores_.erase(idx);
+ }
+ }
+
+ void HandleLoopSideEffects(HBasicBlock* block) {
+ DCHECK(block->IsLoopHeader());
+ int block_id = block->GetBlockId();
+ ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id];
+ HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
+ ArenaVector<HInstruction*>& pre_header_heap_values =
+ heap_values_for_[pre_header->GetBlockId()];
+ // We do a single pass in reverse post order. For loops, use the side effects as a hint
+ // to see if the heap values should be killed.
+ if (side_effects_.GetLoopEffects(block).DoesAnyWrite()) {
+ for (size_t i = 0; i < pre_header_heap_values.size(); i++) {
+ // The heap value is killed by loop side effects; we need to keep the last store.
+ KeepIfIsStore(pre_header_heap_values[i]);
+ }
+ if (kIsDebugBuild) {
+ // heap_values should all still be the kUnknownHeapValue they were initialized with.
+ for (size_t i = 0; i < heap_values.size(); i++) {
+ DCHECK_EQ(heap_values[i], kUnknownHeapValue);
+ }
+ }
+ } else {
+ // Inherit the values from pre-header.
+ for (size_t i = 0; i < heap_values.size(); i++) {
+ heap_values[i] = pre_header_heap_values[i];
+ }
+ }
+ }
+
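
Illustrative aside on HandleLoopSideEffects: the loop header either inherits every heap value from the pre-header (when the loop does not write) or leaves them all unknown (when it does). A standalone sketch with heap values reduced to plain ints and the side-effect query reduced to a bool; names are hypothetical, not ART API:

    #include <cassert>
    #include <vector>

    constexpr int kUnknown = -1;

    // If the loop writes to the heap, leave the header's values as kUnknown (as
    // they were initialized); otherwise copy every value from the pre-header.
    void EnterLoopHeader(const std::vector<int>& pre_header_values,
                         bool loop_does_any_write,
                         std::vector<int>* header_values) {
      if (loop_does_any_write) {
        return;
      }
      *header_values = pre_header_values;
    }

    int main() {
      std::vector<int> pre_header = {5, 7};
      std::vector<int> header(pre_header.size(), kUnknown);
      EnterLoopHeader(pre_header, /*loop_does_any_write=*/ false, &header);
      assert(header[0] == 5 && header[1] == 7);

      std::vector<int> header2(pre_header.size(), kUnknown);
      EnterLoopHeader(pre_header, /*loop_does_any_write=*/ true, &header2);
      assert(header2[0] == kUnknown && header2[1] == kUnknown);
      return 0;
    }
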
void MergePredecessorValues(HBasicBlock* block) {
const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
if (predecessors.size() == 0) {
@@ -548,16 +566,25 @@ class LSEVisitor : public HGraphVisitor {
}
ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
for (size_t i = 0; i < heap_values.size(); i++) {
- HInstruction* value = heap_values_for_[predecessors[0]->GetBlockId()][i];
- if (value != kUnknownHeapValue) {
+ HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i];
+ heap_values[i] = pred0_value;
+ if (pred0_value != kUnknownHeapValue) {
for (size_t j = 1; j < predecessors.size(); j++) {
- if (heap_values_for_[predecessors[j]->GetBlockId()][i] != value) {
- value = kUnknownHeapValue;
+ HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i];
+ if (pred_value != pred0_value) {
+ heap_values[i] = kUnknownHeapValue;
break;
}
}
}
- heap_values[i] = value;
+
+ if (heap_values[i] == kUnknownHeapValue) {
+ // Keep the last store in each predecessor since future loads from this
+ // location cannot be eliminated.
+ for (size_t j = 0; j < predecessors.size(); j++) {
+ ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()];
+ KeepIfIsStore(pred_values[i]);
+ }
+ }
}
}
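
Illustrative aside on MergePredecessorValues: the merge is a simple meet, where a location keeps a value only if every predecessor agrees on it. A standalone sketch, again with ints standing in for heap values (not ART code):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    constexpr int kUnknown = -1;

    // Heap value at `loc` after the merge: the predecessors' common value, or kUnknown.
    int MergeLocation(const std::vector<std::vector<int>>& pred_values, size_t loc) {
      int merged = pred_values[0][loc];
      for (size_t j = 1; j < pred_values.size() && merged != kUnknown; ++j) {
        if (pred_values[j][loc] != merged) {
          merged = kUnknown;  // disagreement kills the value
        }
      }
      return merged;
    }

    int main() {
      std::vector<std::vector<int>> preds = {{4, 9}, {4, 8}};
      assert(MergeLocation(preds, 0) == 4);         // all predecessors agree
      assert(MergeLocation(preds, 1) == kUnknown);  // values differ across predecessors
      return 0;
    }
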
@@ -616,21 +643,30 @@ class LSEVisitor : public HGraphVisitor {
HInstruction* heap_value = heap_values[idx];
if (heap_value == kDefaultHeapValue) {
HInstruction* constant = GetDefaultValue(instruction->GetType());
- removed_instructions_.push_back(instruction);
- substitute_instructions_.push_back(constant);
+ removed_loads_.push_back(instruction);
+ substitute_instructions_for_loads_.push_back(constant);
heap_values[idx] = constant;
return;
}
+ if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) {
+ HInstruction* store = heap_value;
+ // This load must be from a singleton since it reads the same field into which
+ // a "removed" store put the value. That store must be to a singleton's field.
+ DCHECK(ref_info->IsSingleton());
+ // Get the real heap value of the store.
+ heap_value = store->InputAt(1);
+ }
if ((heap_value != kUnknownHeapValue) &&
// Keep the load due to possible I/F, J/D array aliasing.
// See b/22538329 for details.
(heap_value->GetType() == instruction->GetType())) {
- removed_instructions_.push_back(instruction);
- substitute_instructions_.push_back(heap_value);
+ removed_loads_.push_back(instruction);
+ substitute_instructions_for_loads_.push_back(heap_value);
TryRemovingNullCheck(instruction);
return;
}
+ // Load isn't eliminated.
if (heap_value == kUnknownHeapValue) {
// Put the load as the value into the HeapLocation.
// This acts like GVN but with better aliasing analysis.
@@ -662,51 +698,63 @@ class LSEVisitor : public HGraphVisitor {
ArenaVector<HInstruction*>& heap_values =
heap_values_for_[instruction->GetBlock()->GetBlockId()];
HInstruction* heap_value = heap_values[idx];
- bool redundant_store = false;
+ bool same_value = false;
+ bool possibly_redundant = false;
if (Equal(heap_value, value)) {
// Store into the heap location with the same value.
- redundant_store = true;
+ same_value = true;
} else if (index != nullptr) {
// For array element, don't eliminate stores since it can be easily aliased
// with non-constant index.
} else if (!heap_location_collector_.MayDeoptimize() &&
- ref_info->IsSingletonAndNotReturned() &&
- !heap_location_collector_.GetHeapLocation(idx)->MayBecomeUnknown()) {
- // Store into a field of a singleton that's not returned. And that value cannot be
- // killed due to merge. It's redundant since future loads will get the value
- // set by this instruction.
- Primitive::Type type = Primitive::kPrimVoid;
- if (instruction->IsInstanceFieldSet()) {
- type = instruction->AsInstanceFieldSet()->GetFieldInfo().GetFieldType();
- } else if (instruction->IsStaticFieldSet()) {
- type = instruction->AsStaticFieldSet()->GetFieldInfo().GetFieldType();
- } else {
- DCHECK(false) << "Must be an instance/static field set instruction.";
- }
- if (value->GetType() != type) {
- // I/F, J/D aliasing should not happen for fields.
- DCHECK(Primitive::IsIntegralType(value->GetType()));
- DCHECK(!Primitive::Is64BitType(value->GetType()));
- DCHECK(Primitive::IsIntegralType(type));
- DCHECK(!Primitive::Is64BitType(type));
- // Keep the store since the corresponding load isn't eliminated due to different types.
- // TODO: handle the different int types so that we can eliminate this store.
- redundant_store = false;
+ ref_info->IsSingletonAndNotReturned()) {
+ // Store into a field of a singleton that's not returned. The value cannot be
+ // killed due to aliasing/invocation. It can be redundant since future loads can
+ // directly get the value set by this instruction. The value can still be killed due to
+ // merging or loop side effects. Stores whose values are killed due to merging/loop side
+ // effects later will be removed from possibly_removed_stores_ when that is detected.
+ possibly_redundant = true;
+ HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance();
+ DCHECK(new_instance != nullptr);
+ if (new_instance->IsFinalizable()) {
+ // Finalizable objects escape globally. Need to keep the store.
+ possibly_redundant = false;
} else {
- redundant_store = true;
+ HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+ if (loop_info != nullptr) {
+ // `instruction` is a store in the loop, so the loop must do writes.
+ DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
+
+ if (loop_info->IsLoopInvariant(original_ref, false)) {
+ DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
+ // Keep the store since its value may be needed at the loop header.
+ possibly_redundant = false;
+ } else {
+ // The singleton is created inside the loop. The value stored to it isn't
+ // needed at the loop header. This also holds for any outer loops.
+ }
+ }
}
- // TODO: eliminate the store if the singleton object is not finalizable.
- redundant_store = false;
}
- if (redundant_store) {
- removed_instructions_.push_back(instruction);
- substitute_instructions_.push_back(nullptr);
- TryRemovingNullCheck(instruction);
+ if (same_value || possibly_redundant) {
+ possibly_removed_stores_.push_back(instruction);
}
- heap_values[idx] = value;
+ if (!same_value) {
+ if (possibly_redundant) {
+ DCHECK(instruction->IsInstanceFieldSet());
+ // Put the store as the heap value. If the value is later loaded from the
+ // heap by a load, this store isn't really redundant.
+ heap_values[idx] = instruction;
+ } else {
+ heap_values[idx] = value;
+ }
+ }
// This store may kill values in other heap locations due to aliasing.
for (size_t i = 0; i < heap_values.size(); i++) {
+ if (i == idx) {
+ continue;
+ }
if (heap_values[i] == value) {
// Same value should be kept even if aliasing happens.
continue;
@@ -834,9 +882,10 @@ class LSEVisitor : public HGraphVisitor {
return;
}
if (!heap_location_collector_.MayDeoptimize() &&
- ref_info->IsSingletonAndNotReturned()) {
- // The allocation might be eliminated.
- singleton_new_instances_.push_back(new_instance);
+ ref_info->IsSingletonAndNotReturned() &&
+ !new_instance->IsFinalizable() &&
+ !new_instance->CanThrow()) {
+ // TODO: add new_instance to singleton_new_instances_ and enable allocation elimination.
}
ArenaVector<HInstruction*>& heap_values =
heap_values_for_[new_instance->GetBlock()->GetBlockId()];
@@ -854,10 +903,10 @@ class LSEVisitor : public HGraphVisitor {
// Find an instruction's substitute if it should be removed.
// Return the same instruction if it should not be removed.
HInstruction* FindSubstitute(HInstruction* instruction) {
- size_t size = removed_instructions_.size();
+ size_t size = removed_loads_.size();
for (size_t i = 0; i < size; i++) {
- if (removed_instructions_[i] == instruction) {
- return substitute_instructions_[i];
+ if (removed_loads_[i] == instruction) {
+ return substitute_instructions_for_loads_[i];
}
}
return instruction;
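
Illustrative aside: FindSubstitute above, together with the "keep tracing substitute" loop in RemoveInstructions, follows substitution chains until it reaches an instruction that was not itself removed. A standalone sketch of that chasing over a plain map (names and types are illustrative, not ART API):

    #include <cassert>
    #include <unordered_map>

    // Maps a removed instruction (an int id here) to the value that replaces it.
    using SubstMap = std::unordered_map<int, int>;

    // Follow substitutes until reaching an instruction that was not itself removed.
    int FindFinalSubstitute(const SubstMap& subst, int insn) {
      auto it = subst.find(insn);
      while (it != subst.end()) {
        insn = it->second;
        it = subst.find(insn);
      }
      return insn;
    }

    int main() {
      SubstMap subst = {{3, 2}, {2, 1}};           // load 3 -> load 2 -> value 1
      assert(FindFinalSubstitute(subst, 3) == 1);  // chain fully collapsed
      assert(FindFinalSubstitute(subst, 7) == 7);  // not removed: unchanged
      return 0;
    }
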
@@ -871,8 +920,13 @@ class LSEVisitor : public HGraphVisitor {
// We record the instructions that should be eliminated but may be
// used by heap locations. They'll be removed in the end.
- ArenaVector<HInstruction*> removed_instructions_;
- ArenaVector<HInstruction*> substitute_instructions_;
+ ArenaVector<HInstruction*> removed_loads_;
+ ArenaVector<HInstruction*> substitute_instructions_for_loads_;
+
+ // Stores in this list may later be removed from the list when it is
+ // found that they cannot be eliminated.
+ ArenaVector<HInstruction*> possibly_removed_stores_;
+
ArenaVector<HInstruction*> singleton_new_instances_;
DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index ebdf7a2f65..1ab206f69e 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -17,6 +17,7 @@
#include "locations.h"
#include "nodes.h"
+#include "code_generator.h"
namespace art {
@@ -47,18 +48,26 @@ Location Location::RegisterOrConstant(HInstruction* instruction) {
: Location::RequiresRegister();
}
-Location Location::RegisterOrInt32LongConstant(HInstruction* instruction) {
- if (instruction->IsIntConstant() || instruction->IsNullConstant()) {
- return Location::ConstantLocation(instruction->AsConstant());
- } else if (instruction->IsLongConstant()) {
- // Does the long constant fit in a 32 bit int?
- int64_t value = instruction->AsLongConstant()->GetValue();
- return IsInt<32>(value)
- ? Location::ConstantLocation(instruction->AsConstant())
- : Location::RequiresRegister();
- } else {
- return Location::RequiresRegister();
+Location Location::RegisterOrInt32Constant(HInstruction* instruction) {
+ HConstant* constant = instruction->AsConstant();
+ if (constant != nullptr) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+ if (IsInt<32>(value)) {
+ return Location::ConstantLocation(constant);
+ }
}
+ return Location::RequiresRegister();
+}
+
+Location Location::FpuRegisterOrInt32Constant(HInstruction* instruction) {
+ HConstant* constant = instruction->AsConstant();
+ if (constant != nullptr) {
+ int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+ if (IsInt<32>(value)) {
+ return Location::ConstantLocation(constant);
+ }
+ }
+ return Location::RequiresFpuRegister();
}
Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
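
Illustrative aside: RegisterOrInt32Constant and FpuRegisterOrInt32Constant both hinge on the "does this 64-bit constant fit in a signed 32-bit immediate" test (IsInt<32> in ART). A minimal standalone equivalent of that check, with a hypothetical helper name:

    #include <cassert>
    #include <cstdint>
    #include <limits>

    // True if `value` is representable as a signed 32-bit immediate.
    bool FitsInInt32(int64_t value) {
      return value >= std::numeric_limits<int32_t>::min() &&
             value <= std::numeric_limits<int32_t>::max();
    }

    int main() {
      assert(FitsInInt32(0));
      assert(FitsInInt32(-2147483648LL));   // INT32_MIN still fits
      assert(!FitsInInt32(2147483648LL));   // 2^31 needs a register
      assert(!FitsInInt32(INT64_C(1) << 40));
      return 0;
    }
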
@@ -67,6 +76,12 @@ Location Location::ByteRegisterOrConstant(int reg, HInstruction* instruction) {
: Location::RegisterLocation(reg);
}
+Location Location::FpuRegisterOrConstant(HInstruction* instruction) {
+ return instruction->IsConstant()
+ ? Location::ConstantLocation(instruction->AsConstant())
+ : Location::RequiresFpuRegister();
+}
+
std::ostream& operator<<(std::ostream& os, const Location& location) {
os << location.DebugString();
if (location.IsRegister() || location.IsFpuRegister()) {
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index d014379bca..63bbc2cd0a 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -354,8 +354,10 @@ class Location : public ValueObject {
}
static Location RegisterOrConstant(HInstruction* instruction);
- static Location RegisterOrInt32LongConstant(HInstruction* instruction);
+ static Location RegisterOrInt32Constant(HInstruction* instruction);
static Location ByteRegisterOrConstant(int reg, HInstruction* instruction);
+ static Location FpuRegisterOrConstant(HInstruction* instruction);
+ static Location FpuRegisterOrInt32Constant(HInstruction* instruction);
// The location of the first input to the instruction will be
// used to replace this unallocated location.
@@ -592,6 +594,10 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
return intrinsified_;
}
+ void SetIntrinsified(bool intrinsified) {
+ intrinsified_ = intrinsified;
+ }
+
private:
ArenaVector<Location> inputs_;
ArenaVector<Location> temps_;
@@ -611,7 +617,7 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> {
RegisterSet live_registers_;
// Whether these are locations for an intrinsified call.
- const bool intrinsified_;
+ bool intrinsified_;
ART_FRIEND_TEST(RegisterAllocatorTest, ExpectedInRegisterHint);
ART_FRIEND_TEST(RegisterAllocatorTest, SameAsFirstInputHint);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 68fb0acf7f..9b26de44fe 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -17,6 +17,7 @@
#include "nodes.h"
#include "code_generator.h"
+#include "common_dominator.h"
#include "ssa_builder.h"
#include "base/bit_vector-inl.h"
#include "base/bit_utils.h"
@@ -179,7 +180,10 @@ void HGraph::ComputeDominanceInformation() {
if (successor->GetDominator() == nullptr) {
successor->SetDominator(current);
} else {
- successor->SetDominator(FindCommonDominator(successor->GetDominator(), current));
+ // The CommonDominator can work for multiple blocks as long as the
+ // domination information doesn't change. However, since we're changing
+ // that information here, we can use the finder only for pairs of blocks.
+ successor->SetDominator(CommonDominator::ForPair(successor->GetDominator(), current));
}
// Once all the forward edges have been visited, we know the immediate
@@ -194,24 +198,6 @@ void HGraph::ComputeDominanceInformation() {
}
}
-HBasicBlock* HGraph::FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const {
- ArenaBitVector visited(arena_, blocks_.size(), false);
- // Walk the dominator tree of the first block and mark the visited blocks.
- while (first != nullptr) {
- visited.SetBit(first->GetBlockId());
- first = first->GetDominator();
- }
- // Walk the dominator tree of the second block until a marked block is found.
- while (second != nullptr) {
- if (visited.IsBitSet(second->GetBlockId())) {
- return second;
- }
- second = second->GetDominator();
- }
- LOG(ERROR) << "Could not find common dominator";
- return nullptr;
-}
-
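
Illustrative aside: the removed helper above is the classic pairwise common-dominator computation that CommonDominator::ForPair now encapsulates: mark the first block's dominator chain, then walk the second chain until a marked block is found. A standalone sketch under the assumption that each block stores a pointer to its immediate dominator (simplified types, not ART code):

    #include <cassert>
    #include <unordered_set>

    struct Block {
      Block* dominator = nullptr;  // immediate dominator; nullptr for the entry block
    };

    Block* CommonDominatorOfPair(Block* first, Block* second) {
      std::unordered_set<Block*> chain;
      for (Block* b = first; b != nullptr; b = b->dominator) {
        chain.insert(b);  // mark the whole dominator chain of `first` (including itself)
      }
      for (Block* b = second; b != nullptr; b = b->dominator) {
        if (chain.count(b) != 0) {
          return b;  // first marked block on `second`'s chain
        }
      }
      return nullptr;  // no common dominator (malformed graph)
    }

    int main() {
      Block entry, left, right;
      left.dominator = &entry;
      right.dominator = &entry;
      assert(CommonDominatorOfPair(&left, &right) == &entry);
      assert(CommonDominatorOfPair(&left, &left) == &left);
      return 0;
    }
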
void HGraph::TransformToSsa() {
DCHECK(!reverse_post_order_.empty());
SsaBuilder ssa_builder(this);
@@ -335,14 +321,24 @@ void HGraph::SimplifyCatchBlocks() {
// instructions into `normal_block` and links the two blocks with a Goto.
// Afterwards, incoming normal-flow edges are re-linked to `normal_block`,
// leaving `catch_block` with the exceptional edges only.
+ //
// Note that catch blocks with normal-flow predecessors cannot begin with
- // a MOVE_EXCEPTION instruction, as guaranteed by the verifier.
- DCHECK(!catch_block->GetFirstInstruction()->IsLoadException());
- HBasicBlock* normal_block = catch_block->SplitBefore(catch_block->GetFirstInstruction());
- for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
- if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
- catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
- --j;
+ // a move-exception instruction, as guaranteed by the verifier. However,
+ // trivially dead predecessors are ignored by the verifier and such code
+ // has not been removed at this stage. We therefore ignore the assumption
+ // and rely on GraphChecker to enforce it after initial DCE is run (b/25492628).
+ HBasicBlock* normal_block = catch_block->SplitCatchBlockAfterMoveException();
+ if (normal_block == nullptr) {
+ // Catch block is either empty or only contains a move-exception. It must
+ // therefore be dead and will be removed during initial DCE. Do nothing.
+ DCHECK(!catch_block->EndsWithControlFlowInstruction());
+ } else {
+ // Catch block was split. Re-link normal-flow edges to the new block.
+ for (size_t j = 0; j < catch_block->GetPredecessors().size(); ++j) {
+ if (!CheckIfPredecessorAtIsExceptional(*catch_block, j)) {
+ catch_block->GetPredecessors()[j]->ReplaceSuccessor(catch_block, normal_block);
+ --j;
+ }
}
}
}
@@ -366,28 +362,45 @@ void HGraph::ComputeTryBlockInformation() {
HBasicBlock* first_predecessor = block->GetPredecessors()[0];
DCHECK(!block->IsLoopHeader() || !block->GetLoopInformation()->IsBackEdge(*first_predecessor));
const HTryBoundary* try_entry = first_predecessor->ComputeTryEntryOfSuccessors();
- if (try_entry != nullptr) {
+ if (try_entry != nullptr &&
+ (block->GetTryCatchInformation() == nullptr ||
+ try_entry != &block->GetTryCatchInformation()->GetTryEntry())) {
+ // We are either setting try block membership for the first time or it
+ // has changed.
block->SetTryCatchInformation(new (arena_) TryCatchInformation(*try_entry));
}
}
}
void HGraph::SimplifyCFG() {
- // Simplify the CFG for future analysis, and code generation:
+// Simplify the CFG for future analysis, and code generation:
// (1): Split critical edges.
- // (2): Simplify loops by having only one back edge, and one preheader.
+ // (2): Simplify loops by having only one preheader.
// NOTE: We're appending new blocks inside the loop, so we need to use index because iterators
// can be invalidated. We remember the initial size to avoid iterating over the new blocks.
for (size_t block_id = 0u, end = blocks_.size(); block_id != end; ++block_id) {
HBasicBlock* block = blocks_[block_id];
if (block == nullptr) continue;
- if (block->NumberOfNormalSuccessors() > 1) {
- for (size_t j = 0; j < block->GetSuccessors().size(); ++j) {
- HBasicBlock* successor = block->GetSuccessors()[j];
+ if (block->GetSuccessors().size() > 1) {
+ // Only split normal-flow edges. We cannot split exceptional edges as they
+ // are synthesized (approximate real control flow), and we do not need to
+ // anyway. Moves that would be inserted there are performed by the runtime.
+ ArrayRef<HBasicBlock* const> normal_successors = block->GetNormalSuccessors();
+ for (size_t j = 0, e = normal_successors.size(); j < e; ++j) {
+ HBasicBlock* successor = normal_successors[j];
DCHECK(!successor->IsCatchBlock());
- if (successor->GetPredecessors().size() > 1) {
+ if (successor == exit_block_) {
+ // Throw->TryBoundary->Exit. Special case which we do not want to split
+ // because Goto->Exit is not allowed.
+ DCHECK(block->IsSingleTryBoundary());
+ DCHECK(block->GetSinglePredecessor()->GetLastInstruction()->IsThrow());
+ } else if (successor->GetPredecessors().size() > 1) {
SplitCriticalEdge(block, successor);
- --j;
+ // SplitCriticalEdge could have invalidated the `normal_successors`
+ // ArrayRef. We must re-acquire it.
+ normal_successors = block->GetNormalSuccessors();
+ DCHECK_EQ(normal_successors[j]->GetSingleSuccessor(), successor);
+ DCHECK_EQ(e, normal_successors.size());
}
}
}
@@ -1082,6 +1095,8 @@ HConstant* HBinaryOperation::TryStaticEvaluation() const {
} else if (GetRight()->IsLongConstant()) {
return Evaluate(GetLeft()->AsLongConstant(), GetRight()->AsLongConstant());
}
+ } else if (GetLeft()->IsNullConstant() && GetRight()->IsNullConstant()) {
+ return Evaluate(GetLeft()->AsNullConstant(), GetRight()->AsNullConstant());
}
return nullptr;
}
@@ -1162,8 +1177,61 @@ void HInstruction::MoveBefore(HInstruction* cursor) {
}
}
+void HInstruction::MoveBeforeFirstUserAndOutOfLoops() {
+ DCHECK(!CanThrow());
+ DCHECK(!HasSideEffects());
+ DCHECK(!HasEnvironmentUses());
+ DCHECK(HasNonEnvironmentUses());
+ DCHECK(!IsPhi()); // Makes no sense for Phi.
+ DCHECK_EQ(InputCount(), 0u);
+
+ // Find the target block.
+ HUseIterator<HInstruction*> uses_it(GetUses());
+ HBasicBlock* target_block = uses_it.Current()->GetUser()->GetBlock();
+ uses_it.Advance();
+ while (!uses_it.Done() && uses_it.Current()->GetUser()->GetBlock() == target_block) {
+ uses_it.Advance();
+ }
+ if (!uses_it.Done()) {
+ // This instruction has uses in two or more blocks. Find the common dominator.
+ CommonDominator finder(target_block);
+ for (; !uses_it.Done(); uses_it.Advance()) {
+ finder.Update(uses_it.Current()->GetUser()->GetBlock());
+ }
+ target_block = finder.Get();
+ DCHECK(target_block != nullptr);
+ }
+ // Move to the first dominator not in a loop.
+ while (target_block->IsInLoop()) {
+ target_block = target_block->GetDominator();
+ DCHECK(target_block != nullptr);
+ }
+
+ // Find insertion position.
+ HInstruction* insert_pos = nullptr;
+ for (HUseIterator<HInstruction*> uses_it2(GetUses()); !uses_it2.Done(); uses_it2.Advance()) {
+ if (uses_it2.Current()->GetUser()->GetBlock() == target_block &&
+ (insert_pos == nullptr || uses_it2.Current()->GetUser()->StrictlyDominates(insert_pos))) {
+ insert_pos = uses_it2.Current()->GetUser();
+ }
+ }
+ if (insert_pos == nullptr) {
+ // No user in `target_block`, insert before the control flow instruction.
+ insert_pos = target_block->GetLastInstruction();
+ DCHECK(insert_pos->IsControlFlow());
+ // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+ if (insert_pos->IsIf()) {
+ HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+ if (if_input == insert_pos->GetPrevious()) {
+ insert_pos = if_input;
+ }
+ }
+ }
+ MoveBefore(insert_pos);
+}
+
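
Illustrative aside on the last step of MoveBeforeFirstUserAndOutOfLoops: once the common dominator of all users is known, leaving loops is just a climb up the dominator tree until the block is no longer inside any loop. A tiny standalone sketch, assuming blocks expose an in-loop flag and their immediate dominator (hypothetical types, not ART code):

    #include <cassert>

    struct Block {
      bool in_loop = false;
      Block* dominator = nullptr;  // immediate dominator
    };

    // Starting from the common dominator of all users, climb until outside all loops.
    // The entry block is never inside a loop, so the walk terminates.
    Block* FirstDominatorOutsideLoops(Block* block) {
      while (block->in_loop) {
        block = block->dominator;
      }
      return block;
    }

    int main() {
      Block entry, pre_header, body;
      pre_header.dominator = &entry;
      body.dominator = &pre_header;
      body.in_loop = true;
      assert(FirstDominatorOutsideLoops(&body) == &pre_header);
      assert(FirstDominatorOutsideLoops(&entry) == &entry);
      return 0;
    }
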
HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
- DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+ DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
DCHECK_EQ(cursor->GetBlock(), this);
HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(),
@@ -1193,7 +1261,7 @@ HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) {
}
HBasicBlock* HBasicBlock::CreateImmediateDominator() {
- DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented";
+ DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented.";
HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc());
@@ -1209,6 +1277,34 @@ HBasicBlock* HBasicBlock::CreateImmediateDominator() {
return new_block;
}
+HBasicBlock* HBasicBlock::SplitCatchBlockAfterMoveException() {
+ DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented.";
+ DCHECK(IsCatchBlock()) << "This method is intended for catch blocks only.";
+
+ HInstruction* first_insn = GetFirstInstruction();
+ HInstruction* split_before = nullptr;
+
+ if (first_insn != nullptr && first_insn->IsLoadException()) {
+ // Catch block starts with a LoadException. Split the block after
+ // the StoreLocal and ClearException which must come after the load.
+ DCHECK(first_insn->GetNext()->IsStoreLocal());
+ DCHECK(first_insn->GetNext()->GetNext()->IsClearException());
+ split_before = first_insn->GetNext()->GetNext()->GetNext();
+ } else {
+ // Catch block does not load the exception. Split at the beginning
+ // to create an empty catch block.
+ split_before = first_insn;
+ }
+
+ if (split_before == nullptr) {
+ // Catch block has no instructions after the split point (must be dead).
+ // Do not split it but rather signal error by returning nullptr.
+ return nullptr;
+ } else {
+ return SplitBefore(split_before);
+ }
+}
+
HBasicBlock* HBasicBlock::SplitAfter(HInstruction* cursor) {
DCHECK(!cursor->IsControlFlow());
DCHECK_NE(instructions_.last_instruction_, cursor);
@@ -1293,17 +1389,38 @@ bool HBasicBlock::HasSinglePhi() const {
return !GetPhis().IsEmpty() && GetFirstPhi()->GetNext() == nullptr;
}
+ArrayRef<HBasicBlock* const> HBasicBlock::GetNormalSuccessors() const {
+ if (EndsWithTryBoundary()) {
+ // The normal-flow successor of HTryBoundary is always stored at index zero.
+ DCHECK_EQ(successors_[0], GetLastInstruction()->AsTryBoundary()->GetNormalFlowSuccessor());
+ return ArrayRef<HBasicBlock* const>(successors_).SubArray(0u, 1u);
+ } else {
+ // All successors of blocks not ending with TryBoundary are normal.
+ return ArrayRef<HBasicBlock* const>(successors_);
+ }
+}
+
+ArrayRef<HBasicBlock* const> HBasicBlock::GetExceptionalSuccessors() const {
+ if (EndsWithTryBoundary()) {
+ return GetLastInstruction()->AsTryBoundary()->GetExceptionHandlers();
+ } else {
+ // Blocks not ending with TryBoundary do not have exceptional successors.
+ return ArrayRef<HBasicBlock* const>();
+ }
+}
+
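
Illustrative aside: GetNormalSuccessors/GetExceptionalSuccessors rely on the invariant that a block ending with a TryBoundary stores its single normal-flow successor at index 0 and its exception handlers after it, so both views are just slices of the successor list. A standalone sketch of the same slicing (ArrayRef is an ART utility; the span type and names below are hypothetical stand-ins):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Block;

    // A minimal read-only view over a contiguous range of successors.
    struct BlockSpan {
      Block* const* data;
      size_t size;
    };

    struct Block {
      std::vector<Block*> successors;
      bool ends_with_try_boundary = false;

      // Normal flow: only index 0 for a TryBoundary block, all successors otherwise.
      BlockSpan NormalSuccessors() const {
        size_t n = ends_with_try_boundary ? 1u : successors.size();
        return {successors.data(), n};
      }

      // Exception handlers: everything after index 0 of a TryBoundary block.
      BlockSpan ExceptionalSuccessors() const {
        if (!ends_with_try_boundary) {
          return {nullptr, 0u};
        }
        return {successors.data() + 1, successors.size() - 1};
      }
    };

    int main() {
      Block normal_succ, handler1, handler2, try_block;
      try_block.ends_with_try_boundary = true;
      try_block.successors = {&normal_succ, &handler1, &handler2};
      assert(try_block.NormalSuccessors().size == 1);
      assert(try_block.ExceptionalSuccessors().size == 2);
      assert(try_block.ExceptionalSuccessors().data[0] == &handler1);
      return 0;
    }
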
bool HTryBoundary::HasSameExceptionHandlersAs(const HTryBoundary& other) const {
- if (GetBlock()->GetSuccessors().size() != other.GetBlock()->GetSuccessors().size()) {
+ ArrayRef<HBasicBlock* const> handlers1 = GetExceptionHandlers();
+ ArrayRef<HBasicBlock* const> handlers2 = other.GetExceptionHandlers();
+
+ size_t length = handlers1.size();
+ if (length != handlers2.size()) {
return false;
}
// Exception handlers need to be stored in the same order.
- for (HExceptionHandlerIterator it1(*this), it2(other);
- !it1.Done();
- it1.Advance(), it2.Advance()) {
- DCHECK(!it2.Done());
- if (it1.Current() != it2.Current()) {
+ for (size_t i = 0; i < length; ++i) {
+ if (handlers1[i] != handlers2[i]) {
return false;
}
}
@@ -1356,7 +1473,7 @@ void HBasicBlock::DisconnectAndDelete() {
// iteration.
DCHECK(dominated_blocks_.empty());
- // Remove the block from all loops it is included in.
+ // (1) Remove the block from all loops it is included in.
for (HLoopInformationOutwardIterator it(*this); !it.Done(); it.Advance()) {
HLoopInformation* loop_info = it.Current();
loop_info->Remove(this);
@@ -1368,17 +1485,34 @@ void HBasicBlock::DisconnectAndDelete() {
}
}
- // Disconnect the block from its predecessors and update their control-flow
- // instructions.
+ // (2) Disconnect the block from its predecessors and update their
+ // control-flow instructions.
for (HBasicBlock* predecessor : predecessors_) {
HInstruction* last_instruction = predecessor->GetLastInstruction();
+ if (last_instruction->IsTryBoundary() && !IsCatchBlock()) {
+ // This block is the only normal-flow successor of the TryBoundary which
+ // makes `predecessor` dead. Since DCE removes blocks in post order,
+ // exception handlers of this TryBoundary were already visited and any
+ // remaining handlers therefore must be live. We remove `predecessor` from
+ // their list of predecessors.
+ DCHECK_EQ(last_instruction->AsTryBoundary()->GetNormalFlowSuccessor(), this);
+ while (predecessor->GetSuccessors().size() > 1) {
+ HBasicBlock* handler = predecessor->GetSuccessors()[1];
+ DCHECK(handler->IsCatchBlock());
+ predecessor->RemoveSuccessor(handler);
+ handler->RemovePredecessor(predecessor);
+ }
+ }
+
predecessor->RemoveSuccessor(this);
uint32_t num_pred_successors = predecessor->GetSuccessors().size();
if (num_pred_successors == 1u) {
// If we have one successor after removing one, then we must have
- // had an HIf or HPackedSwitch, as they have more than one successor.
- // Replace those with a HGoto.
- DCHECK(last_instruction->IsIf() || last_instruction->IsPackedSwitch());
+ // had an HIf, HPackedSwitch or HTryBoundary, as they have more than one
+ // successor. Replace those with a HGoto.
+ DCHECK(last_instruction->IsIf() ||
+ last_instruction->IsPackedSwitch() ||
+ (last_instruction->IsTryBoundary() && IsCatchBlock()));
predecessor->RemoveInstruction(last_instruction);
predecessor->AddInstruction(new (graph_->GetArena()) HGoto(last_instruction->GetDexPc()));
} else if (num_pred_successors == 0u) {
@@ -1387,15 +1521,17 @@ void HBasicBlock::DisconnectAndDelete() {
// SSAChecker fails unless it is not removed during the pass too.
predecessor->RemoveInstruction(last_instruction);
} else {
- // There are multiple successors left. This must come from a HPackedSwitch
- // and we are in the middle of removing the HPackedSwitch. Like above, leave
- // this alone, and the SSAChecker will fail if it is not removed as well.
- DCHECK(last_instruction->IsPackedSwitch());
+ // There are multiple successors left. The removed block might be a successor
+ // of a PackedSwitch which will be completely removed (perhaps replaced with
+ // a Goto), or we are deleting a catch block from a TryBoundary. In either
+ // case, leave `last_instruction` as is for now.
+ DCHECK(last_instruction->IsPackedSwitch() ||
+ (last_instruction->IsTryBoundary() && IsCatchBlock()));
}
}
predecessors_.clear();
- // Disconnect the block from its successors and update their phis.
+ // (3) Disconnect the block from its successors and update their phis.
for (HBasicBlock* successor : successors_) {
// Delete this block from the list of predecessors.
size_t this_index = successor->GetPredecessorIndexOf(this);
@@ -1405,30 +1541,57 @@ void HBasicBlock::DisconnectAndDelete() {
// dominator of `successor` which violates the order DCHECKed at the top.
DCHECK(!successor->predecessors_.empty());
- // Remove this block's entries in the successor's phis.
- if (successor->predecessors_.size() == 1u) {
- // The successor has just one predecessor left. Replace phis with the only
- // remaining input.
- for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- HPhi* phi = phi_it.Current()->AsPhi();
- phi->ReplaceWith(phi->InputAt(1 - this_index));
- successor->RemovePhi(phi);
- }
- } else {
- for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
- phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+ // Remove this block's entries in the successor's phis. Skip exceptional
+ // successors because catch phi inputs do not correspond to predecessor
+ // blocks but throwing instructions. Their inputs will be updated in step (4).
+ if (!successor->IsCatchBlock()) {
+ if (successor->predecessors_.size() == 1u) {
+ // The successor has just one predecessor left. Replace phis with the only
+ // remaining input.
+ for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ HPhi* phi = phi_it.Current()->AsPhi();
+ phi->ReplaceWith(phi->InputAt(1 - this_index));
+ successor->RemovePhi(phi);
+ }
+ } else {
+ for (HInstructionIterator phi_it(successor->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ phi_it.Current()->AsPhi()->RemoveInputAt(this_index);
+ }
}
}
}
successors_.clear();
+ // (4) Remove instructions and phis. Instructions should have no remaining uses
+ // except in catch phis. If an instruction is used by a catch phi at `index`,
+ // remove `index`-th input of all phis in the catch block since they are
+ // guaranteed dead. Note that we may miss dead inputs this way but the
+ // graph will always remain consistent.
+ for (HBackwardInstructionIterator it(GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* insn = it.Current();
+ while (insn->HasUses()) {
+ DCHECK(IsTryBlock());
+ HUseListNode<HInstruction*>* use = insn->GetUses().GetFirst();
+ size_t use_index = use->GetIndex();
+ HBasicBlock* user_block = use->GetUser()->GetBlock();
+ DCHECK(use->GetUser()->IsPhi() && user_block->IsCatchBlock());
+ for (HInstructionIterator phi_it(user_block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
+ phi_it.Current()->AsPhi()->RemoveInputAt(use_index);
+ }
+ }
+
+ RemoveInstruction(insn);
+ }
+ for (HInstructionIterator it(GetPhis()); !it.Done(); it.Advance()) {
+ RemovePhi(it.Current()->AsPhi());
+ }
+
// Disconnect from the dominator.
dominator_->RemoveDominatedBlock(this);
SetDominator(nullptr);
- // Delete from the graph. The function safely deletes remaining instructions
- // and updates the reverse post order.
- graph_->DeleteDeadBlock(this);
+ // Delete from the graph, update reverse post order.
+ graph_->DeleteDeadEmptyBlock(this);
SetGraph(nullptr);
}
@@ -1475,7 +1638,7 @@ void HBasicBlock::MergeWith(HBasicBlock* other) {
other->predecessors_.clear();
// Delete `other` from the graph. The function updates reverse post order.
- graph_->DeleteDeadBlock(other);
+ graph_->DeleteDeadEmptyBlock(other);
other->SetGraph(nullptr);
}
@@ -1539,19 +1702,14 @@ static void MakeRoomFor(ArenaVector<HBasicBlock*>* blocks,
std::copy_backward(blocks->begin() + after + 1u, blocks->begin() + old_size, blocks->end());
}
-void HGraph::DeleteDeadBlock(HBasicBlock* block) {
+void HGraph::DeleteDeadEmptyBlock(HBasicBlock* block) {
DCHECK_EQ(block->GetGraph(), this);
DCHECK(block->GetSuccessors().empty());
DCHECK(block->GetPredecessors().empty());
DCHECK(block->GetDominatedBlocks().empty());
DCHECK(block->GetDominator() == nullptr);
-
- for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- block->RemoveInstruction(it.Current());
- }
- for (HBackwardInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- block->RemovePhi(it.Current()->AsPhi());
- }
+ DCHECK(block->GetInstructions().IsEmpty());
+ DCHECK(block->GetPhis().IsEmpty());
if (block->IsExitBlock()) {
exit_block_ = nullptr;
@@ -1654,6 +1812,9 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
// (2) the reverse post order of that graph,
// (3) the potential loop information they are now in,
// (4) try block membership.
+ // Note that we do not need to update catch phi inputs because they
+ // correspond to the register file of the outer method which the inlinee
+ // cannot modify.
// We don't add the entry block, the exit block, and the first block, which
// has been merged with `at`.
@@ -1782,7 +1943,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
* |
* if_block
* / \
- * dummy_block deopt_block
+ * true_block false_block
* \ /
* new_pre_header
* |
@@ -1790,62 +1951,73 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
*/
void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
DCHECK(header->IsLoopHeader());
- HBasicBlock* pre_header = header->GetDominator();
+ HBasicBlock* old_pre_header = header->GetDominator();
- // Need this to avoid critical edge.
+ // Need extra block to avoid critical edge.
HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc());
- // Need this to avoid critical edge.
- HBasicBlock* dummy_block = new (arena_) HBasicBlock(this, header->GetDexPc());
- HBasicBlock* deopt_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+ HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc());
+ HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc());
HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
AddBlock(if_block);
- AddBlock(dummy_block);
- AddBlock(deopt_block);
+ AddBlock(true_block);
+ AddBlock(false_block);
AddBlock(new_pre_header);
- header->ReplacePredecessor(pre_header, new_pre_header);
- pre_header->successors_.clear();
- pre_header->dominated_blocks_.clear();
-
- pre_header->AddSuccessor(if_block);
- if_block->AddSuccessor(dummy_block); // True successor
- if_block->AddSuccessor(deopt_block); // False successor
- dummy_block->AddSuccessor(new_pre_header);
- deopt_block->AddSuccessor(new_pre_header);
-
- pre_header->dominated_blocks_.push_back(if_block);
- if_block->SetDominator(pre_header);
- if_block->dominated_blocks_.push_back(dummy_block);
- dummy_block->SetDominator(if_block);
- if_block->dominated_blocks_.push_back(deopt_block);
- deopt_block->SetDominator(if_block);
+ header->ReplacePredecessor(old_pre_header, new_pre_header);
+ old_pre_header->successors_.clear();
+ old_pre_header->dominated_blocks_.clear();
+
+ old_pre_header->AddSuccessor(if_block);
+ if_block->AddSuccessor(true_block); // True successor
+ if_block->AddSuccessor(false_block); // False successor
+ true_block->AddSuccessor(new_pre_header);
+ false_block->AddSuccessor(new_pre_header);
+
+ old_pre_header->dominated_blocks_.push_back(if_block);
+ if_block->SetDominator(old_pre_header);
+ if_block->dominated_blocks_.push_back(true_block);
+ true_block->SetDominator(if_block);
+ if_block->dominated_blocks_.push_back(false_block);
+ false_block->SetDominator(if_block);
if_block->dominated_blocks_.push_back(new_pre_header);
new_pre_header->SetDominator(if_block);
new_pre_header->dominated_blocks_.push_back(header);
header->SetDominator(new_pre_header);
+ // Fix reverse post order.
size_t index_of_header = IndexOfElement(reverse_post_order_, header);
MakeRoomFor(&reverse_post_order_, 4, index_of_header - 1);
reverse_post_order_[index_of_header++] = if_block;
- reverse_post_order_[index_of_header++] = dummy_block;
- reverse_post_order_[index_of_header++] = deopt_block;
+ reverse_post_order_[index_of_header++] = true_block;
+ reverse_post_order_[index_of_header++] = false_block;
reverse_post_order_[index_of_header++] = new_pre_header;
- HLoopInformation* info = pre_header->GetLoopInformation();
- if (info != nullptr) {
- if_block->SetLoopInformation(info);
- dummy_block->SetLoopInformation(info);
- deopt_block->SetLoopInformation(info);
- new_pre_header->SetLoopInformation(info);
- for (HLoopInformationOutwardIterator loop_it(*pre_header);
+ // Fix loop information.
+ HLoopInformation* loop_info = old_pre_header->GetLoopInformation();
+ if (loop_info != nullptr) {
+ if_block->SetLoopInformation(loop_info);
+ true_block->SetLoopInformation(loop_info);
+ false_block->SetLoopInformation(loop_info);
+ new_pre_header->SetLoopInformation(loop_info);
+ // Add blocks to all enveloping loops.
+ for (HLoopInformationOutwardIterator loop_it(*old_pre_header);
!loop_it.Done();
loop_it.Advance()) {
loop_it.Current()->Add(if_block);
- loop_it.Current()->Add(dummy_block);
- loop_it.Current()->Add(deopt_block);
+ loop_it.Current()->Add(true_block);
+ loop_it.Current()->Add(false_block);
loop_it.Current()->Add(new_pre_header);
}
}
+
+ // Fix try/catch information.
+ TryCatchInformation* try_catch_info = old_pre_header->IsTryBlock()
+ ? old_pre_header->GetTryCatchInformation()
+ : nullptr;
+ if_block->SetTryCatchInformation(try_catch_info);
+ true_block->SetTryCatchInformation(try_catch_info);
+ false_block->SetTryCatchInformation(try_catch_info);
+ new_pre_header->SetTryCatchInformation(try_catch_info);
}
void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) {
@@ -1940,6 +2112,60 @@ bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const {
return !opt.GetDoesNotNeedDexCache();
}
+void HInvokeStaticOrDirect::InsertInputAt(size_t index, HInstruction* input) {
+ inputs_.insert(inputs_.begin() + index, HUserRecord<HInstruction*>(input));
+ input->AddUseAt(this, index);
+ // Update indexes in use nodes of inputs that have been pushed further back by the insert().
+ for (size_t i = index + 1u, size = inputs_.size(); i != size; ++i) {
+ DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i - 1u);
+ InputRecordAt(i).GetUseNode()->SetIndex(i);
+ }
+}
+
+void HInvokeStaticOrDirect::RemoveInputAt(size_t index) {
+ RemoveAsUserOfInput(index);
+ inputs_.erase(inputs_.begin() + index);
+ // Update indexes in use nodes of inputs that have been pulled forward by the erase().
+ for (size_t i = index, e = InputCount(); i < e; ++i) {
+ DCHECK_EQ(InputRecordAt(i).GetUseNode()->GetIndex(), i + 1u);
+ InputRecordAt(i).GetUseNode()->SetIndex(i);
+ }
+}
+
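
Illustrative aside: InsertInputAt/RemoveInputAt must re-number the recorded use indexes because a vector insert or erase shifts every element behind the touched slot. A standalone sketch of that invariant, with inputs reduced to records that remember their own position (hypothetical names, not ART API):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct UseRecord {
      int value;
      size_t index;  // position this use believes it occupies
    };

    void InsertInputAt(std::vector<UseRecord>* inputs, size_t index, int value) {
      inputs->insert(inputs->begin() + index, UseRecord{value, index});
      // Everything behind the insertion point moved one slot back; fix the indexes.
      for (size_t i = index + 1; i < inputs->size(); ++i) {
        (*inputs)[i].index = i;
      }
    }

    void RemoveInputAt(std::vector<UseRecord>* inputs, size_t index) {
      inputs->erase(inputs->begin() + index);
      // Everything behind the erased slot moved one slot forward; fix the indexes.
      for (size_t i = index; i < inputs->size(); ++i) {
        (*inputs)[i].index = i;
      }
    }

    int main() {
      std::vector<UseRecord> inputs = {{10, 0}, {11, 1}, {12, 2}};
      InsertInputAt(&inputs, 1, 99);
      assert(inputs[1].value == 99 && inputs[2].index == 2 && inputs[3].index == 3);
      RemoveInputAt(&inputs, 1);
      assert(inputs[1].value == 11 && inputs[1].index == 1 && inputs[2].index == 2);
      return 0;
    }
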
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
+ switch (rhs) {
+ case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
+ return os << "string_init";
+ case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
+ return os << "recursive";
+ case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
+ return os << "direct";
+ case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddressWithFixup:
+ return os << "direct_fixup";
+ case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+ return os << "dex_cache_pc_relative";
+ case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+ return os << "dex_cache_via_method";
+ default:
+ LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
+ UNREACHABLE();
+ }
+}
+
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs) {
+ switch (rhs) {
+ case HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit:
+ return os << "explicit";
+ case HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit:
+ return os << "implicit";
+ case HInvokeStaticOrDirect::ClinitCheckRequirement::kNone:
+ return os << "none";
+ default:
+ LOG(FATAL) << "Unknown ClinitCheckRequirement: " << static_cast<int>(rhs);
+ UNREACHABLE();
+ }
+}
+
void HInstruction::RemoveEnvironmentUsers() {
for (HUseIterator<HEnvironment*> use_it(GetEnvUses()); !use_it.Done(); use_it.Advance()) {
HUseListNode<HEnvironment*>* user_node = use_it.Current();
@@ -1949,4 +2175,46 @@ void HInstruction::RemoveEnvironmentUsers() {
env_uses_.Clear();
}
+// Returns an instruction with the opposite boolean value from 'cond'.
+HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) {
+ ArenaAllocator* allocator = GetArena();
+
+ if (cond->IsCondition() &&
+ !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) {
+ // Can't reverse floating point conditions. We have to use HBooleanNot in that case.
+ HInstruction* lhs = cond->InputAt(0);
+ HInstruction* rhs = cond->InputAt(1);
+ HInstruction* replacement = nullptr;
+ switch (cond->AsCondition()->GetOppositeCondition()) { // get *opposite*
+ case kCondEQ: replacement = new (allocator) HEqual(lhs, rhs); break;
+ case kCondNE: replacement = new (allocator) HNotEqual(lhs, rhs); break;
+ case kCondLT: replacement = new (allocator) HLessThan(lhs, rhs); break;
+ case kCondLE: replacement = new (allocator) HLessThanOrEqual(lhs, rhs); break;
+ case kCondGT: replacement = new (allocator) HGreaterThan(lhs, rhs); break;
+ case kCondGE: replacement = new (allocator) HGreaterThanOrEqual(lhs, rhs); break;
+ case kCondB: replacement = new (allocator) HBelow(lhs, rhs); break;
+ case kCondBE: replacement = new (allocator) HBelowOrEqual(lhs, rhs); break;
+ case kCondA: replacement = new (allocator) HAbove(lhs, rhs); break;
+ case kCondAE: replacement = new (allocator) HAboveOrEqual(lhs, rhs); break;
+ default:
+ LOG(FATAL) << "Unexpected condition";
+ UNREACHABLE();
+ }
+ cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+ return replacement;
+ } else if (cond->IsIntConstant()) {
+ HIntConstant* int_const = cond->AsIntConstant();
+ if (int_const->IsZero()) {
+ return GetIntConstant(1);
+ } else {
+ DCHECK(int_const->IsOne());
+ return GetIntConstant(0);
+ }
+ } else {
+ HInstruction* replacement = new (allocator) HBooleanNot(cond);
+ cursor->GetBlock()->InsertInstructionBefore(replacement, cursor);
+ return replacement;
+ }
+}
+
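
Illustrative aside on InsertOppositeCondition: for integer comparisons the negation is simply the opposite comparison, while floating-point conditions cannot be flipped that way (because of NaN, !(a < b) is not the same as (a >= b)) and fall back to HBooleanNot. A standalone sketch of the integer-only mapping, using a hypothetical enum rather than ART's IfCondition:

    #include <cassert>

    enum class Cond { kEQ, kNE, kLT, kLE, kGT, kGE };

    // Logical negation of an integer comparison: !(a < b) == (a >= b), and so on.
    Cond Opposite(Cond c) {
      switch (c) {
        case Cond::kEQ: return Cond::kNE;
        case Cond::kNE: return Cond::kEQ;
        case Cond::kLT: return Cond::kGE;
        case Cond::kLE: return Cond::kGT;
        case Cond::kGT: return Cond::kLE;
        case Cond::kGE: return Cond::kLT;
      }
      return Cond::kEQ;  // unreachable for valid input
    }

    int main() {
      assert(Opposite(Cond::kLT) == Cond::kGE);
      assert(Opposite(Opposite(Cond::kLE)) == Cond::kLE);
      return 0;
    }
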
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 0f2c1cffee..19614f11c6 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -35,6 +35,7 @@
#include "mirror/class.h"
#include "offsets.h"
#include "primitive.h"
+#include "utils/array_ref.h"
namespace art {
@@ -240,8 +241,9 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// put deoptimization instructions, etc.
void TransformLoopHeaderForBCE(HBasicBlock* header);
- // Removes `block` from the graph.
- void DeleteDeadBlock(HBasicBlock* block);
+ // Removes `block` from the graph. Assumes `block` has been disconnected from
+ // other blocks and has no instructions or phis.
+ void DeleteDeadEmptyBlock(HBasicBlock* block);
// Splits the edge between `block` and `successor` while preserving the
// indices in the predecessor/successor lists. If there are multiple edges
@@ -350,8 +352,6 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
HCurrentMethod* GetCurrentMethod();
- HBasicBlock* FindCommonDominator(HBasicBlock* first, HBasicBlock* second) const;
-
const DexFile& GetDexFile() const {
return dex_file_;
}
@@ -371,6 +371,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
+ // Returns an instruction with the opposite boolean value from 'cond'.
+ // The instruction has been inserted into the graph, either as a constant, or
+ // before cursor.
+ HInstruction* InsertOppositeCondition(HInstruction* cond, HInstruction* cursor);
+
private:
void FindBackEdges(ArenaBitVector* visited);
void RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visited) const;
@@ -661,6 +666,9 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
return successors_;
}
+ ArrayRef<HBasicBlock* const> GetNormalSuccessors() const;
+ ArrayRef<HBasicBlock* const> GetExceptionalSuccessors() const;
+
bool HasSuccessor(const HBasicBlock* block, size_t start_from = 0u) {
return ContainsElement(successors_, block, start_from);
}
@@ -811,12 +819,6 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
return GetPredecessorIndexOf(predecessor) == idx;
}
- // Returns the number of non-exceptional successors. SsaChecker ensures that
- // these are stored at the beginning of the successor list.
- size_t NumberOfNormalSuccessors() const {
- return EndsWithTryBoundary() ? 1 : GetSuccessors().size();
- }
-
// Create a new block between this block and its predecessors. The new block
// is added to the graph, all predecessor edges are relinked to it and an edge
// is created to `this`. Returns the new empty block. Reverse post order or
@@ -837,6 +839,15 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
// blocks are consistent (for example ending with a control flow instruction).
HBasicBlock* SplitAfter(HInstruction* cursor);
+ // Split catch block into two blocks after the original move-exception bytecode
+ // instruction, or at the beginning if not present. Returns the newly created,
+ // latter block, or nullptr if such block could not be created (must be dead
+ // in that case). Note that this method just updates raw block information,
+ // like predecessors, successors, dominators, and instruction list. It does not
+ // update the graph, reverse post order, loop information, nor make sure the
+ // blocks are consistent (for example ending with a control flow instruction).
+ HBasicBlock* SplitCatchBlockAfterMoveException();
+
// Merge `other` at the end of `this`. Successors and dominated blocks of
// `other` are changed to be successors and dominated blocks of `this`. Note
// that this method does not update the graph, reverse post order, loop
@@ -1084,13 +1095,20 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(UShr, BinaryOperation) \
M(Xor, BinaryOperation) \
+#ifndef ART_ENABLE_CODEGEN_arm
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M) \
+ M(ArmDexCacheArraysBase, Instruction)
+#endif
#ifndef ART_ENABLE_CODEGEN_arm64
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
#else
#define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M) \
- M(Arm64IntermediateAddress, Instruction)
+ M(Arm64DataProcWithShifterOp, Instruction) \
+ M(Arm64IntermediateAddress, Instruction) \
+ M(Arm64MultiplyAccumulate, Instruction)
#endif
#define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)
@@ -1430,7 +1448,7 @@ class SideEffects : public ValueObject {
return flags_ == (kAllChangeBits | kAllDependOnBits);
}
- // Returns true if this may read something written by other.
+ // Returns true if `this` may read something written by `other`.
bool MayDependOn(SideEffects other) const {
const uint64_t depends_on_flags = (flags_ & kAllDependOnBits) >> kChangeBits;
return (other.flags_ & depends_on_flags);
@@ -1620,6 +1638,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
return holder_;
}
+
+ bool IsFromInlinedInvoke() const {
+ return GetParent() != nullptr;
+ }
+
private:
// Record instructions' use entries of this environment for constant-time removal.
// It should only be called by HInstruction when a new environment use is added.
@@ -1725,6 +1748,13 @@ class ReferenceTypeInfo : ValueObject {
return GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
}
+ bool IsStrictSupertypeOf(ReferenceTypeInfo rti) const SHARED_REQUIRES(Locks::mutator_lock_) {
+ DCHECK(IsValid());
+ DCHECK(rti.IsValid());
+ return GetTypeHandle().Get() != rti.GetTypeHandle().Get() &&
+ GetTypeHandle()->IsAssignableFrom(rti.GetTypeHandle().Get());
+ }
+
// Returns true if the type information provide the same amount of details.
// Note that it does not mean that the instructions have the same actual type
// (because the type can be the result of a merge).
@@ -1927,6 +1957,14 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
// Move `this` instruction before `cursor`.
void MoveBefore(HInstruction* cursor);
+ // Move `this` before its first user and out of any loops. If there is no
+ // out-of-loop user that dominates all other users, move the instruction
+ // to the end of the out-of-loop common dominator of the users' blocks.
+ //
+ // This can be used only on non-throwing instructions with no side effects that
+ // have at least one use but no environment uses.
+ void MoveBeforeFirstUserAndOutOfLoops();
+
#define INSTRUCTION_TYPE_CHECK(type, super) \
bool Is##type() const { return (As##type() != nullptr); } \
virtual const H##type* As##type() const { return nullptr; } \
@@ -2390,6 +2428,10 @@ class HTryBoundary : public HTemplateInstruction<0> {
// Returns the block's non-exceptional successor (index zero).
HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors()[0]; }
+ ArrayRef<HBasicBlock* const> GetExceptionHandlers() const {
+ return ArrayRef<HBasicBlock* const>(GetBlock()->GetSuccessors()).SubArray(1u);
+ }
+
// Returns whether `handler` is among its exception handlers (non-zero index
// successors).
bool HasExceptionHandler(const HBasicBlock& handler) const {
@@ -2417,25 +2459,6 @@ class HTryBoundary : public HTemplateInstruction<0> {
DISALLOW_COPY_AND_ASSIGN(HTryBoundary);
};
-// Iterator over exception handlers of a given HTryBoundary, i.e. over
-// exceptional successors of its basic block.
-class HExceptionHandlerIterator : public ValueObject {
- public:
- explicit HExceptionHandlerIterator(const HTryBoundary& try_boundary)
- : block_(*try_boundary.GetBlock()), index_(block_.NumberOfNormalSuccessors()) {}
-
- bool Done() const { return index_ == block_.GetSuccessors().size(); }
- HBasicBlock* Current() const { return block_.GetSuccessors()[index_]; }
- size_t CurrentSuccessorIndex() const { return index_; }
- void Advance() { ++index_; }
-
- private:
- const HBasicBlock& block_;
- size_t index_;
-
- DISALLOW_COPY_AND_ASSIGN(HExceptionHandlerIterator);
-};
-
// Deoptimize to interpreter, upon checking a condition.
class HDeoptimize : public HTemplateInstruction<1> {
public:
@@ -2604,6 +2627,11 @@ class HBinaryOperation : public HExpression<2> {
VLOG(compiler) << DebugName() << " is not defined for the (long, int) case.";
return nullptr;
}
+ virtual HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+ HNullConstant* y ATTRIBUTE_UNUSED) const {
+ VLOG(compiler) << DebugName() << " is not defined for the (null, null) case.";
+ return nullptr;
+ }
// Returns an input that can legally be used as the right input and is
// constant, or null.
@@ -2694,6 +2722,10 @@ class HEqual : public HCondition {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
+ HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+ HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(1);
+ }
DECLARE_INSTRUCTION(Equal);
@@ -2726,6 +2758,10 @@ class HNotEqual : public HCondition {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
+ HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
+ HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ return GetBlock()->GetGraph()->GetIntConstant(0);
+ }
DECLARE_INSTRUCTION(NotEqual);
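For reference, a minimal sketch of how a constant-folding visitor could take advantage of the new (null, null) overloads; the function and variable names are illustrative, not part of this change:

  // Sketch only: `null == null` folds to 1, `null != null` folds to 0.
  HConstant* TryFoldNullComparison(HCondition* condition) {
    HNullConstant* lhs = condition->GetLeft()->AsNullConstant();
    HNullConstant* rhs = condition->GetRight()->AsNullConstant();
    return (lhs != nullptr && rhs != nullptr) ? condition->Evaluate(lhs, rhs) : nullptr;
  }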
@@ -3227,7 +3263,7 @@ class HInvoke : public HInstruction {
void SetIntrinsic(Intrinsics intrinsic, IntrinsicNeedsEnvironmentOrCache needs_env_or_cache);
bool IsFromInlinedInvoke() const {
- return GetEnvironment()->GetParent() != nullptr;
+ return GetEnvironment()->IsFromInlinedInvoke();
}
bool CanThrow() const OVERRIDE { return true; }
@@ -3395,28 +3431,48 @@ class HInvokeStaticOrDirect : public HInvoke {
MethodReference target_method,
DispatchInfo dispatch_info,
InvokeType original_invoke_type,
- InvokeType invoke_type,
+ InvokeType optimized_invoke_type,
ClinitCheckRequirement clinit_check_requirement)
: HInvoke(arena,
number_of_arguments,
- // There is one extra argument for the HCurrentMethod node, and
- // potentially one other if the clinit check is explicit, and one other
- // if the method is a string factory.
- 1u + (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u)
- + (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
+ // There is potentially one extra argument for the HCurrentMethod node, and
+ // potentially one other if the clinit check is explicit, and potentially
+ // one other if the method is a string factory.
+ (NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
+ (clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u) +
+ (dispatch_info.method_load_kind == MethodLoadKind::kStringInit ? 1u : 0u),
return_type,
dex_pc,
method_index,
original_invoke_type),
- invoke_type_(invoke_type),
+ optimized_invoke_type_(optimized_invoke_type),
clinit_check_requirement_(clinit_check_requirement),
target_method_(target_method),
- dispatch_info_(dispatch_info) {}
+ dispatch_info_(dispatch_info) { }
void SetDispatchInfo(const DispatchInfo& dispatch_info) {
+ bool had_current_method_input = HasCurrentMethodInput();
+ bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind);
+
+ // Using the current method is the default and once we find a better
+ // method load kind, we should not go back to using the current method.
+ DCHECK(had_current_method_input || !needs_current_method_input);
+
+ if (had_current_method_input && !needs_current_method_input) {
+ DCHECK_EQ(InputAt(GetSpecialInputIndex()), GetBlock()->GetGraph()->GetCurrentMethod());
+ RemoveInputAt(GetSpecialInputIndex());
+ }
dispatch_info_ = dispatch_info;
}
+ void AddSpecialInput(HInstruction* input) {
+ // We allow only one special input.
+ DCHECK(!IsStringInit() && !HasCurrentMethodInput());
+ DCHECK(InputCount() == GetSpecialInputIndex() ||
+ (InputCount() == GetSpecialInputIndex() + 1 && IsStaticWithExplicitClinitCheck()));
+ InsertInputAt(GetSpecialInputIndex(), input);
+ }
+
bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
// We access the method via the dex cache so we can't do an implicit null check.
// TODO: for intrinsics we can generate implicit null checks.
@@ -3427,19 +3483,42 @@ class HInvokeStaticOrDirect : public HInvoke {
return return_type_ == Primitive::kPrimNot && !IsStringInit();
}
- InvokeType GetInvokeType() const { return invoke_type_; }
+ // Get the index of the special input, if any.
+ //
+ // If the invoke IsStringInit(), it initially has a HFakeString special argument
+ // which is removed by the instruction simplifier; if the invoke HasCurrentMethodInput(),
+ // the "special input" is the current method pointer; otherwise there may be one
+ // platform-specific special input, such as PC-relative addressing base.
+ uint32_t GetSpecialInputIndex() const { return GetNumberOfArguments(); }
+
+ InvokeType GetOptimizedInvokeType() const { return optimized_invoke_type_; }
+ void SetOptimizedInvokeType(InvokeType invoke_type) {
+ optimized_invoke_type_ = invoke_type;
+ }
+
MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
bool NeedsDexCacheOfDeclaringClass() const OVERRIDE;
bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
- uint32_t GetCurrentMethodInputIndex() const { return GetNumberOfArguments(); }
bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
- bool HasPcRelDexCache() const {
+ bool HasPcRelativeDexCache() const {
return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
}
+ bool HasCurrentMethodInput() const {
+ // This function can be called only after the invoke has been fully initialized by the builder.
+ if (NeedsCurrentMethodInput(GetMethodLoadKind())) {
+ DCHECK(InputAt(GetSpecialInputIndex())->IsCurrentMethod());
+ return true;
+ } else {
+ DCHECK(InputCount() == GetSpecialInputIndex() ||
+ !InputAt(GetSpecialInputIndex())->IsCurrentMethod());
+ return false;
+ }
+ }
bool HasDirectCodePtr() const { return GetCodePtrLocation() == CodePtrLocation::kCallDirect; }
MethodReference GetTargetMethod() const { return target_method_; }
+ void SetTargetMethod(MethodReference method) { target_method_ = method; }
int32_t GetStringInitOffset() const {
DCHECK(IsStringInit());
@@ -3452,7 +3531,7 @@ class HInvokeStaticOrDirect : public HInvoke {
}
uint32_t GetDexCacheArrayOffset() const {
- DCHECK(HasPcRelDexCache());
+ DCHECK(HasPcRelativeDexCache());
return dispatch_info_.method_load_data;
}
@@ -3465,29 +3544,28 @@ class HInvokeStaticOrDirect : public HInvoke {
// Is this instruction a call to a static method?
bool IsStatic() const {
- return GetInvokeType() == kStatic;
+ return GetOriginalInvokeType() == kStatic;
}
- // Remove the art::HLoadClass instruction set as last input by
- // art::PrepareForRegisterAllocation::VisitClinitCheck in lieu of
- // the initial art::HClinitCheck instruction (only relevant for
- // static calls with explicit clinit check).
- void RemoveLoadClassAsLastInput() {
+ // Remove the HClinitCheck or the replacement HLoadClass (set as last input by
+ // PrepareForRegisterAllocation::VisitClinitCheck() in lieu of the initial HClinitCheck)
+ // instruction; only relevant for static calls with explicit clinit check.
+ void RemoveExplicitClinitCheck(ClinitCheckRequirement new_requirement) {
DCHECK(IsStaticWithExplicitClinitCheck());
size_t last_input_index = InputCount() - 1;
HInstruction* last_input = InputAt(last_input_index);
DCHECK(last_input != nullptr);
- DCHECK(last_input->IsLoadClass()) << last_input->DebugName();
+ DCHECK(last_input->IsLoadClass() || last_input->IsClinitCheck()) << last_input->DebugName();
RemoveAsUserOfInput(last_input_index);
inputs_.pop_back();
- clinit_check_requirement_ = ClinitCheckRequirement::kImplicit;
- DCHECK(IsStaticWithImplicitClinitCheck());
+ clinit_check_requirement_ = new_requirement;
+ DCHECK(!IsStaticWithExplicitClinitCheck());
}
bool IsStringFactoryFor(HFakeString* str) const {
if (!IsStringInit()) return false;
- // +1 for the current method.
- if (InputCount() == (number_of_arguments_ + 1)) return false;
+ DCHECK(!HasCurrentMethodInput());
+ if (InputCount() == (number_of_arguments_)) return false;
return InputAt(InputCount() - 1)->AsFakeString() == str;
}
@@ -3502,7 +3580,7 @@ class HInvokeStaticOrDirect : public HInvoke {
}
// Is this a call to a static method whose declaring class has an
- // explicit intialization check in the graph?
+ // explicit initialization check in the graph?
bool IsStaticWithExplicitClinitCheck() const {
return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kExplicit);
}
@@ -3513,6 +3591,11 @@ class HInvokeStaticOrDirect : public HInvoke {
return IsStatic() && (clinit_check_requirement_ == ClinitCheckRequirement::kImplicit);
}
+ // Does this method load kind need the current method as an input?
+ static bool NeedsCurrentMethodInput(MethodLoadKind kind) {
+ return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod;
+ }
+
DECLARE_INSTRUCTION(InvokeStaticOrDirect);
protected:
@@ -3530,8 +3613,11 @@ class HInvokeStaticOrDirect : public HInvoke {
return input_record;
}
+ void InsertInputAt(size_t index, HInstruction* input);
+ void RemoveInputAt(size_t index);
+
private:
- const InvokeType invoke_type_;
+ InvokeType optimized_invoke_type_;
ClinitCheckRequirement clinit_check_requirement_;
// The target method may refer to different dex file or method index than the original
// invoke. This happens for sharpened calls and for calls where a method was redeclared
@@ -3541,6 +3627,8 @@ class HInvokeStaticOrDirect : public HInvoke {
DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
};
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
+std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
class HInvokeVirtual : public HInvoke {
public:
@@ -3595,18 +3683,24 @@ class HInvokeInterface : public HInvoke {
DISALLOW_COPY_AND_ASSIGN(HInvokeInterface);
};
-class HNewInstance : public HExpression<1> {
+class HNewInstance : public HExpression<2> {
public:
- HNewInstance(HCurrentMethod* current_method,
+ HNewInstance(HInstruction* cls,
+ HCurrentMethod* current_method,
uint32_t dex_pc,
uint16_t type_index,
const DexFile& dex_file,
+ bool can_throw,
+ bool finalizable,
QuickEntrypointEnum entrypoint)
: HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc),
type_index_(type_index),
dex_file_(dex_file),
+ can_throw_(can_throw),
+ finalizable_(finalizable),
entrypoint_(entrypoint) {
- SetRawInputAt(0, current_method);
+ SetRawInputAt(0, cls);
+ SetRawInputAt(1, current_method);
}
uint16_t GetTypeIndex() const { return type_index_; }
@@ -3614,22 +3708,30 @@ class HNewInstance : public HExpression<1> {
// Calls runtime so needs an environment.
bool NeedsEnvironment() const OVERRIDE { return true; }
- // It may throw when called on:
- // - interfaces
- // - abstract/innaccessible/unknown classes
- // TODO: optimize when possible.
- bool CanThrow() const OVERRIDE { return true; }
+
+  // It may throw when called on a type that's not instantiable/accessible.
+ // It can throw OOME.
+ // TODO: distinguish between the two cases so we can for example allow allocation elimination.
+ bool CanThrow() const OVERRIDE { return can_throw_ || true; }
+
+ bool IsFinalizable() const { return finalizable_; }
bool CanBeNull() const OVERRIDE { return false; }
QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
+ void SetEntrypoint(QuickEntrypointEnum entrypoint) {
+ entrypoint_ = entrypoint;
+ }
+
DECLARE_INSTRUCTION(NewInstance);
private:
const uint16_t type_index_;
const DexFile& dex_file_;
- const QuickEntrypointEnum entrypoint_;
+ const bool can_throw_;
+ const bool finalizable_;
+ QuickEntrypointEnum entrypoint_;
DISALLOW_COPY_AND_ASSIGN(HNewInstance);
};
@@ -4237,9 +4339,13 @@ class HPhi : public HInstruction {
: HInstruction(SideEffects::None(), dex_pc),
inputs_(number_of_inputs, arena->Adapter(kArenaAllocPhiInputs)),
reg_number_(reg_number),
- type_(type),
- is_live_(false),
+ type_(ToPhiType(type)),
+ // Phis are constructed live and marked dead if conflicting or unused.
+ // Individual steps of SsaBuilder should assume that if a phi has been
+ // marked dead, it can be ignored and will be removed by SsaPhiElimination.
+ is_live_(true),
can_be_null_(true) {
+ DCHECK_NE(type_, Primitive::kPrimVoid);
}
// Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
@@ -4710,13 +4816,15 @@ class HLoadClass : public HExpression<1> {
const DexFile& dex_file,
bool is_referrers_class,
uint32_t dex_pc,
- bool needs_access_check)
+ bool needs_access_check,
+ bool is_in_dex_cache)
: HExpression(Primitive::kPrimNot, SideEffectsForArchRuntimeCalls(), dex_pc),
type_index_(type_index),
dex_file_(dex_file),
is_referrers_class_(is_referrers_class),
generate_clinit_check_(false),
needs_access_check_(needs_access_check),
+ is_in_dex_cache_(is_in_dex_cache),
loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
// Referrers class should not need access check. We never inline unverified
// methods so we can't possibly end up in this situation.
@@ -4741,14 +4849,13 @@ class HLoadClass : public HExpression<1> {
bool CanBeNull() const OVERRIDE { return false; }
bool NeedsEnvironment() const OVERRIDE {
- // Will call runtime and load the class if the class is not loaded yet.
- // TODO: finer grain decision.
- return !is_referrers_class_;
+ return CanCallRuntime();
}
bool MustGenerateClinitCheck() const {
return generate_clinit_check_;
}
+
void SetMustGenerateClinitCheck(bool generate_clinit_check) {
// The entrypoint the code generator is going to call does not do
// clinit of the class.
@@ -4757,7 +4864,9 @@ class HLoadClass : public HExpression<1> {
}
bool CanCallRuntime() const {
- return MustGenerateClinitCheck() || !is_referrers_class_ || needs_access_check_;
+ return MustGenerateClinitCheck() ||
+ (!is_referrers_class_ && !is_in_dex_cache_) ||
+ needs_access_check_;
}
bool NeedsAccessCheck() const {
@@ -4765,8 +4874,6 @@ class HLoadClass : public HExpression<1> {
}
bool CanThrow() const OVERRIDE {
- // May call runtime and and therefore can throw.
- // TODO: finer grain decision.
return CanCallRuntime();
}
@@ -4788,6 +4895,8 @@ class HLoadClass : public HExpression<1> {
return SideEffects::CanTriggerGC();
}
+ bool IsInDexCache() const { return is_in_dex_cache_; }
+
DECLARE_INSTRUCTION(LoadClass);
private:
@@ -4797,7 +4906,8 @@ class HLoadClass : public HExpression<1> {
// Whether this instruction must generate the initialization check.
// Used for code generation.
bool generate_clinit_check_;
- bool needs_access_check_;
+ const bool needs_access_check_;
+ const bool is_in_dex_cache_;
ReferenceTypeInfo loaded_class_rti_;
@@ -4862,6 +4972,7 @@ class HClinitCheck : public HExpression<1> {
return true;
}
+ bool CanThrow() const OVERRIDE { return true; }
HLoadClass* GetLoadClass() const { return InputAt(0)->AsLoadClass(); }
@@ -5474,6 +5585,9 @@ class HParallelMove : public HTemplateInstruction<0> {
} // namespace art
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "nodes_arm.h"
+#endif
#ifdef ART_ENABLE_CODEGEN_arm64
#include "nodes_arm64.h"
#endif
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
new file mode 100644
index 0000000000..6a1dbb9e70
--- /dev/null
+++ b/compiler/optimizing/nodes_arm.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_
+
+namespace art {
+
+class HArmDexCacheArraysBase : public HExpression<0> {
+ public:
+ explicit HArmDexCacheArraysBase(const DexFile& dex_file)
+ : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
+ dex_file_(&dex_file),
+ element_offset_(static_cast<size_t>(-1)) { }
+
+ void UpdateElementOffset(size_t element_offset) {
+ // Use the lowest offset from the requested elements so that all offsets from
+ // this base are non-negative because our assemblers emit negative-offset loads
+ // as a sequence of two or more instructions. (However, positive offsets beyond
+ // 4KiB also require two or more instructions, so this simple heuristic could
+ // be improved for cases where there is a dense cluster of elements far from
+ // the lowest offset. This is expected to be rare enough though, so we choose
+ // not to spend compile time on elaborate calculations.)
+ element_offset_ = std::min(element_offset_, element_offset);
+ }
+
+ const DexFile& GetDexFile() const {
+ return *dex_file_;
+ }
+
+ size_t GetElementOffset() const {
+ return element_offset_;
+ }
+
+ DECLARE_INSTRUCTION(ArmDexCacheArraysBase);
+
+ private:
+ const DexFile* dex_file_;
+ size_t element_offset_;
+
+ DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase);
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_NODES_ARM_H_
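For reference, a rough sketch of how the ARM fixup pass added in this change (dex_cache_array_fixups_arm.cc) is expected to use this node; `GetOrCreateBase` stands in for the pass's per-dex-file caching and is an illustrative name:

  // Sketch only: share one base per dex file and record the lowest element offset.
  void ProcessInvokeWithPcRelativeDexCache(HInvokeStaticOrDirect* invoke, size_t element_offset) {
    HArmDexCacheArraysBase* base = GetOrCreateBase(*invoke->GetTargetMethod().dex_file);
    base->UpdateElementOffset(element_offset);
    invoke->AddSpecialInput(base);  // Uses the new HInvokeStaticOrDirect::AddSpecialInput().
  }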
diff --git a/compiler/optimizing/nodes_arm64.cc b/compiler/optimizing/nodes_arm64.cc
new file mode 100644
index 0000000000..ac2f093847
--- /dev/null
+++ b/compiler/optimizing/nodes_arm64.cc
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_arm64.h"
+#include "nodes.h"
+
+namespace art {
+
+using arm64::helpers::CanFitInShifterOperand;
+
+void HArm64DataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction,
+ /*out*/OpKind* op_kind,
+ /*out*/int* shift_amount) {
+ DCHECK(CanFitInShifterOperand(instruction));
+ if (instruction->IsShl()) {
+ *op_kind = kLSL;
+ *shift_amount = instruction->AsShl()->GetRight()->AsIntConstant()->GetValue();
+ } else if (instruction->IsShr()) {
+ *op_kind = kASR;
+ *shift_amount = instruction->AsShr()->GetRight()->AsIntConstant()->GetValue();
+ } else if (instruction->IsUShr()) {
+ *op_kind = kLSR;
+ *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue();
+ } else {
+ DCHECK(instruction->IsTypeConversion());
+ Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType();
+ Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType();
+ int result_size = Primitive::ComponentSize(result_type);
+ int input_size = Primitive::ComponentSize(input_type);
+ int min_size = std::min(result_size, input_size);
+ // This follows the logic in
+ // `InstructionCodeGeneratorARM64::VisitTypeConversion()`.
+ if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) {
+ // There is actually nothing to do. The register will be used as a W
+ // register, discarding the top bits. This is represented by the default
+ // encoding 'LSL 0'.
+ *op_kind = kLSL;
+ *shift_amount = 0;
+ } else if (result_type == Primitive::kPrimChar ||
+ (input_type == Primitive::kPrimChar && input_size < result_size)) {
+ *op_kind = kUXTH;
+ } else {
+ switch (min_size) {
+ case 1: *op_kind = kSXTB; break;
+ case 2: *op_kind = kSXTH; break;
+ case 4: *op_kind = kSXTW; break;
+ default:
+ LOG(FATAL) << "Unexpected min size " << min_size;
+ }
+ }
+ }
+}
+
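For reference, the mappings this helper produces for a few typical inputs, worked out from the logic above (not asserted by any test in this change):

//   HShl(x, 3)                     -> op_kind = kLSL, shift_amount = 3
//   HTypeConversion int  -> long   -> op_kind = kSXTW (sign-extend the low word)
//   HTypeConversion int  -> char   -> op_kind = kUXTH (zero-extend 16 bits)
//   HTypeConversion long -> int    -> op_kind = kLSL, shift_amount = 0 (use as W register)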
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op) {
+ switch (op) {
+ case HArm64DataProcWithShifterOp::kLSL: return os << "LSL";
+ case HArm64DataProcWithShifterOp::kLSR: return os << "LSR";
+ case HArm64DataProcWithShifterOp::kASR: return os << "ASR";
+ case HArm64DataProcWithShifterOp::kUXTB: return os << "UXTB";
+ case HArm64DataProcWithShifterOp::kUXTH: return os << "UXTH";
+ case HArm64DataProcWithShifterOp::kUXTW: return os << "UXTW";
+ case HArm64DataProcWithShifterOp::kSXTB: return os << "SXTB";
+ case HArm64DataProcWithShifterOp::kSXTH: return os << "SXTH";
+ case HArm64DataProcWithShifterOp::kSXTW: return os << "SXTW";
+ default:
+ LOG(FATAL) << "Invalid OpKind " << static_cast<int>(op);
+ UNREACHABLE();
+ }
+}
+
+} // namespace art
diff --git a/compiler/optimizing/nodes_arm64.h b/compiler/optimizing/nodes_arm64.h
index 885d3a29ee..e8439354af 100644
--- a/compiler/optimizing/nodes_arm64.h
+++ b/compiler/optimizing/nodes_arm64.h
@@ -19,6 +19,79 @@
namespace art {
+class HArm64DataProcWithShifterOp : public HExpression<2> {
+ public:
+ enum OpKind {
+ kLSL, // Logical shift left.
+ kLSR, // Logical shift right.
+ kASR, // Arithmetic shift right.
+ kUXTB, // Unsigned extend byte.
+ kUXTH, // Unsigned extend half-word.
+ kUXTW, // Unsigned extend word.
+ kSXTB, // Signed extend byte.
+ kSXTH, // Signed extend half-word.
+ kSXTW, // Signed extend word.
+
+ // Aliases.
+ kFirstShiftOp = kLSL,
+ kLastShiftOp = kASR,
+ kFirstExtensionOp = kUXTB,
+ kLastExtensionOp = kSXTW
+ };
+ HArm64DataProcWithShifterOp(HInstruction* instr,
+ HInstruction* left,
+ HInstruction* right,
+ OpKind op,
+ // The shift argument is unused if the operation
+ // is an extension.
+ int shift = 0,
+ uint32_t dex_pc = kNoDexPc)
+ : HExpression(instr->GetType(), SideEffects::None(), dex_pc),
+ instr_kind_(instr->GetKind()), op_kind_(op), shift_amount_(shift) {
+ DCHECK(!instr->HasSideEffects());
+ SetRawInputAt(0, left);
+ SetRawInputAt(1, right);
+ }
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(HInstruction* other_instr) const OVERRIDE {
+ HArm64DataProcWithShifterOp* other = other_instr->AsArm64DataProcWithShifterOp();
+ return instr_kind_ == other->instr_kind_ &&
+ op_kind_ == other->op_kind_ &&
+ shift_amount_ == other->shift_amount_;
+ }
+
+ static bool IsShiftOp(OpKind op_kind) {
+ return kFirstShiftOp <= op_kind && op_kind <= kLastShiftOp;
+ }
+
+ static bool IsExtensionOp(OpKind op_kind) {
+ return kFirstExtensionOp <= op_kind && op_kind <= kLastExtensionOp;
+ }
+
+ // Find the operation kind and shift amount from a bitfield move instruction.
+ static void GetOpInfoFromInstruction(HInstruction* bitfield_op,
+ /*out*/OpKind* op_kind,
+ /*out*/int* shift_amount);
+
+ InstructionKind GetInstrKind() const { return instr_kind_; }
+ OpKind GetOpKind() const { return op_kind_; }
+ int GetShiftAmount() const { return shift_amount_; }
+
+ DECLARE_INSTRUCTION(Arm64DataProcWithShifterOp);
+
+ private:
+ InstructionKind instr_kind_;
+ OpKind op_kind_;
+ int shift_amount_;
+
+ friend std::ostream& operator<<(std::ostream& os, OpKind op);
+
+ DISALLOW_COPY_AND_ASSIGN(HArm64DataProcWithShifterOp);
+};
+
+std::ostream& operator<<(std::ostream& os, const HArm64DataProcWithShifterOp::OpKind op);
+
// This instruction computes an intermediate address pointing in the 'middle' of an object. The
// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is
// never used across anything that can trigger GC.
@@ -42,6 +115,40 @@ class HArm64IntermediateAddress : public HExpression<2> {
DISALLOW_COPY_AND_ASSIGN(HArm64IntermediateAddress);
};
+class HArm64MultiplyAccumulate : public HExpression<3> {
+ public:
+ HArm64MultiplyAccumulate(Primitive::Type type,
+ InstructionKind op,
+ HInstruction* accumulator,
+ HInstruction* mul_left,
+ HInstruction* mul_right,
+ uint32_t dex_pc = kNoDexPc)
+ : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) {
+ SetRawInputAt(kInputAccumulatorIndex, accumulator);
+ SetRawInputAt(kInputMulLeftIndex, mul_left);
+ SetRawInputAt(kInputMulRightIndex, mul_right);
+ }
+
+ static constexpr int kInputAccumulatorIndex = 0;
+ static constexpr int kInputMulLeftIndex = 1;
+ static constexpr int kInputMulRightIndex = 2;
+
+ bool CanBeMoved() const OVERRIDE { return true; }
+ bool InstructionDataEquals(HInstruction* other) const OVERRIDE {
+ return op_kind_ == other->AsArm64MultiplyAccumulate()->op_kind_;
+ }
+
+ InstructionKind GetOpKind() const { return op_kind_; }
+
+ DECLARE_INSTRUCTION(Arm64MultiplyAccumulate);
+
+ private:
+ // Indicates if this is a MADD or MSUB.
+ InstructionKind op_kind_;
+
+ DISALLOW_COPY_AND_ASSIGN(HArm64MultiplyAccumulate);
+};
+
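For reference, a condensed sketch of the pattern matching the arm64 instruction simplifier in this change performs to form this node from an add of a multiply; the real pass performs additional use and type checks, and `TryFuseMultiplyAdd` is an illustrative name:

  // Sketch only: fuse `acc + (a * b)` into a multiply-accumulate when the
  // multiplication has no other users.
  void TryFuseMultiplyAdd(HAdd* add, ArenaAllocator* arena) {
    HMul* mul = add->GetRight()->AsMul();
    if (mul != nullptr && mul->HasOnlyOneNonEnvironmentUse()) {
      HArm64MultiplyAccumulate* mla = new (arena) HArm64MultiplyAccumulate(
          add->GetType(), HInstruction::kAdd, add->GetLeft(), mul->GetLeft(), mul->GetRight());
      add->GetBlock()->ReplaceAndRemoveInstructionWith(add, mla);
      mul->GetBlock()->RemoveInstruction(mul);
    }
  }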
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_ARM64_H_
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index 05c6b2ca57..2b0d522b31 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -23,6 +23,9 @@
#include "optimizing/code_generator.h"
#include "optimizing/optimizing_unit_test.h"
#include "utils/assembler.h"
+#include "utils/arm/assembler_thumb2.h"
+#include "utils/mips/assembler_mips.h"
+#include "utils/mips64/assembler_mips64.h"
#include "optimizing/optimizing_cfi_test_expected.inc"
@@ -36,52 +39,62 @@ class OptimizingCFITest : public CFITest {
// Enable this flag to generate the expected outputs.
static constexpr bool kGenerateExpected = false;
- void TestImpl(InstructionSet isa, const char* isa_str,
- const std::vector<uint8_t>& expected_asm,
- const std::vector<uint8_t>& expected_cfi) {
+ OptimizingCFITest()
+ : pool_(),
+ allocator_(&pool_),
+ opts_(),
+ isa_features_(),
+ graph_(nullptr),
+ code_gen_(),
+ blocks_(allocator_.Adapter()) {}
+
+ void SetUpFrame(InstructionSet isa) {
// Setup simple context.
- ArenaPool pool;
- ArenaAllocator allocator(&pool);
- CompilerOptions opts;
- std::unique_ptr<const InstructionSetFeatures> isa_features;
std::string error;
- isa_features.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
- HGraph* graph = CreateGraph(&allocator);
+ isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+ graph_ = CreateGraph(&allocator_);
// Generate simple frame with some spills.
- std::unique_ptr<CodeGenerator> code_gen(
- CodeGenerator::Create(graph, isa, *isa_features.get(), opts));
+ code_gen_.reset(CodeGenerator::Create(graph_, isa, *isa_features_, opts_));
+ code_gen_->GetAssembler()->cfi().SetEnabled(true);
const int frame_size = 64;
int core_reg = 0;
int fp_reg = 0;
for (int i = 0; i < 2; i++) { // Two registers of each kind.
for (; core_reg < 32; core_reg++) {
- if (code_gen->IsCoreCalleeSaveRegister(core_reg)) {
+ if (code_gen_->IsCoreCalleeSaveRegister(core_reg)) {
auto location = Location::RegisterLocation(core_reg);
- code_gen->AddAllocatedRegister(location);
+ code_gen_->AddAllocatedRegister(location);
core_reg++;
break;
}
}
for (; fp_reg < 32; fp_reg++) {
- if (code_gen->IsFloatingPointCalleeSaveRegister(fp_reg)) {
+ if (code_gen_->IsFloatingPointCalleeSaveRegister(fp_reg)) {
auto location = Location::FpuRegisterLocation(fp_reg);
- code_gen->AddAllocatedRegister(location);
+ code_gen_->AddAllocatedRegister(location);
fp_reg++;
break;
}
}
}
- ArenaVector<HBasicBlock*> blocks(allocator.Adapter());
- code_gen->block_order_ = &blocks;
- code_gen->ComputeSpillMask();
- code_gen->SetFrameSize(frame_size);
- code_gen->GenerateFrameEntry();
- code_gen->GenerateFrameExit();
+ code_gen_->block_order_ = &blocks_;
+ code_gen_->ComputeSpillMask();
+ code_gen_->SetFrameSize(frame_size);
+ code_gen_->GenerateFrameEntry();
+ }
+
+ void Finish() {
+ code_gen_->GenerateFrameExit();
+ code_gen_->Finalize(&code_allocator_);
+ }
+
+ void Check(InstructionSet isa,
+ const char* isa_str,
+ const std::vector<uint8_t>& expected_asm,
+ const std::vector<uint8_t>& expected_cfi) {
// Get the outputs.
- InternalCodeAllocator code_allocator;
- code_gen->Finalize(&code_allocator);
- const std::vector<uint8_t>& actual_asm = code_allocator.GetMemory();
- Assembler* opt_asm = code_gen->GetAssembler();
+ const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory();
+ Assembler* opt_asm = code_gen_->GetAssembler();
const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
if (kGenerateExpected) {
@@ -92,6 +105,19 @@ class OptimizingCFITest : public CFITest {
}
}
+  void TestImpl(InstructionSet isa,
+                const char* isa_str,
+ const std::vector<uint8_t>& expected_asm,
+ const std::vector<uint8_t>& expected_cfi) {
+ SetUpFrame(isa);
+ Finish();
+ Check(isa, isa_str, expected_asm, expected_cfi);
+ }
+
+ CodeGenerator* GetCodeGenerator() {
+ return code_gen_.get();
+ }
+
private:
class InternalCodeAllocator : public CodeAllocator {
public:
@@ -109,21 +135,111 @@ class OptimizingCFITest : public CFITest {
DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
};
+
+ ArenaPool pool_;
+ ArenaAllocator allocator_;
+ CompilerOptions opts_;
+ std::unique_ptr<const InstructionSetFeatures> isa_features_;
+ HGraph* graph_;
+ std::unique_ptr<CodeGenerator> code_gen_;
+ ArenaVector<HBasicBlock*> blocks_;
+ InternalCodeAllocator code_allocator_;
};
-#define TEST_ISA(isa) \
- TEST_F(OptimizingCFITest, isa) { \
- std::vector<uint8_t> expected_asm(expected_asm_##isa, \
- expected_asm_##isa + arraysize(expected_asm_##isa)); \
- std::vector<uint8_t> expected_cfi(expected_cfi_##isa, \
- expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
- TestImpl(isa, #isa, expected_asm, expected_cfi); \
+#define TEST_ISA(isa) \
+ TEST_F(OptimizingCFITest, isa) { \
+ std::vector<uint8_t> expected_asm( \
+ expected_asm_##isa, \
+ expected_asm_##isa + arraysize(expected_asm_##isa)); \
+ std::vector<uint8_t> expected_cfi( \
+ expected_cfi_##isa, \
+ expected_cfi_##isa + arraysize(expected_cfi_##isa)); \
+ TestImpl(isa, #isa, expected_asm, expected_cfi); \
}
TEST_ISA(kThumb2)
TEST_ISA(kArm64)
TEST_ISA(kX86)
TEST_ISA(kX86_64)
+TEST_ISA(kMips)
+TEST_ISA(kMips64)
+
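For reference, TEST_ISA(kThumb2) above expands to the following test body; the other ISAs follow the same pattern:

  TEST_F(OptimizingCFITest, kThumb2) {
    std::vector<uint8_t> expected_asm(
        expected_asm_kThumb2,
        expected_asm_kThumb2 + arraysize(expected_asm_kThumb2));
    std::vector<uint8_t> expected_cfi(
        expected_cfi_kThumb2,
        expected_cfi_kThumb2 + arraysize(expected_cfi_kThumb2));
    TestImpl(kThumb2, "kThumb2", expected_asm, expected_cfi);
  }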
+TEST_F(OptimizingCFITest, kThumb2Adjust) {
+ std::vector<uint8_t> expected_asm(
+ expected_asm_kThumb2_adjust,
+ expected_asm_kThumb2_adjust + arraysize(expected_asm_kThumb2_adjust));
+ std::vector<uint8_t> expected_cfi(
+ expected_cfi_kThumb2_adjust,
+ expected_cfi_kThumb2_adjust + arraysize(expected_cfi_kThumb2_adjust));
+ SetUpFrame(kThumb2);
+#define __ down_cast<arm::Thumb2Assembler*>(GetCodeGenerator()->GetAssembler())->
+ Label target;
+ __ CompareAndBranchIfZero(arm::R0, &target);
+ // Push the target out of range of CBZ.
+ for (size_t i = 0; i != 65; ++i) {
+ __ ldr(arm::R0, arm::Address(arm::R0));
+ }
+ __ Bind(&target);
+#undef __
+ Finish();
+ Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi);
+}
+
+TEST_F(OptimizingCFITest, kMipsAdjust) {
+  // One NOP in the delay slot; 1 << 15 NOPs occupy 1 << 17 bytes, exceeding the 18-bit signed range.
+ static constexpr size_t kNumNops = 1u + (1u << 15);
+ std::vector<uint8_t> expected_asm(
+ expected_asm_kMips_adjust_head,
+ expected_asm_kMips_adjust_head + arraysize(expected_asm_kMips_adjust_head));
+ expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u);
+ expected_asm.insert(
+ expected_asm.end(),
+ expected_asm_kMips_adjust_tail,
+ expected_asm_kMips_adjust_tail + arraysize(expected_asm_kMips_adjust_tail));
+ std::vector<uint8_t> expected_cfi(
+ expected_cfi_kMips_adjust,
+ expected_cfi_kMips_adjust + arraysize(expected_cfi_kMips_adjust));
+ SetUpFrame(kMips);
+#define __ down_cast<mips::MipsAssembler*>(GetCodeGenerator()->GetAssembler())->
+ mips::MipsLabel target;
+ __ Beqz(mips::A0, &target);
+ // Push the target out of range of BEQZ.
+ for (size_t i = 0; i != kNumNops; ++i) {
+ __ Nop();
+ }
+ __ Bind(&target);
+#undef __
+ Finish();
+ Check(kMips, "kMips_adjust", expected_asm, expected_cfi);
+}
+
+TEST_F(OptimizingCFITest, kMips64Adjust) {
+  // One NOP in the forbidden slot; 1 << 15 NOPs occupy 1 << 17 bytes, exceeding the 18-bit signed range.
+ static constexpr size_t kNumNops = 1u + (1u << 15);
+ std::vector<uint8_t> expected_asm(
+ expected_asm_kMips64_adjust_head,
+ expected_asm_kMips64_adjust_head + arraysize(expected_asm_kMips64_adjust_head));
+ expected_asm.resize(expected_asm.size() + kNumNops * 4u, 0u);
+ expected_asm.insert(
+ expected_asm.end(),
+ expected_asm_kMips64_adjust_tail,
+ expected_asm_kMips64_adjust_tail + arraysize(expected_asm_kMips64_adjust_tail));
+ std::vector<uint8_t> expected_cfi(
+ expected_cfi_kMips64_adjust,
+ expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust));
+ SetUpFrame(kMips64);
+#define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())->
+ mips64::Mips64Label target;
+ __ Beqc(mips64::A1, mips64::A2, &target);
+ // Push the target out of range of BEQC.
+ for (size_t i = 0; i != kNumNops; ++i) {
+ __ Nop();
+ }
+ __ Bind(&target);
+#undef __
+ Finish();
+ Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi);
+}
#endif // __ANDROID__
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 2c2c55f295..de857295c7 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -138,3 +138,332 @@ static constexpr uint8_t expected_cfi_kX86_64[] = {
// 0x0000002c: ret
// 0x0000002d: .cfi_restore_state
// 0x0000002d: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips[] = {
+ 0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF,
+ 0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7,
+ 0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27,
+ 0x00, 0x00, 0xA4, 0xAF, 0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7,
+ 0x04, 0x00, 0xB5, 0xC7, 0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7,
+ 0x10, 0x00, 0xB0, 0x8F, 0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F,
+ 0x1C, 0x00, 0xBD, 0x27, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips[] = {
+ 0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+ 0x54, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1,
+ 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: addiu r29, r29, -28
+// 0x00000004: .cfi_def_cfa_offset: 28
+// 0x00000004: sw r31, +24(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-4
+// 0x00000008: sw r17, +20(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-8
+// 0x0000000c: sw r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-12
+// 0x00000010: swc1 f22, +8(r29)
+// 0x00000014: swc1 f23, +12(r29)
+// 0x00000018: swc1 f20, +0(r29)
+// 0x0000001c: swc1 f21, +4(r29)
+// 0x00000020: addiu r29, r29, -36
+// 0x00000024: .cfi_def_cfa_offset: 64
+// 0x00000024: sw r4, +0(r29)
+// 0x00000028: .cfi_remember_state
+// 0x00000028: addiu r29, r29, 36
+// 0x0000002c: .cfi_def_cfa_offset: 28
+// 0x0000002c: lwc1 f20, +0(r29)
+// 0x00000030: lwc1 f21, +4(r29)
+// 0x00000034: lwc1 f22, +8(r29)
+// 0x00000038: lwc1 f23, +12(r29)
+// 0x0000003c: lw r16, +16(r29)
+// 0x00000040: .cfi_restore: r16
+// 0x00000040: lw r17, +20(r29)
+// 0x00000044: .cfi_restore: r17
+// 0x00000044: lw r31, +24(r29)
+// 0x00000048: .cfi_restore: r31
+// 0x00000048: addiu r29, r29, 28
+// 0x0000004c: .cfi_def_cfa_offset: 0
+// 0x0000004c: jr r31
+// 0x00000050: nop
+// 0x00000054: .cfi_restore_state
+// 0x00000054: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64[] = {
+ 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
+ 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
+ 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x18, 0x00, 0xBD, 0x67,
+ 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF,
+ 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67,
+ 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64[] = {
+ 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+ 0x4C, 0x0E, 0x40, 0x44, 0x0A, 0x44, 0x0E, 0x28, 0x4C, 0xD0, 0x44, 0xD1,
+ 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: daddiu r29, r29, -40
+// 0x00000004: .cfi_def_cfa_offset: 40
+// 0x00000004: sd r31, +32(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-8
+// 0x00000008: sd r17, +24(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-16
+// 0x0000000c: sd r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-24
+// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: daddiu r29, r29, -24
+// 0x0000001c: .cfi_def_cfa_offset: 64
+// 0x0000001c: sd r4, +0(r29)
+// 0x00000020: .cfi_remember_state
+// 0x00000020: daddiu r29, r29, 24
+// 0x00000024: .cfi_def_cfa_offset: 40
+// 0x00000024: ldc1 f24, +0(r29)
+// 0x00000028: ldc1 f25, +8(r29)
+// 0x0000002c: ld r16, +16(r29)
+// 0x00000030: .cfi_restore: r16
+// 0x00000030: ld r17, +24(r29)
+// 0x00000034: .cfi_restore: r17
+// 0x00000034: ld r31, +32(r29)
+// 0x00000038: .cfi_restore: r31
+// 0x00000038: daddiu r29, r29, 40
+// 0x0000003c: .cfi_def_cfa_offset: 0
+// 0x0000003c: jr r31
+// 0x00000040: nop
+// 0x00000044: .cfi_restore_state
+// 0x00000044: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
+ 0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x00, 0x28,
+ 0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
+ 0x0B, 0xB0, 0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD,
+};
+static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
+ 0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
+ 0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A,
+ 0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B,
+ 0x0E, 0x40,
+};
+// 0x00000000: push {r5, r6, lr}
+// 0x00000002: .cfi_def_cfa_offset: 12
+// 0x00000002: .cfi_offset: r5 at cfa-12
+// 0x00000002: .cfi_offset: r6 at cfa-8
+// 0x00000002: .cfi_offset: r14 at cfa-4
+// 0x00000002: vpush.f32 {s16-s17}
+// 0x00000006: .cfi_def_cfa_offset: 20
+// 0x00000006: .cfi_offset_extended: r80 at cfa-20
+// 0x00000006: .cfi_offset_extended: r81 at cfa-16
+// 0x00000006: sub sp, sp, #44
+// 0x00000008: .cfi_def_cfa_offset: 64
+// 0x00000008: str r0, [sp, #0]
+// 0x0000000a: cmp r0, #0
+// 0x0000000c: beq +128 (0x00000090)
+// 0x0000000e: ldr r0, [r0, #0]
+// 0x00000010: ldr r0, [r0, #0]
+// 0x00000012: ldr r0, [r0, #0]
+// 0x00000014: ldr r0, [r0, #0]
+// 0x00000016: ldr r0, [r0, #0]
+// 0x00000018: ldr r0, [r0, #0]
+// 0x0000001a: ldr r0, [r0, #0]
+// 0x0000001c: ldr r0, [r0, #0]
+// 0x0000001e: ldr r0, [r0, #0]
+// 0x00000020: ldr r0, [r0, #0]
+// 0x00000022: ldr r0, [r0, #0]
+// 0x00000024: ldr r0, [r0, #0]
+// 0x00000026: ldr r0, [r0, #0]
+// 0x00000028: ldr r0, [r0, #0]
+// 0x0000002a: ldr r0, [r0, #0]
+// 0x0000002c: ldr r0, [r0, #0]
+// 0x0000002e: ldr r0, [r0, #0]
+// 0x00000030: ldr r0, [r0, #0]
+// 0x00000032: ldr r0, [r0, #0]
+// 0x00000034: ldr r0, [r0, #0]
+// 0x00000036: ldr r0, [r0, #0]
+// 0x00000038: ldr r0, [r0, #0]
+// 0x0000003a: ldr r0, [r0, #0]
+// 0x0000003c: ldr r0, [r0, #0]
+// 0x0000003e: ldr r0, [r0, #0]
+// 0x00000040: ldr r0, [r0, #0]
+// 0x00000042: ldr r0, [r0, #0]
+// 0x00000044: ldr r0, [r0, #0]
+// 0x00000046: ldr r0, [r0, #0]
+// 0x00000048: ldr r0, [r0, #0]
+// 0x0000004a: ldr r0, [r0, #0]
+// 0x0000004c: ldr r0, [r0, #0]
+// 0x0000004e: ldr r0, [r0, #0]
+// 0x00000050: ldr r0, [r0, #0]
+// 0x00000052: ldr r0, [r0, #0]
+// 0x00000054: ldr r0, [r0, #0]
+// 0x00000056: ldr r0, [r0, #0]
+// 0x00000058: ldr r0, [r0, #0]
+// 0x0000005a: ldr r0, [r0, #0]
+// 0x0000005c: ldr r0, [r0, #0]
+// 0x0000005e: ldr r0, [r0, #0]
+// 0x00000060: ldr r0, [r0, #0]
+// 0x00000062: ldr r0, [r0, #0]
+// 0x00000064: ldr r0, [r0, #0]
+// 0x00000066: ldr r0, [r0, #0]
+// 0x00000068: ldr r0, [r0, #0]
+// 0x0000006a: ldr r0, [r0, #0]
+// 0x0000006c: ldr r0, [r0, #0]
+// 0x0000006e: ldr r0, [r0, #0]
+// 0x00000070: ldr r0, [r0, #0]
+// 0x00000072: ldr r0, [r0, #0]
+// 0x00000074: ldr r0, [r0, #0]
+// 0x00000076: ldr r0, [r0, #0]
+// 0x00000078: ldr r0, [r0, #0]
+// 0x0000007a: ldr r0, [r0, #0]
+// 0x0000007c: ldr r0, [r0, #0]
+// 0x0000007e: ldr r0, [r0, #0]
+// 0x00000080: ldr r0, [r0, #0]
+// 0x00000082: ldr r0, [r0, #0]
+// 0x00000084: ldr r0, [r0, #0]
+// 0x00000086: ldr r0, [r0, #0]
+// 0x00000088: ldr r0, [r0, #0]
+// 0x0000008a: ldr r0, [r0, #0]
+// 0x0000008c: ldr r0, [r0, #0]
+// 0x0000008e: ldr r0, [r0, #0]
+// 0x00000090: .cfi_remember_state
+// 0x00000090: add sp, sp, #44
+// 0x00000092: .cfi_def_cfa_offset: 20
+// 0x00000092: vpop.f32 {s16-s17}
+// 0x00000096: .cfi_def_cfa_offset: 12
+// 0x00000096: .cfi_restore_extended: r80
+// 0x00000096: .cfi_restore_extended: r81
+// 0x00000096: pop {r5, r6, pc}
+// 0x00000098: .cfi_restore_state
+// 0x00000098: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
+ 0xE4, 0xFF, 0xBD, 0x27, 0x18, 0x00, 0xBF, 0xAF, 0x14, 0x00, 0xB1, 0xAF,
+ 0x10, 0x00, 0xB0, 0xAF, 0x08, 0x00, 0xB6, 0xE7, 0x0C, 0x00, 0xB7, 0xE7,
+ 0x00, 0x00, 0xB4, 0xE7, 0x04, 0x00, 0xB5, 0xE7, 0xDC, 0xFF, 0xBD, 0x27,
+ 0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
+ 0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
+ 0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
+ 0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
+};
+static constexpr uint8_t expected_asm_kMips_adjust_tail[] = {
+ 0x24, 0x00, 0xBD, 0x27, 0x00, 0x00, 0xB4, 0xC7, 0x04, 0x00, 0xB5, 0xC7,
+ 0x08, 0x00, 0xB6, 0xC7, 0x0C, 0x00, 0xB7, 0xC7, 0x10, 0x00, 0xB0, 0x8F,
+ 0x14, 0x00, 0xB1, 0x8F, 0x18, 0x00, 0xBF, 0x8F, 0x1C, 0x00, 0xBD, 0x27,
+ 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips_adjust[] = {
+ 0x44, 0x0E, 0x1C, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
+ 0x54, 0x0E, 0x40, 0x4C, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00,
+ 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x1C, 0x54, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
+ 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+};
+// 0x00000000: addiu r29, r29, -28
+// 0x00000004: .cfi_def_cfa_offset: 28
+// 0x00000004: sw r31, +24(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-4
+// 0x00000008: sw r17, +20(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-8
+// 0x0000000c: sw r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-12
+// 0x00000010: swc1 f22, +8(r29)
+// 0x00000014: swc1 f23, +12(r29)
+// 0x00000018: swc1 f20, +0(r29)
+// 0x0000001c: swc1 f21, +4(r29)
+// 0x00000020: addiu r29, r29, -36
+// 0x00000024: .cfi_def_cfa_offset: 64
+// 0x00000024: sw r4, +0(r29)
+// 0x00000028: bne r0, r4, 0x0000004c ; +36
+// 0x0000002c: addiu r29, r29, -4
+// 0x00000030: .cfi_def_cfa_offset: 68
+// 0x00000030: sw r31, +0(r29)
+// 0x00000034: bltzal r0, 0x00000038 ; +4
+// 0x00000038: lui r1, 0x20000
+// 0x0000003c: ori r1, r1, 24
+// 0x00000040: addu r1, r1, r31
+// 0x00000044: lw r31, +0(r29)
+// 0x00000048: jr r1
+// 0x0000004c: addiu r29, r29, 4
+// 0x00000050: .cfi_def_cfa_offset: 64
+// 0x00000050: nop
+// ...
+// 0x00020050: nop
+// 0x00020054: .cfi_remember_state
+// 0x00020054: addiu r29, r29, 36
+// 0x00020058: .cfi_def_cfa_offset: 28
+// 0x00020058: lwc1 f20, +0(r29)
+// 0x0002005c: lwc1 f21, +4(r29)
+// 0x00020060: lwc1 f22, +8(r29)
+// 0x00020064: lwc1 f23, +12(r29)
+// 0x00020068: lw r16, +16(r29)
+// 0x0002006c: .cfi_restore: r16
+// 0x0002006c: lw r17, +20(r29)
+// 0x00020070: .cfi_restore: r17
+// 0x00020070: lw r31, +24(r29)
+// 0x00020074: .cfi_restore: r31
+// 0x00020074: addiu r29, r29, 28
+// 0x00020078: .cfi_def_cfa_offset: 0
+// 0x00020078: jr r31
+// 0x0002007c: nop
+// 0x00020080: .cfi_restore_state
+// 0x00020080: .cfi_def_cfa_offset: 64
+
+static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
+ 0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
+ 0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
+ 0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60,
+ 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
+};
+static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
+ 0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7,
+ 0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF,
+ 0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+};
+static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
+ 0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+ 0x4C, 0x0E, 0x40, 0x04, 0x14, 0x00, 0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28,
+ 0x4C, 0xD0, 0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E,
+ 0x40,
+};
+// 0x00000000: daddiu r29, r29, -40
+// 0x00000004: .cfi_def_cfa_offset: 40
+// 0x00000004: sd r31, +32(r29)
+// 0x00000008: .cfi_offset: r31 at cfa-8
+// 0x00000008: sd r17, +24(r29)
+// 0x0000000c: .cfi_offset: r17 at cfa-16
+// 0x0000000c: sd r16, +16(r29)
+// 0x00000010: .cfi_offset: r16 at cfa-24
+// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000018: daddiu r29, r29, -24
+// 0x0000001c: .cfi_def_cfa_offset: 64
+// 0x0000001c: sd r4, +0(r29)
+// 0x00000020: bnec r5, r6, 0x0000002c ; +12
+// 0x00000024: auipc r1, 2
+// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080
+// 0x0000002c: nop
+// ...
+// 0x0002002c: nop
+// 0x00020030: .cfi_remember_state
+// 0x00020030: daddiu r29, r29, 24
+// 0x00020034: .cfi_def_cfa_offset: 40
+// 0x00020034: ldc1 f24, +0(r29)
+// 0x00020038: ldc1 f25, +8(r29)
+// 0x0002003c: ld r16, +16(r29)
+// 0x00020040: .cfi_restore: r16
+// 0x00020040: ld r17, +24(r29)
+// 0x00020044: .cfi_restore: r17
+// 0x00020044: ld r31, +32(r29)
+// 0x00020048: .cfi_restore: r31
+// 0x00020048: daddiu r29, r29, 40
+// 0x0002004c: .cfi_def_cfa_offset: 0
+// 0x0002004c: jr r31
+// 0x00020050: nop
+// 0x00020054: .cfi_restore_state
+// 0x00020054: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 8cb2cfc816..cae2d3f01b 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -20,11 +20,15 @@
#include <stdint.h>
+#ifdef ART_ENABLE_CODEGEN_arm
+#include "dex_cache_array_fixups_arm.h"
+#endif
+
#ifdef ART_ENABLE_CODEGEN_arm64
#include "instruction_simplifier_arm64.h"
#endif
#ifdef ART_ENABLE_CODEGEN_x86
-#include "constant_area_fixups_x86.h"
+#include "pc_relative_fixups_x86.h"
#endif
#include "art_method-inl.h"
@@ -56,6 +60,7 @@
#include "inliner.h"
#include "instruction_simplifier.h"
#include "intrinsics.h"
+#include "jit/jit_code_cache.h"
#include "licm.h"
#include "jni/quick/jni_compiler.h"
#include "load_store_elimination.h"
@@ -109,24 +114,23 @@ class PassScope;
class PassObserver : public ValueObject {
public:
PassObserver(HGraph* graph,
- const char* method_name,
CodeGenerator* codegen,
std::ostream* visualizer_output,
CompilerDriver* compiler_driver)
: graph_(graph),
- method_name_(method_name),
+ cached_method_name_(),
timing_logger_enabled_(compiler_driver->GetDumpPasses()),
- timing_logger_(method_name, true, true),
+ timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
disasm_info_(graph->GetArena()),
visualizer_enabled_(!compiler_driver->GetDumpCfgFileName().empty()),
visualizer_(visualizer_output, graph, *codegen),
graph_in_bad_state_(false) {
if (timing_logger_enabled_ || visualizer_enabled_) {
- if (!IsVerboseMethod(compiler_driver, method_name)) {
+ if (!IsVerboseMethod(compiler_driver, GetMethodName())) {
timing_logger_enabled_ = visualizer_enabled_ = false;
}
if (visualizer_enabled_) {
- visualizer_.PrintHeader(method_name_);
+ visualizer_.PrintHeader(GetMethodName());
codegen->SetDisassemblyInformation(&disasm_info_);
}
}
@@ -134,7 +138,7 @@ class PassObserver : public ValueObject {
~PassObserver() {
if (timing_logger_enabled_) {
- LOG(INFO) << "TIMINGS " << method_name_;
+ LOG(INFO) << "TIMINGS " << GetMethodName();
LOG(INFO) << Dumpable<TimingLogger>(timing_logger_);
}
}
@@ -147,6 +151,14 @@ class PassObserver : public ValueObject {
void SetGraphInBadState() { graph_in_bad_state_ = true; }
+ const char* GetMethodName() {
+ // PrettyMethod() is expensive, so we delay calling it until we actually have to.
+ if (cached_method_name_.empty()) {
+ cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile());
+ }
+ return cached_method_name_.c_str();
+ }
+
private:
void StartPass(const char* pass_name) {
// Dump graph first, then start timer.
@@ -205,7 +217,8 @@ class PassObserver : public ValueObject {
}
HGraph* const graph_;
- const char* method_name_;
+
+ std::string cached_method_name_;
bool timing_logger_enabled_;
TimingLogger timing_logger_;
@@ -258,15 +271,6 @@ class OptimizingCompiler FINAL : public Compiler {
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache) const OVERRIDE;
- CompiledMethod* TryCompile(const DexFile::CodeItem* code_item,
- uint32_t access_flags,
- InvokeType invoke_type,
- uint16_t class_def_idx,
- uint32_t method_idx,
- jobject class_loader,
- const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const;
-
CompiledMethod* JniCompile(uint32_t access_flags,
uint32_t method_idx,
const DexFile& dex_file) const OVERRIDE {
@@ -291,23 +295,45 @@ class OptimizingCompiler FINAL : public Compiler {
}
}
+ bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
+ OVERRIDE
+ SHARED_REQUIRES(Locks::mutator_lock_);
+
private:
// Whether we should run any optimization or register allocation. If false, will
// just run the code generation after the graph was built.
const bool run_optimizations_;
- // Optimize and compile `graph`.
- CompiledMethod* CompileOptimized(HGraph* graph,
- CodeGenerator* codegen,
- CompilerDriver* driver,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer) const;
-
- // Just compile without doing optimizations.
- CompiledMethod* CompileBaseline(CodeGenerator* codegen,
- CompilerDriver* driver,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer) const;
+ // Create a 'CompiledMethod' for an optimized graph.
+ CompiledMethod* EmitOptimized(ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
+ CodeGenerator* codegen,
+ CompilerDriver* driver) const;
+
+ // Create a 'CompiledMethod' for a non-optimized graph.
+ CompiledMethod* EmitBaseline(ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
+ CodeGenerator* codegen,
+ CompilerDriver* driver) const;
+
+ // Try compiling a method and return the code generator used for
+ // compiling it.
+ // This method:
+ // 1) Builds the graph. Returns null if it failed to build it.
+ // 2) If `run_optimizations_` is set:
+ // 2.1) Transform the graph to SSA. Returns null if it failed.
+ // 2.2) Run optimizations on the graph, including register allocator.
+ // 3) Generate code with the `code_allocator` provided.
+ CodeGenerator* TryCompile(ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
+ const DexFile::CodeItem* code_item,
+ uint32_t access_flags,
+ InvokeType invoke_type,
+ uint16_t class_def_idx,
+ uint32_t method_idx,
+ jobject class_loader,
+ const DexFile& dex_file,
+ Handle<mirror::DexCache> dex_cache) const;
std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
@@ -369,6 +395,15 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) {
|| instruction_set == kX86_64;
}
+// Read barriers are supported on ARM, ARM64, x86 and x86-64 at the moment.
+// TODO: Add support for other architectures and remove this function.
+static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
+ return instruction_set == kArm64
+ || instruction_set == kThumb2
+ || instruction_set == kX86
+ || instruction_set == kX86_64;
+}
+
static void RunOptimizations(HOptimization* optimizations[],
size_t length,
PassObserver* pass_observer) {
@@ -391,20 +426,9 @@ static void MaybeRunInliner(HGraph* graph,
if (!should_inline) {
return;
}
-
- ArenaAllocator* arena = graph->GetArena();
- HInliner* inliner = new (arena) HInliner(
+ HInliner* inliner = new (graph->GetArena()) HInliner(
graph, codegen, dex_compilation_unit, dex_compilation_unit, driver, handles, stats);
- ReferenceTypePropagation* type_propagation =
- new (arena) ReferenceTypePropagation(graph, handles,
- "reference_type_propagation_after_inlining");
-
- HOptimization* optimizations[] = {
- inliner,
- // Run another type propagation phase: inlining will open up more opportunities
- // to remove checkcast/instanceof and null checks.
- type_propagation,
- };
+ HOptimization* optimizations[] = { inliner };
RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
}
@@ -415,6 +439,17 @@ static void RunArchOptimizations(InstructionSet instruction_set,
PassObserver* pass_observer) {
ArenaAllocator* arena = graph->GetArena();
switch (instruction_set) {
+#ifdef ART_ENABLE_CODEGEN_arm
+ case kThumb2:
+ case kArm: {
+ arm::DexCacheArrayFixups* fixups = new (arena) arm::DexCacheArrayFixups(graph, stats);
+ HOptimization* arm_optimizations[] = {
+ fixups
+ };
+ RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
+ break;
+ }
+#endif
#ifdef ART_ENABLE_CODEGEN_arm64
case kArm64: {
arm64::InstructionSimplifierArm64* simplifier =
@@ -432,10 +467,9 @@ static void RunArchOptimizations(InstructionSet instruction_set,
#endif
#ifdef ART_ENABLE_CODEGEN_x86
case kX86: {
- x86::ConstantAreaFixups* constant_area_fixups =
- new (arena) x86::ConstantAreaFixups(graph, stats);
+ x86::PcRelativeFixups* pc_relative_fixups = new (arena) x86::PcRelativeFixups(graph, stats);
HOptimization* x86_optimizations[] = {
- constant_area_fixups
+ pc_relative_fixups
};
RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer);
break;
@@ -446,13 +480,32 @@ static void RunArchOptimizations(InstructionSet instruction_set,
}
}
+NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects.
+static void AllocateRegisters(HGraph* graph,
+ CodeGenerator* codegen,
+ PassObserver* pass_observer) {
+ PrepareForRegisterAllocation(graph).Run();
+ SsaLivenessAnalysis liveness(graph, codegen);
+ {
+ PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer);
+ liveness.Analyze();
+ }
+ {
+ PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
+ RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
+ }
+}
+
static void RunOptimizations(HGraph* graph,
CodeGenerator* codegen,
CompilerDriver* driver,
OptimizingCompilerStats* stats,
const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer,
- StackHandleScopeCollection* handles) {
+ PassObserver* pass_observer) {
+ ScopedObjectAccess soa(Thread::Current());
+ StackHandleScopeCollection handles(soa.Self());
+ ScopedThreadSuspension sts(soa.Self(), kNative);
+
ArenaAllocator* arena = graph->GetArena();
HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
graph, stats, HDeadCodeElimination::kInitialDeadCodeEliminationPassName);
@@ -462,14 +515,15 @@ static void RunOptimizations(HGraph* graph,
InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
HBooleanSimplifier* boolean_simplify = new (arena) HBooleanSimplifier(graph);
HConstantFolding* fold2 = new (arena) HConstantFolding(graph, "constant_folding_after_inlining");
+ HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding_after_bce");
SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
LICM* licm = new (arena) LICM(graph, *side_effects);
LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
- BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, induction);
+ BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
ReferenceTypePropagation* type_propagation =
- new (arena) ReferenceTypePropagation(graph, handles);
+ new (arena) ReferenceTypePropagation(graph, &handles);
HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
graph, stats, "instruction_simplifier_after_types");
@@ -492,12 +546,13 @@ static void RunOptimizations(HGraph* graph,
RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer);
- MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, handles);
+ MaybeRunInliner(graph, codegen, driver, stats, dex_compilation_unit, pass_observer, &handles);
// TODO: Update passes incompatible with try/catch so we have the same
// pipeline for all methods.
if (graph->HasTryCatch()) {
HOptimization* optimizations2[] = {
+ boolean_simplify,
side_effects,
gvn,
dce2,
@@ -519,6 +574,7 @@ static void RunOptimizations(HGraph* graph,
licm,
induction,
bce,
+ fold3, // evaluates code generated by dynamic bce
simplify3,
lse,
dce2,
@@ -532,6 +588,7 @@ static void RunOptimizations(HGraph* graph,
}
RunArchOptimizations(driver->GetInstructionSet(), graph, stats, pass_observer);
+ AllocateRegisters(graph, codegen, pass_observer);
}
// The stack map we generate must be 4-byte aligned on ARM. Since existing
@@ -545,22 +602,6 @@ static ArrayRef<const uint8_t> AlignVectorSize(ArenaVector<uint8_t>& vector) {
return ArrayRef<const uint8_t>(vector);
}
-NO_INLINE // Avoid increasing caller's frame size by large stack-allocated objects.
-static void AllocateRegisters(HGraph* graph,
- CodeGenerator* codegen,
- PassObserver* pass_observer) {
- PrepareForRegisterAllocation(graph).Run();
- SsaLivenessAnalysis liveness(graph, codegen);
- {
- PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer);
- liveness.Analyze();
- }
- {
- PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer);
- RegisterAllocator(graph->GetArena(), codegen, liveness).AllocateRegisters();
- }
-}
-
static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) {
ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter());
codegen->EmitLinkerPatches(&linker_patches);
@@ -574,74 +615,40 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen)
return linker_patches;
}
-CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer) const {
- ScopedObjectAccess soa(Thread::Current());
- StackHandleScopeCollection handles(soa.Self());
- soa.Self()->TransitionFromRunnableToSuspended(kNative);
- RunOptimizations(graph,
- codegen,
- compiler_driver,
- compilation_stats_.get(),
- dex_compilation_unit,
- pass_observer,
- &handles);
-
- AllocateRegisters(graph, codegen, pass_observer);
-
- ArenaAllocator* arena = graph->GetArena();
- CodeVectorAllocator allocator(arena);
- DefaultSrcMap src_mapping_table;
- codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
- ? &src_mapping_table
- : nullptr);
- codegen->CompileOptimized(&allocator);
-
+CompiledMethod* OptimizingCompiler::EmitOptimized(ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
+ CodeGenerator* codegen,
+ CompilerDriver* compiler_driver) const {
ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
-
ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
- codegen->BuildStackMaps(&stack_map);
-
- MaybeRecordStat(MethodCompilationStat::kCompiledOptimized);
+ stack_map.resize(codegen->ComputeStackMapsSize());
+ codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
codegen->GetInstructionSet(),
- ArrayRef<const uint8_t>(allocator.GetMemory()),
+ ArrayRef<const uint8_t>(code_allocator->GetMemory()),
// Follow Quick's behavior and set the frame size to zero if it is
// considered "empty" (see the definition of
// art::CodeGenerator::HasEmptyFrame).
codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
- ArrayRef<const SrcMapElem>(src_mapping_table),
+ ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()),
ArrayRef<const uint8_t>(), // mapping_table.
ArrayRef<const uint8_t>(stack_map),
ArrayRef<const uint8_t>(), // native_gc_map.
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
ArrayRef<const LinkerPatch>(linker_patches));
- pass_observer->DumpDisassembly();
- soa.Self()->TransitionFromSuspendedToRunnable();
return compiled_method;
}
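Editor's note: a standalone sketch (hypothetical types, not the ART API) of the size-then-fill protocol EmitOptimized now uses for stack maps: ask the producer for the exact size, allocate once, then let it fill a fixed-size region.

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

// Hypothetical stand-in for a code generator that can report its stack map size.
struct StackMapSource {
  std::vector<uint8_t> encoded;
  std::size_t ComputeSize() const { return encoded.size(); }
  void Fill(uint8_t* data, std::size_t size) const {
    if (size != 0) {
      std::memcpy(data, encoded.data(), size);  // must fill exactly `size` bytes
    }
  }
};

std::vector<uint8_t> EmitStackMaps(const StackMapSource& source) {
  std::vector<uint8_t> out(source.ComputeSize());  // single allocation, exact size
  source.Fill(out.data(), out.size());
  return out;
}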
-CompiledMethod* OptimizingCompiler::CompileBaseline(
+CompiledMethod* OptimizingCompiler::EmitBaseline(
+ ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer) const {
- ArenaAllocator* arena = codegen->GetGraph()->GetArena();
- CodeVectorAllocator allocator(arena);
- DefaultSrcMap src_mapping_table;
- codegen->SetSrcMap(compiler_driver->GetCompilerOptions().GetGenerateDebugInfo()
- ? &src_mapping_table
- : nullptr);
- codegen->CompileBaseline(&allocator);
-
+ CompilerDriver* compiler_driver) const {
ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ArenaVector<uint8_t> mapping_table(arena->Adapter(kArenaAllocBaselineMaps));
@@ -649,44 +656,43 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
ArenaVector<uint8_t> vmap_table(arena->Adapter(kArenaAllocBaselineMaps));
codegen->BuildVMapTable(&vmap_table);
ArenaVector<uint8_t> gc_map(arena->Adapter(kArenaAllocBaselineMaps));
- codegen->BuildNativeGCMap(&gc_map, dex_compilation_unit);
+ codegen->BuildNativeGCMap(&gc_map, *compiler_driver);
- MaybeRecordStat(MethodCompilationStat::kCompiledBaseline);
CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
codegen->GetInstructionSet(),
- ArrayRef<const uint8_t>(allocator.GetMemory()),
+ ArrayRef<const uint8_t>(code_allocator->GetMemory()),
// Follow Quick's behavior and set the frame size to zero if it is
// considered "empty" (see the definition of
// art::CodeGenerator::HasEmptyFrame).
codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
- ArrayRef<const SrcMapElem>(src_mapping_table),
+ ArrayRef<const SrcMapElem>(codegen->GetSrcMappingTable()),
AlignVectorSize(mapping_table),
AlignVectorSize(vmap_table),
AlignVectorSize(gc_map),
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
ArrayRef<const LinkerPatch>(linker_patches));
- pass_observer->DumpDisassembly();
return compiled_method;
}
-CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_item,
- uint32_t access_flags,
- InvokeType invoke_type,
- uint16_t class_def_idx,
- uint32_t method_idx,
- jobject class_loader,
- const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const {
- std::string method_name = PrettyMethod(method_idx, dex_file);
+CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
+ CodeVectorAllocator* code_allocator,
+ const DexFile::CodeItem* code_item,
+ uint32_t access_flags,
+ InvokeType invoke_type,
+ uint16_t class_def_idx,
+ uint32_t method_idx,
+ jobject class_loader,
+ const DexFile& dex_file,
+ Handle<mirror::DexCache> dex_cache) const {
MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
CompilerDriver* compiler_driver = GetCompilerDriver();
InstructionSet instruction_set = compiler_driver->GetInstructionSet();
- // Always use the thumb2 assembler: some runtime functionality (like implicit stack
- // overflow checks) assume thumb2.
+ // Always use the Thumb-2 assembler: some runtime functionality
+  // (like implicit stack overflow checks) assumes Thumb-2.
if (instruction_set == kArm) {
instruction_set = kThumb2;
}
@@ -697,6 +703,12 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
return nullptr;
}
+ // When read barriers are enabled, do not attempt to compile for
+ // instruction sets that have no read barrier support.
+ if (kEmitCompilerReadBarrier && !InstructionSetSupportsReadBarrier(instruction_set)) {
+ return nullptr;
+ }
+
if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
return nullptr;
@@ -721,13 +733,10 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
&& compiler_driver->RequiresConstructorBarrier(Thread::Current(),
dex_compilation_unit.GetDexFile(),
dex_compilation_unit.GetClassDefIndex());
- ArenaAllocator arena(Runtime::Current()->GetArenaPool());
- HGraph* graph = new (&arena) HGraph(
- &arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
+ HGraph* graph = new (arena) HGraph(
+ arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
- bool shouldOptimize = method_name.find("$opt$reg$") != std::string::npos && run_optimizations_;
-
std::unique_ptr<CodeGenerator> codegen(
CodeGenerator::Create(graph,
instruction_set,
@@ -741,7 +750,6 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
compiler_driver->GetCompilerOptions().GetGenerateDebugInfo());
PassObserver pass_observer(graph,
- method_name.c_str(),
codegen.get(),
visualizer_output_.get(),
compiler_driver);
@@ -769,7 +777,7 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
interpreter_metadata,
dex_cache);
- VLOG(compiler) << "Building " << method_name;
+ VLOG(compiler) << "Building " << pass_observer.GetMethodName();
{
PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer);
@@ -779,58 +787,40 @@ CompiledMethod* OptimizingCompiler::TryCompile(const DexFile::CodeItem* code_ite
}
}
- bool can_allocate_registers = RegisterAllocator::CanAllocateRegistersFor(*graph, instruction_set);
-
- // `run_optimizations_` is set explicitly (either through a compiler filter
- // or the debuggable flag). If it is set, we can run baseline. Otherwise, we fall back
- // to Quick.
- bool can_use_baseline = !run_optimizations_ && builder.CanUseBaselineForStringInit();
- CompiledMethod* compiled_method = nullptr;
- if (run_optimizations_ && can_allocate_registers) {
- VLOG(compiler) << "Optimizing " << method_name;
-
+ VLOG(compiler) << "Optimizing " << pass_observer.GetMethodName();
+ if (run_optimizations_) {
{
PassScope scope(SsaBuilder::kSsaBuilderPassName, &pass_observer);
if (!graph->TryBuildingSsa()) {
// We could not transform the graph to SSA, bailout.
- LOG(INFO) << "Skipping compilation of " << method_name << ": it contains a non natural loop";
+ LOG(INFO) << "Skipping compilation of " << pass_observer.GetMethodName()
+                  << ": it contains a non-natural loop";
MaybeRecordStat(MethodCompilationStat::kNotCompiledCannotBuildSSA);
pass_observer.SetGraphInBadState();
return nullptr;
}
}
- compiled_method = CompileOptimized(graph,
- codegen.get(),
- compiler_driver,
- dex_compilation_unit,
- &pass_observer);
- } else if (shouldOptimize && can_allocate_registers) {
- LOG(FATAL) << "Could not allocate registers in optimizing compiler";
- UNREACHABLE();
- } else if (can_use_baseline) {
- VLOG(compiler) << "Compile baseline " << method_name;
-
- if (!run_optimizations_) {
- MaybeRecordStat(MethodCompilationStat::kNotOptimizedDisabled);
- } else if (!can_allocate_registers) {
- MaybeRecordStat(MethodCompilationStat::kNotOptimizedRegisterAllocator);
- }
-
- compiled_method = CompileBaseline(codegen.get(),
- compiler_driver,
- dex_compilation_unit,
- &pass_observer);
+ RunOptimizations(graph,
+ codegen.get(),
+ compiler_driver,
+ compilation_stats_.get(),
+ dex_compilation_unit,
+ &pass_observer);
+ codegen->CompileOptimized(code_allocator);
+ } else {
+ codegen->CompileBaseline(code_allocator);
}
+ pass_observer.DumpDisassembly();
if (kArenaAllocatorCountAllocations) {
- if (arena.BytesAllocated() > 4 * MB) {
- MemStats mem_stats(arena.GetMemStats());
+ if (arena->BytesAllocated() > 4 * MB) {
+ MemStats mem_stats(arena->GetMemStats());
LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
}
}
- return compiled_method;
+ return codegen.release();
}
static bool CanHandleVerificationFailure(const VerifiedMethod* verified_method) {
@@ -852,33 +842,50 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
Handle<mirror::DexCache> dex_cache) const {
CompilerDriver* compiler_driver = GetCompilerDriver();
CompiledMethod* method = nullptr;
- if (Runtime::Current()->IsAotCompiler()) {
- const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
- DCHECK(!verified_method->HasRuntimeThrow());
- if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
- || CanHandleVerificationFailure(verified_method)) {
- method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
- method_idx, jclass_loader, dex_file, dex_cache);
- } else {
- if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
- MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+ DCHECK(Runtime::Current()->IsAotCompiler());
+ const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
+ DCHECK(!verified_method->HasRuntimeThrow());
+ if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file)
+ || CanHandleVerificationFailure(verified_method)) {
+ ArenaAllocator arena(Runtime::Current()->GetArenaPool());
+ CodeVectorAllocator code_allocator(&arena);
+ std::unique_ptr<CodeGenerator> codegen(
+ TryCompile(&arena,
+ &code_allocator,
+ code_item,
+ access_flags,
+ invoke_type,
+ class_def_idx,
+ method_idx,
+ jclass_loader,
+ dex_file,
+ dex_cache));
+ if (codegen.get() != nullptr) {
+ MaybeRecordStat(MethodCompilationStat::kCompiled);
+ if (run_optimizations_) {
+ method = EmitOptimized(&arena, &code_allocator, codegen.get(), compiler_driver);
} else {
- MaybeRecordStat(MethodCompilationStat::kNotCompiledClassNotVerified);
+ method = EmitBaseline(&arena, &code_allocator, codegen.get(), compiler_driver);
}
}
} else {
- // This is for the JIT compiler, which has already ensured the class is verified.
- // We can go straight to compiling.
- DCHECK(Runtime::Current()->UseJit());
- method = TryCompile(code_item, access_flags, invoke_type, class_def_idx,
- method_idx, jclass_loader, dex_file, dex_cache);
+ if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime);
+ } else {
+ MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError);
+ }
}
if (kIsDebugBuild &&
IsCompilingWithCoreImage() &&
- IsInstructionSetSupported(compiler_driver->GetInstructionSet())) {
- // For testing purposes, we put a special marker on method names that should be compiled
- // with this compiler. This makes sure we're not regressing.
+ IsInstructionSetSupported(compiler_driver->GetInstructionSet()) &&
+ (!kEmitCompilerReadBarrier ||
+ InstructionSetSupportsReadBarrier(compiler_driver->GetInstructionSet()))) {
+ // For testing purposes, we put a special marker on method names
+  // that should be compiled with this compiler (when the
+ // instruction set is supported -- and has support for read
+ // barriers, if they are enabled). This makes sure we're not
+ // regressing.
std::string method_name = PrettyMethod(method_idx, dex_file);
bool shouldCompile = method_name.find("$opt$") != std::string::npos;
DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name;
@@ -896,4 +903,70 @@ bool IsCompilingWithCoreImage() {
return EndsWith(image, "core.art") || EndsWith(image, "core-optimizing.art");
}
+bool OptimizingCompiler::JitCompile(Thread* self,
+ jit::JitCodeCache* code_cache,
+ ArtMethod* method) {
+ StackHandleScope<2> hs(self);
+ Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+ method->GetDeclaringClass()->GetClassLoader()));
+ Handle<mirror::DexCache> dex_cache(hs.NewHandle(method->GetDexCache()));
+
+ jobject jclass_loader = class_loader.ToJObject();
+ const DexFile* dex_file = method->GetDexFile();
+ const uint16_t class_def_idx = method->GetClassDefIndex();
+ const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
+ const uint32_t method_idx = method->GetDexMethodIndex();
+ const uint32_t access_flags = method->GetAccessFlags();
+ const InvokeType invoke_type = method->GetInvokeType();
+
+ ArenaAllocator arena(Runtime::Current()->GetArenaPool());
+ CodeVectorAllocator code_allocator(&arena);
+ std::unique_ptr<CodeGenerator> codegen;
+ {
+ // Go to native so that we don't block GC during compilation.
+ ScopedThreadSuspension sts(self, kNative);
+
+ DCHECK(run_optimizations_);
+ codegen.reset(
+ TryCompile(&arena,
+ &code_allocator,
+ code_item,
+ access_flags,
+ invoke_type,
+ class_def_idx,
+ method_idx,
+ jclass_loader,
+ *dex_file,
+ dex_cache));
+ if (codegen.get() == nullptr) {
+ return false;
+ }
+ }
+
+ size_t stack_map_size = codegen->ComputeStackMapsSize();
+ uint8_t* stack_map_data = code_cache->ReserveData(self, stack_map_size);
+ if (stack_map_data == nullptr) {
+ return false;
+ }
+ codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+ const void* code = code_cache->CommitCode(
+ self,
+ method,
+ nullptr,
+ stack_map_data,
+ nullptr,
+ codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
+ codegen->GetCoreSpillMask(),
+ codegen->GetFpuSpillMask(),
+ code_allocator.GetMemory().data(),
+ code_allocator.GetSize());
+
+ if (code == nullptr) {
+ code_cache->ClearData(self, stack_map_data);
+ return false;
+ }
+
+ return true;
+}
+
} // namespace art
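Editor's note: a standalone model (toy cache type, not the JitCodeCache API) of the reserve/commit/rollback protocol JitCompile follows above: reserve space for the stack maps, fill it, commit the code, and release the reserved data if the commit fails.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

struct ToyCodeCache {
  std::vector<uint8_t> data_area;
  uint8_t* ReserveData(std::size_t size) {
    data_area.assign(size, 0);
    return data_area.data();
  }
  bool CommitCode(const std::vector<uint8_t>& code) {
    return !code.empty();  // pretend committing an empty method fails
  }
  void ClearData() { data_area.clear(); }
};

bool EmitToCache(ToyCodeCache* cache,
                 const std::vector<uint8_t>& code,
                 const std::vector<uint8_t>& stack_maps) {
  uint8_t* reserved = cache->ReserveData(stack_maps.size());
  if (reserved == nullptr) {
    return false;
  }
  std::copy(stack_maps.begin(), stack_maps.end(), reserved);
  if (!cache->CommitCode(code)) {
    cache->ClearData();  // roll back the stack map reservation
    return false;
  }
  return true;
}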
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 6375cf1a56..e5ea0f576b 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -17,7 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
#define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_
-#include <sstream>
+#include <iomanip>
#include <string>
#include <type_traits>
@@ -27,18 +27,18 @@ namespace art {
enum MethodCompilationStat {
kAttemptCompilation = 0,
- kCompiledBaseline,
- kCompiledOptimized,
+ kCompiled,
kInlinedInvoke,
kInstructionSimplifications,
kInstructionSimplificationsArch,
kUnresolvedMethod,
kUnresolvedField,
kUnresolvedFieldNotAFastAccess,
+ kRemovedCheckedCast,
+ kRemovedDeadInstruction,
+ kRemovedNullCheck,
kNotCompiledBranchOutsideMethodCode,
kNotCompiledCannotBuildSSA,
- kNotCompiledCantAccesType,
- kNotCompiledClassNotVerified,
kNotCompiledHugeMethod,
kNotCompiledLargeMethodNoBranches,
kNotCompiledMalformedOpcode,
@@ -47,13 +47,8 @@ enum MethodCompilationStat {
kNotCompiledSpaceFilter,
kNotCompiledUnhandledInstruction,
kNotCompiledUnsupportedIsa,
+ kNotCompiledVerificationError,
kNotCompiledVerifyAtRuntime,
- kNotOptimizedDisabled,
- kNotOptimizedRegisterAllocator,
- kNotOptimizedTryCatch,
- kRemovedCheckedCast,
- kRemovedDeadInstruction,
- kRemovedNullCheck,
kLastStat
};
@@ -66,20 +61,19 @@ class OptimizingCompilerStats {
}
void Log() const {
+ if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) {
+ // Log only in debug builds or if the compiler is verbose.
+ return;
+ }
+
if (compile_stats_[kAttemptCompilation] == 0) {
LOG(INFO) << "Did not compile any method.";
} else {
- size_t unoptimized_percent =
- compile_stats_[kCompiledBaseline] * 100 / compile_stats_[kAttemptCompilation];
- size_t optimized_percent =
- compile_stats_[kCompiledOptimized] * 100 / compile_stats_[kAttemptCompilation];
- std::ostringstream oss;
- oss << "Attempted compilation of " << compile_stats_[kAttemptCompilation] << " methods: ";
-
- oss << unoptimized_percent << "% (" << compile_stats_[kCompiledBaseline] << ") unoptimized, ";
- oss << optimized_percent << "% (" << compile_stats_[kCompiledOptimized] << ") optimized, ";
-
- LOG(INFO) << oss.str();
+ float compiled_percent =
+ compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation];
+ LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation]
+ << " methods: " << std::fixed << std::setprecision(2)
+ << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled.";
for (int i = 0; i < kLastStat; i++) {
if (compile_stats_[i] != 0) {
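Editor's note: the percentage reporting in Log() above is plain iostream formatting; a standalone sketch of the same arithmetic (not ART code):

#include <cstddef>
#include <iomanip>
#include <iostream>

int main() {
  const std::size_t attempted = 1000;
  const std::size_t compiled = 955;
  float compiled_percent = compiled * 100.0f / attempted;
  std::cout << "Attempted compilation of " << attempted
            << " methods: " << std::fixed << std::setprecision(2)
            << compiled_percent << "% (" << compiled << ") compiled." << std::endl;
  // Prints: Attempted compilation of 1000 methods: 95.50% (955) compiled.
  return 0;
}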
@@ -92,41 +86,38 @@ class OptimizingCompilerStats {
private:
std::string PrintMethodCompilationStat(MethodCompilationStat stat) const {
+ std::string name;
switch (stat) {
- case kAttemptCompilation : return "kAttemptCompilation";
- case kCompiledBaseline : return "kCompiledBaseline";
- case kCompiledOptimized : return "kCompiledOptimized";
- case kInlinedInvoke : return "kInlinedInvoke";
- case kInstructionSimplifications: return "kInstructionSimplifications";
- case kInstructionSimplificationsArch: return "kInstructionSimplificationsArch";
- case kUnresolvedMethod : return "kUnresolvedMethod";
- case kUnresolvedField : return "kUnresolvedField";
- case kUnresolvedFieldNotAFastAccess : return "kUnresolvedFieldNotAFastAccess";
- case kNotCompiledBranchOutsideMethodCode: return "kNotCompiledBranchOutsideMethodCode";
- case kNotCompiledCannotBuildSSA : return "kNotCompiledCannotBuildSSA";
- case kNotCompiledCantAccesType : return "kNotCompiledCantAccesType";
- case kNotCompiledClassNotVerified : return "kNotCompiledClassNotVerified";
- case kNotCompiledHugeMethod : return "kNotCompiledHugeMethod";
- case kNotCompiledLargeMethodNoBranches : return "kNotCompiledLargeMethodNoBranches";
- case kNotCompiledMalformedOpcode : return "kNotCompiledMalformedOpcode";
- case kNotCompiledNoCodegen : return "kNotCompiledNoCodegen";
- case kNotCompiledPathological : return "kNotCompiledPathological";
- case kNotCompiledSpaceFilter : return "kNotCompiledSpaceFilter";
- case kNotCompiledUnhandledInstruction : return "kNotCompiledUnhandledInstruction";
- case kNotCompiledUnsupportedIsa : return "kNotCompiledUnsupportedIsa";
- case kNotCompiledVerifyAtRuntime : return "kNotCompiledVerifyAtRuntime";
- case kNotOptimizedDisabled : return "kNotOptimizedDisabled";
- case kNotOptimizedRegisterAllocator : return "kNotOptimizedRegisterAllocator";
- case kNotOptimizedTryCatch : return "kNotOptimizedTryCatch";
- case kRemovedCheckedCast: return "kRemovedCheckedCast";
- case kRemovedDeadInstruction: return "kRemovedDeadInstruction";
- case kRemovedNullCheck: return "kRemovedNullCheck";
-
- case kLastStat: break; // Invalid to print out.
+ case kAttemptCompilation : name = "AttemptCompilation"; break;
+ case kCompiled : name = "Compiled"; break;
+ case kInlinedInvoke : name = "InlinedInvoke"; break;
+ case kInstructionSimplifications: name = "InstructionSimplifications"; break;
+ case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break;
+ case kUnresolvedMethod : name = "UnresolvedMethod"; break;
+ case kUnresolvedField : name = "UnresolvedField"; break;
+ case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break;
+ case kRemovedCheckedCast: name = "RemovedCheckedCast"; break;
+ case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break;
+ case kRemovedNullCheck: name = "RemovedNullCheck"; break;
+ case kNotCompiledBranchOutsideMethodCode: name = "NotCompiledBranchOutsideMethodCode"; break;
+ case kNotCompiledCannotBuildSSA : name = "NotCompiledCannotBuildSSA"; break;
+ case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break;
+ case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break;
+ case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break;
+ case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break;
+ case kNotCompiledPathological : name = "NotCompiledPathological"; break;
+ case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break;
+ case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break;
+ case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break;
+ case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break;
+ case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break;
+
+ case kLastStat:
+ LOG(FATAL) << "invalid stat "
+ << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
+ UNREACHABLE();
}
- LOG(FATAL) << "invalid stat "
- << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat);
- UNREACHABLE();
+ return "OptStat#" + name;
}
AtomicInteger compile_stats_[kLastStat];
diff --git a/compiler/optimizing/constant_area_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index c3470002c5..b383f1e1ad 100644
--- a/compiler/optimizing/constant_area_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "constant_area_fixups_x86.h"
+#include "pc_relative_fixups_x86.h"
namespace art {
namespace x86 {
@@ -22,9 +22,18 @@ namespace x86 {
/**
* Finds instructions that need the constant area base as an input.
*/
-class ConstantHandlerVisitor : public HGraphVisitor {
+class PCRelativeHandlerVisitor : public HGraphVisitor {
public:
- explicit ConstantHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
+ explicit PCRelativeHandlerVisitor(HGraph* graph) : HGraphVisitor(graph), base_(nullptr) {}
+
+ void MoveBaseIfNeeded() {
+ if (base_ != nullptr) {
+ // Bring the base closer to the first use (previously, it was in the
+ // entry block) and relieve some pressure on the register allocator
+ // while avoiding recalculation of the base in a loop.
+ base_->MoveBeforeFirstUserAndOutOfLoops();
+ }
+ }
private:
void VisitAdd(HAdd* add) OVERRIDE {
@@ -72,7 +81,7 @@ class ConstantHandlerVisitor : public HGraphVisitor {
void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
// We need to replace the HPackedSwitch with a HX86PackedSwitch in order to
// address the constant area.
- InitializeConstantAreaPointer(switch_insn);
+ InitializePCRelativeBasePointer();
HGraph* graph = GetGraph();
HBasicBlock* block = switch_insn->GetBlock();
HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch(
@@ -84,31 +93,38 @@ class ConstantHandlerVisitor : public HGraphVisitor {
block->ReplaceAndRemoveInstructionWith(switch_insn, x86_switch);
}
- void InitializeConstantAreaPointer(HInstruction* user) {
+ void InitializePCRelativeBasePointer() {
// Ensure we only initialize the pointer once.
if (base_ != nullptr) {
return;
}
- HGraph* graph = GetGraph();
- HBasicBlock* entry = graph->GetEntryBlock();
- base_ = new (graph->GetArena()) HX86ComputeBaseMethodAddress();
- HInstruction* insert_pos = (user->GetBlock() == entry) ? user : entry->GetLastInstruction();
- entry->InsertInstructionBefore(base_, insert_pos);
+    // Insert the base at the start of the entry block; it is moved to a better
+    // position later by MoveBaseIfNeeded().
+ base_ = new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress();
+ HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
+ entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction());
DCHECK(base_ != nullptr);
}
void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) {
- InitializeConstantAreaPointer(insn);
- HGraph* graph = GetGraph();
- HBasicBlock* block = insn->GetBlock();
+ InitializePCRelativeBasePointer();
HX86LoadFromConstantTable* load_constant =
- new (graph->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
- block->InsertInstructionBefore(load_constant, insn);
+ new (GetGraph()->GetArena()) HX86LoadFromConstantTable(base_, value, materialize);
+ insn->GetBlock()->InsertInstructionBefore(load_constant, insn);
insn->ReplaceInput(load_constant, input_index);
}
void HandleInvoke(HInvoke* invoke) {
+ // If this is an invoke-static/-direct with PC-relative dex cache array
+ // addressing, we need the PC-relative address base.
+ HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+ if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasPcRelativeDexCache()) {
+ InitializePCRelativeBasePointer();
+ // Add the extra parameter base_.
+ DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
+ invoke_static_or_direct->AddSpecialInput(base_);
+ }
// Ensure that we can load FP arguments from the constant area.
for (size_t i = 0, e = invoke->InputCount(); i < e; i++) {
HConstant* input = invoke->InputAt(i)->AsConstant();
@@ -123,9 +139,10 @@ class ConstantHandlerVisitor : public HGraphVisitor {
HX86ComputeBaseMethodAddress* base_;
};
-void ConstantAreaFixups::Run() {
- ConstantHandlerVisitor visitor(graph_);
+void PcRelativeFixups::Run() {
+ PCRelativeHandlerVisitor visitor(graph_);
visitor.VisitInsertionOrder();
+ visitor.MoveBaseIfNeeded();
}
} // namespace x86
diff --git a/compiler/optimizing/constant_area_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index 4138039cdd..af708acb03 100644
--- a/compiler/optimizing/constant_area_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -14,8 +14,8 @@
* limitations under the License.
*/
-#ifndef ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
-#define ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
+#ifndef ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
+#define ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
#include "nodes.h"
#include "optimization.h"
@@ -23,10 +23,10 @@
namespace art {
namespace x86 {
-class ConstantAreaFixups : public HOptimization {
+class PcRelativeFixups : public HOptimization {
public:
- ConstantAreaFixups(HGraph* graph, OptimizingCompilerStats* stats)
- : HOptimization(graph, "constant_area_fixups_x86", stats) {}
+ PcRelativeFixups(HGraph* graph, OptimizingCompilerStats* stats)
+ : HOptimization(graph, "pc_relative_fixups_x86", stats) {}
void Run() OVERRIDE;
};
@@ -34,4 +34,4 @@ class ConstantAreaFixups : public HOptimization {
} // namespace x86
} // namespace art
-#endif // ART_COMPILER_OPTIMIZING_CONSTANT_AREA_FIXUPS_X86_H_
+#endif // ART_COMPILER_OPTIMIZING_PC_RELATIVE_FIXUPS_X86_H_
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index ca928ae0f2..d1770b75ab 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -48,16 +48,85 @@ void PrepareForRegisterAllocation::VisitBoundType(HBoundType* bound_type) {
}
void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
- HLoadClass* cls = check->GetLoadClass();
- check->ReplaceWith(cls);
- if (check->GetPrevious() == cls) {
+ // Try to find a static invoke or a new-instance from which this check originated.
+ HInstruction* implicit_clinit = nullptr;
+ for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); it.Advance()) {
+ HInstruction* user = it.Current()->GetUser();
+ if ((user->IsInvokeStaticOrDirect() || user->IsNewInstance()) &&
+ CanMoveClinitCheck(check, user)) {
+ implicit_clinit = user;
+ if (user->IsInvokeStaticOrDirect()) {
+ DCHECK(user->AsInvokeStaticOrDirect()->IsStaticWithExplicitClinitCheck());
+ user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
+ } else {
+ DCHECK(user->IsNewInstance());
+ // We delegate the initialization duty to the allocation.
+ if (user->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectInitialized) {
+ user->AsNewInstance()->SetEntrypoint(kQuickAllocObjectResolved);
+ }
+ }
+ break;
+ }
+ }
+ // If we found a static invoke or new-instance for merging, remove the check
+ // from dominated static invokes.
+ if (implicit_clinit != nullptr) {
+ for (HUseIterator<HInstruction*> it(check->GetUses()); !it.Done(); ) {
+ HInstruction* user = it.Current()->GetUser();
+ // All other uses must be dominated.
+ DCHECK(implicit_clinit->StrictlyDominates(user) || (implicit_clinit == user));
+      it.Advance();  // Advance before removing the node; the next-node reference is preserved.
+ if (user->IsInvokeStaticOrDirect()) {
+ user->AsInvokeStaticOrDirect()->RemoveExplicitClinitCheck(
+ HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+ }
+ }
+ }
+
+ HLoadClass* load_class = check->GetLoadClass();
+ bool can_merge_with_load_class = CanMoveClinitCheck(load_class, check);
+
+ check->ReplaceWith(load_class);
+
+ if (implicit_clinit != nullptr) {
+ // Remove the check from the graph. It has been merged into the invoke or new-instance.
+ check->GetBlock()->RemoveInstruction(check);
+ // Check if we can merge the load class as well.
+ if (can_merge_with_load_class && !load_class->HasUses()) {
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
+ } else if (can_merge_with_load_class) {
// Pass the initialization duty to the `HLoadClass` instruction,
// and remove the instruction from the graph.
- cls->SetMustGenerateClinitCheck(true);
+ load_class->SetMustGenerateClinitCheck(true);
check->GetBlock()->RemoveInstruction(check);
}
}
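Editor's note: a simplified standalone model (toy types, not the HIR API) of the merging step above. The first user that can absorb the class initialization turns its requirement implicit; every other user, which is dominated by it, drops the explicit requirement entirely.

#include <vector>

enum class ClinitRequirement { kExplicit, kImplicit, kNone };

struct ToyUser {
  bool can_absorb_check;  // e.g. same dex pc and same block as the HClinitCheck
  ClinitRequirement requirement = ClinitRequirement::kExplicit;
};

// Returns true if some user absorbed the check, i.e. the check itself can be removed.
bool MergeClinitCheck(std::vector<ToyUser>& users_in_dominance_order) {
  bool absorbed = false;
  for (ToyUser& user : users_in_dominance_order) {
    if (!absorbed && user.can_absorb_check) {
      user.requirement = ClinitRequirement::kImplicit;  // the call/allocation initializes the class
      absorbed = true;
    } else if (absorbed) {
      user.requirement = ClinitRequirement::kNone;      // dominated by the initializing user
    }
  }
  return absorbed;
}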
+void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) {
+ HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass();
+ bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse();
+  // Change the entrypoint to kQuickAllocObject if any of the following holds:
+ // - the class is finalizable (only kQuickAllocObject handles finalizable classes),
+ // - the class needs access checks (we do not know if it's finalizable),
+ // - or the load class has only one use.
+ if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) {
+ instruction->SetEntrypoint(kQuickAllocObject);
+ instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0);
+ // The allocation entry point that deals with access checks does not work with inlined
+ // methods, so we need to check whether this allocation comes from an inlined method.
+ if (has_only_one_use && !instruction->GetEnvironment()->IsFromInlinedInvoke()) {
+ // We can remove the load class from the graph. If it needed access checks, we delegate
+ // the access check to the allocation.
+ if (load_class->NeedsAccessCheck()) {
+ instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck);
+ }
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
+ }
+}
+
void PrepareForRegisterAllocation::VisitCondition(HCondition* condition) {
bool needs_materialization = false;
if (!condition->GetUses().HasOnlyOneUse() || !condition->GetEnvUses().IsEmpty()) {
@@ -86,30 +155,60 @@ void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDire
DCHECK(last_input != nullptr)
<< "Last input is not HLoadClass. It is " << last_input->DebugName();
- // Remove a load class instruction as last input of a static
- // invoke, which has been added (along with a clinit check,
- // removed by PrepareForRegisterAllocation::VisitClinitCheck
- // previously) by the graph builder during the creation of the
- // static invoke instruction, but is no longer required at this
- // stage (i.e., after inlining has been performed).
- invoke->RemoveLoadClassAsLastInput();
-
- // The static call will initialize the class so there's no need for a clinit check if
- // it's the first user.
- // There is one special case where we still need the clinit check, when inlining. Because
- // currently the callee is responsible for reporting parameters to the GC, the code
- // that walks the stack during `artQuickResolutionTrampoline` cannot be interrupted for GC.
- // Therefore we cannot allocate any object in that code, including loading a new class.
- if (last_input == invoke->GetPrevious() && !invoke->IsFromInlinedInvoke()) {
- last_input->SetMustGenerateClinitCheck(false);
-
- // If the load class instruction is no longer used, remove it from
- // the graph.
- if (!last_input->HasUses()) {
- last_input->GetBlock()->RemoveInstruction(last_input);
- }
+ // Detach the explicit class initialization check from the invoke.
+ // Keeping track of the initializing instruction is no longer required
+ // at this stage (i.e., after inlining has been performed).
+ invoke->RemoveExplicitClinitCheck(HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
+
+ // Merging with load class should have happened in VisitClinitCheck().
+ DCHECK(!CanMoveClinitCheck(last_input, invoke));
+ }
+}
+
+bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input, HInstruction* user) {
+ // Determine if input and user come from the same dex instruction, so that we can move
+ // the clinit check responsibility from one to the other, i.e. from HClinitCheck (user)
+ // to HLoadClass (input), or from HClinitCheck (input) to HInvokeStaticOrDirect (user).
+
+ // Start with a quick dex pc check.
+ if (user->GetDexPc() != input->GetDexPc()) {
+ return false;
+ }
+
+ // Now do a thorough environment check that this is really coming from the same instruction in
+ // the same inlined graph. Unfortunately, we have to go through the whole environment chain.
+ HEnvironment* user_environment = user->GetEnvironment();
+ HEnvironment* input_environment = input->GetEnvironment();
+ while (user_environment != nullptr || input_environment != nullptr) {
+ if (user_environment == nullptr || input_environment == nullptr) {
+ // Different environment chain length. This happens when a method is called
+ // once directly and once indirectly through another inlined method.
+ return false;
+ }
+ if (user_environment->GetDexPc() != input_environment->GetDexPc() ||
+ user_environment->GetMethodIdx() != input_environment->GetMethodIdx() ||
+ !IsSameDexFile(user_environment->GetDexFile(), input_environment->GetDexFile())) {
+ return false;
+ }
+ user_environment = user_environment->GetParent();
+ input_environment = input_environment->GetParent();
+ }
+
+ // Check for code motion taking the input to a different block.
+ if (user->GetBlock() != input->GetBlock()) {
+ return false;
+ }
+
+ // In debug mode, check that we have not inserted a throwing instruction
+ // or an instruction with side effects between input and user.
+ if (kIsDebugBuild) {
+ for (HInstruction* between = input->GetNext(); between != user; between = between->GetNext()) {
+ CHECK(between != nullptr); // User must be after input in the same block.
+ CHECK(!between->CanThrow());
+ CHECK(!between->HasSideEffects());
}
}
+ return true;
}
} // namespace art
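Editor's note: a standalone model (plain structs, not HEnvironment) of the environment-chain walk in CanMoveClinitCheck above. Two chains describe the same inlining context only if they have the same length and agree on (dex pc, method index, dex file) at every level.

#include <cstdint>

struct ToyEnvironment {
  uint32_t dex_pc;
  uint32_t method_idx;
  const void* dex_file;           // stands in for the DexFile of this level
  const ToyEnvironment* parent;   // next level of the inlining chain
};

bool SameInliningContext(const ToyEnvironment* a, const ToyEnvironment* b) {
  while (a != nullptr || b != nullptr) {
    if (a == nullptr || b == nullptr) {
      return false;  // different chain lengths: one call site is inlined deeper
    }
    if (a->dex_pc != b->dex_pc ||
        a->method_idx != b->method_idx ||
        a->dex_file != b->dex_file) {
      return false;
    }
    a = a->parent;
    b = b->parent;
  }
  return true;
}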
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index d7f277fa0d..9b2434250d 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -40,6 +40,9 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
void VisitCondition(HCondition* condition) OVERRIDE;
void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+ void VisitNewInstance(HNewInstance* instruction) OVERRIDE;
+
+ bool CanMoveClinitCheck(HInstruction* input, HInstruction* user);
DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
};
diff --git a/compiler/optimizing/primitive_type_propagation.cc b/compiler/optimizing/primitive_type_propagation.cc
index c98f43e461..bde54ee977 100644
--- a/compiler/optimizing/primitive_type_propagation.cc
+++ b/compiler/optimizing/primitive_type_propagation.cc
@@ -63,7 +63,6 @@ bool PrimitiveTypePropagation::UpdateType(HPhi* phi) {
: SsaBuilder::GetFloatOrDoubleEquivalent(phi, input, new_type);
phi->ReplaceInput(equivalent, i);
if (equivalent->IsPhi()) {
- equivalent->AsPhi()->SetLive();
AddToWorklist(equivalent->AsPhi());
} else if (equivalent == input) {
// The input has changed its type. It can be an input of other phis,
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 659da068a9..dd349240f7 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -99,17 +99,9 @@ ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
}
}
-void ReferenceTypePropagation::Run() {
- // To properly propagate type info we need to visit in the dominator-based order.
- // Reverse post order guarantees a node's dominators are visited first.
- // We take advantage of this order in `VisitBasicBlock`.
- for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
- VisitBasicBlock(it.Current());
- }
- ProcessWorklist();
-
+void ReferenceTypePropagation::ValidateTypes() {
+ // TODO: move this to the graph checker.
if (kIsDebugBuild) {
- // TODO: move this to the graph checker.
ScopedObjectAccess soa(Thread::Current());
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
@@ -135,6 +127,100 @@ void ReferenceTypePropagation::Run() {
}
}
+static void CheckHasNoTypedInputs(HInstruction* root_instr) {
+ ArenaAllocatorAdapter<void> adapter =
+ root_instr->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocReferenceTypePropagation);
+
+ ArenaVector<HPhi*> visited_phis(adapter);
+ ArenaVector<HInstruction*> worklist(adapter);
+ worklist.push_back(root_instr);
+
+ while (!worklist.empty()) {
+ HInstruction* instr = worklist.back();
+ worklist.pop_back();
+
+ if (instr->IsPhi() || instr->IsBoundType() || instr->IsNullCheck()) {
+ // Expect that both `root_instr` and its inputs have invalid RTI.
+ ScopedObjectAccess soa(Thread::Current());
+ DCHECK(!instr->GetReferenceTypeInfo().IsValid()) << "Instruction should not have valid RTI.";
+
+ // Insert all unvisited inputs to the worklist.
+ for (HInputIterator it(instr); !it.Done(); it.Advance()) {
+ HInstruction* input = it.Current();
+ if (input->IsPhi()) {
+ if (ContainsElement(visited_phis, input->AsPhi())) {
+ continue;
+ } else {
+ visited_phis.push_back(input->AsPhi());
+ }
+ }
+ worklist.push_back(input);
+ }
+ } else if (instr->IsNullConstant()) {
+      // This is the only input of `root_instr` allowed to have valid RTI,
+      // because it is ignored.
+ } else {
+ LOG(FATAL) << "Unexpected input " << instr->DebugName() << instr->GetId() << " with RTI "
+ << instr->GetReferenceTypeInfo();
+ UNREACHABLE();
+ }
+ }
+}
+
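Editor's note: CheckHasNoTypedInputs above is a standard worklist walk; a standalone sketch of the visited/worklist pattern it relies on (plain ints instead of HInstruction):

#include <algorithm>
#include <vector>

// Walks a graph given as an adjacency list, processing no node twice.
std::vector<int> ReachableFrom(int root, const std::vector<std::vector<int>>& inputs) {
  std::vector<int> visited;
  std::vector<int> worklist = {root};
  while (!worklist.empty()) {
    int node = worklist.back();
    worklist.pop_back();
    if (std::find(visited.begin(), visited.end(), node) != visited.end()) {
      continue;  // already processed, mirroring the visited_phis check above
    }
    visited.push_back(node);
    for (int input : inputs[node]) {
      worklist.push_back(input);
    }
  }
  return visited;
}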
+template<typename Functor>
+static void ForEachUntypedInstruction(HGraph* graph, Functor fn) {
+ ScopedObjectAccess soa(Thread::Current());
+ for (HReversePostOrderIterator block_it(*graph); !block_it.Done(); block_it.Advance()) {
+ for (HInstructionIterator it(block_it.Current()->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+ fn(instr);
+ }
+ }
+ for (HInstructionIterator it(block_it.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) {
+ fn(instr);
+ }
+ }
+ }
+}
+
+void ReferenceTypePropagation::SetUntypedInstructionsToObject() {
+  // In some cases, the fixed-point iteration will leave kPrimNot instructions with
+ // invalid RTI because bytecode does not provide enough typing information.
+ // Set the RTI of such instructions to Object.
+ // Example:
+ // MyClass a = null, b = null;
+ // while (a == null) {
+ // if (cond) { a = b; } else { b = a; }
+ // }
+
+ if (kIsDebugBuild) {
+    // Check that an instruction whose RTI we are about to set from invalid to
+    // Object has no typed instructions in its def-use chain, and therefore its
+    // type really could not be inferred.
+ ForEachUntypedInstruction(graph_, [](HInstruction* instr) { CheckHasNoTypedInputs(instr); });
+ }
+
+ ReferenceTypeInfo obj_rti = ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false);
+ ForEachUntypedInstruction(graph_, [obj_rti](HInstruction* instr) {
+ instr->SetReferenceTypeInfo(obj_rti);
+ });
+}
+
+void ReferenceTypePropagation::Run() {
+ // To properly propagate type info we need to visit in the dominator-based order.
+ // Reverse post order guarantees a node's dominators are visited first.
+ // We take advantage of this order in `VisitBasicBlock`.
+ for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
+ VisitBasicBlock(it.Current());
+ }
+
+ ProcessWorklist();
+ SetUntypedInstructionsToObject();
+ ValidateTypes();
+}
+
void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) {
RTPVisitor visitor(graph_,
handles_,
@@ -530,8 +616,9 @@ void RTPVisitor::VisitLoadException(HLoadException* instr) {
void RTPVisitor::VisitNullCheck(HNullCheck* instr) {
ScopedObjectAccess soa(Thread::Current());
ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
- DCHECK(parent_rti.IsValid());
- instr->SetReferenceTypeInfo(parent_rti);
+ if (parent_rti.IsValid()) {
+ instr->SetReferenceTypeInfo(parent_rti);
+ }
}
void RTPVisitor::VisitFakeString(HFakeString* instr) {
@@ -584,11 +671,16 @@ void ReferenceTypePropagation::VisitPhi(HPhi* phi) {
}
if (phi->GetBlock()->IsLoopHeader()) {
+ ScopedObjectAccess soa(Thread::Current());
// Set the initial type for the phi. Use the non back edge input for reaching
// a fixed point faster.
+ HInstruction* first_input = phi->InputAt(0);
+ ReferenceTypeInfo first_input_rti = first_input->GetReferenceTypeInfo();
+ if (first_input_rti.IsValid() && !first_input->IsNullConstant()) {
+ phi->SetCanBeNull(first_input->CanBeNull());
+ phi->SetReferenceTypeInfo(first_input_rti);
+ }
AddToWorklist(phi);
- phi->SetCanBeNull(phi->InputAt(0)->CanBeNull());
- phi->SetReferenceTypeInfo(phi->InputAt(0)->GetReferenceTypeInfo());
} else {
// Eagerly compute the type of the phi, for quicker convergence. Note
// that we don't need to add users to the worklist because we are
@@ -610,23 +702,36 @@ ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo&
}
bool is_exact = a.IsExact() && b.IsExact();
- Handle<mirror::Class> type_handle;
+ ReferenceTypeInfo::TypeHandle result_type_handle;
+ ReferenceTypeInfo::TypeHandle a_type_handle = a.GetTypeHandle();
+ ReferenceTypeInfo::TypeHandle b_type_handle = b.GetTypeHandle();
+ bool a_is_interface = a_type_handle->IsInterface();
+ bool b_is_interface = b_type_handle->IsInterface();
if (a.GetTypeHandle().Get() == b.GetTypeHandle().Get()) {
- type_handle = a.GetTypeHandle();
+ result_type_handle = a_type_handle;
} else if (a.IsSupertypeOf(b)) {
- type_handle = a.GetTypeHandle();
+ result_type_handle = a_type_handle;
is_exact = false;
} else if (b.IsSupertypeOf(a)) {
- type_handle = b.GetTypeHandle();
+ result_type_handle = b_type_handle;
+ is_exact = false;
+ } else if (!a_is_interface && !b_is_interface) {
+ result_type_handle = handles_->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle));
is_exact = false;
} else {
- // TODO: Find the first common super class.
- type_handle = object_class_handle_;
+ // This can happen if:
+ // - both types are interfaces. TODO(calin): implement
+ // - one is an interface, the other a class, and the type does not implement the interface
+ // e.g:
+ // void foo(Interface i, boolean cond) {
+ // Object o = cond ? i : new Object();
+ // }
+ result_type_handle = object_class_handle_;
is_exact = false;
}
- return ReferenceTypeInfo::Create(type_handle, is_exact);
+ return ReferenceTypeInfo::Create(result_type_handle, is_exact);
}
static void UpdateArrayGet(HArrayGet* instr,
@@ -636,7 +741,9 @@ static void UpdateArrayGet(HArrayGet* instr,
DCHECK_EQ(Primitive::kPrimNot, instr->GetType());
ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo();
- DCHECK(parent_rti.IsValid());
+ if (!parent_rti.IsValid()) {
+ return;
+ }
Handle<mirror::Class> handle = parent_rti.GetTypeHandle();
if (handle->IsObjectArrayClass()) {
@@ -648,8 +755,6 @@ static void UpdateArrayGet(HArrayGet* instr,
instr->SetReferenceTypeInfo(
ReferenceTypeInfo::Create(object_class_handle, /* is_exact */ false));
}
-
- return;
}
bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
@@ -666,7 +771,7 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) {
instr->SetReferenceTypeInfo(parent_rti);
}
} else if (instr->IsArrayGet()) {
- // TODO: consider if it's worth "looking back" and bounding the input object
+ // TODO: consider if it's worth "looking back" and binding the input object
// to an array type.
UpdateArrayGet(instr->AsArrayGet(), handles_, object_class_handle_);
} else {
@@ -694,6 +799,7 @@ void RTPVisitor::VisitArrayGet(HArrayGet* instr) {
if (instr->GetType() != Primitive::kPrimNot) {
return;
}
+
ScopedObjectAccess soa(Thread::Current());
UpdateArrayGet(instr, handles_, object_class_handle_);
if (!instr->GetReferenceTypeInfo().IsValid()) {
@@ -715,14 +821,35 @@ void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) {
instr->SetReferenceTypeInfo(new_rti);
}
+// NullConstant inputs are ignored during merging as they do not provide any useful information.
+// If all the inputs are NullConstants then the type of the phi will be set to Object.
void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
- ReferenceTypeInfo new_rti = instr->InputAt(0)->GetReferenceTypeInfo();
+ size_t input_count = instr->InputCount();
+ size_t first_input_index_not_null = 0;
+ while (first_input_index_not_null < input_count &&
+ instr->InputAt(first_input_index_not_null)->IsNullConstant()) {
+ first_input_index_not_null++;
+ }
+ if (first_input_index_not_null == input_count) {
+    // All inputs are NullConstants; set the type to Object.
+ // This may happen in the presence of inlining.
+ instr->SetReferenceTypeInfo(
+ ReferenceTypeInfo::Create(object_class_handle_, /* is_exact */ false));
+ return;
+ }
+
+ ReferenceTypeInfo new_rti = instr->InputAt(first_input_index_not_null)->GetReferenceTypeInfo();
+
if (new_rti.IsValid() && new_rti.IsObjectClass() && !new_rti.IsExact()) {
// Early return if we are Object and inexact.
instr->SetReferenceTypeInfo(new_rti);
return;
}
- for (size_t i = 1; i < instr->InputCount(); i++) {
+
+ for (size_t i = first_input_index_not_null + 1; i < input_count; i++) {
+ if (instr->InputAt(i)->IsNullConstant()) {
+ continue;
+ }
new_rti = MergeTypes(new_rti, instr->InputAt(i)->GetReferenceTypeInfo());
if (new_rti.IsValid() && new_rti.IsObjectClass()) {
if (!new_rti.IsExact()) {
@@ -732,7 +859,10 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) {
}
}
}
- instr->SetReferenceTypeInfo(new_rti);
+
+ if (new_rti.IsValid()) {
+ instr->SetReferenceTypeInfo(new_rti);
+ }
}
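Editor's note: a standalone model (std::optional standing in for NullConstant inputs, strings standing in for types) of the merge behaviour documented above: null-like inputs are skipped, and if every input is null-like the result falls back to "Object".

#include <optional>
#include <string>
#include <vector>

std::string MergePhiInputTypes(const std::vector<std::optional<std::string>>& inputs) {
  std::optional<std::string> result;
  for (const std::optional<std::string>& input : inputs) {
    if (!input.has_value()) {
      continue;                      // ignore NullConstant-like inputs
    }
    if (!result.has_value()) {
      result = input;                // first typed input seeds the merge
    } else if (*result != *input) {
      result = "Object";             // stand-in for the MergeTypes() widening above
    }
  }
  return result.value_or("Object");  // all inputs were null constants
}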
// Re-computes and updates the nullability of the instruction. Returns whether or
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 5493601adc..21789e1331 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -56,6 +56,9 @@ class ReferenceTypePropagation : public HOptimization {
ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b)
SHARED_REQUIRES(Locks::mutator_lock_);
+ void ValidateTypes();
+ void SetUntypedInstructionsToObject();
+
StackHandleScopeCollection* handles_;
ArenaVector<HInstruction*> worklist_;
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index ef22c816a0..d399bc2d7a 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -1525,7 +1525,7 @@ void RegisterAllocator::InsertParallelMoveAtExitOf(HBasicBlock* block,
DCHECK(IsValidDestination(destination)) << destination;
if (source.Equals(destination)) return;
- DCHECK_EQ(block->NumberOfNormalSuccessors(), 1u);
+ DCHECK_EQ(block->GetNormalSuccessors().size(), 1u);
HInstruction* last = block->GetLastInstruction();
// We insert moves at exit for phi predecessors and connecting blocks.
// A block ending with an if or a packed switch cannot branch to a block
@@ -1752,7 +1752,7 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
// If `from` has only one successor, we can put the moves at the exit of it. Otherwise
// we need to put the moves at the entry of `to`.
- if (from->NumberOfNormalSuccessors() == 1) {
+ if (from->GetNormalSuccessors().size() == 1) {
InsertParallelMoveAtExitOf(from,
interval->GetParent()->GetDefinedBy(),
source->ToLocation(),
@@ -1894,7 +1894,7 @@ void RegisterAllocator::Resolve() {
HInstruction* phi = inst_it.Current();
for (size_t i = 0, e = current->GetPredecessors().size(); i < e; ++i) {
HBasicBlock* predecessor = current->GetPredecessors()[i];
- DCHECK_EQ(predecessor->NumberOfNormalSuccessors(), 1u);
+ DCHECK_EQ(predecessor->GetNormalSuccessors().size(), 1u);
HInstruction* input = phi->InputAt(i);
Location source = input->GetLiveInterval()->GetLocationAt(
predecessor->GetLifetimeEnd() - 1);
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index a128079cdb..5e1d1d9954 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -49,7 +49,8 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
}
// TODO: Avoid CompilerDriver.
- InvokeType invoke_type = invoke->GetOriginalInvokeType();
+ InvokeType original_invoke_type = invoke->GetOriginalInvokeType();
+ InvokeType optimized_invoke_type = original_invoke_type;
MethodReference target_method(&graph_->GetDexFile(), invoke->GetDexMethodIndex());
int vtable_idx;
uintptr_t direct_code, direct_method;
@@ -58,15 +59,18 @@ void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
invoke->GetDexPc(),
false /* update_stats: already updated in builder */,
true /* enable_devirtualization */,
- &invoke_type,
+ &optimized_invoke_type,
&target_method,
&vtable_idx,
&direct_code,
&direct_method);
- DCHECK(success);
- DCHECK_EQ(invoke_type, invoke->GetInvokeType());
- DCHECK_EQ(target_method.dex_file, invoke->GetTargetMethod().dex_file);
- DCHECK_EQ(target_method.dex_method_index, invoke->GetTargetMethod().dex_method_index);
+ if (!success) {
+ // TODO: try using kDexCachePcRelative. It's always a valid method load
+ // kind as long as it's supported by the codegen
+    // kind as long as it's supported by the codegen.
+ }
+ invoke->SetOptimizedInvokeType(optimized_invoke_type);
+ invoke->SetTargetMethod(target_method);
HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
index ec45d6b2ca..9bbc354290 100644
--- a/compiler/optimizing/side_effects_test.cc
+++ b/compiler/optimizing/side_effects_test.cc
@@ -129,13 +129,13 @@ TEST(SideEffectsTest, NoDependences) {
TEST(SideEffectsTest, VolatileDependences) {
SideEffects volatile_write =
- SideEffects::FieldWriteOfType(Primitive::kPrimInt, true);
+ SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ true);
SideEffects any_write =
- SideEffects::FieldWriteOfType(Primitive::kPrimInt, false);
+ SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false);
SideEffects volatile_read =
- SideEffects::FieldReadOfType(Primitive::kPrimByte, true);
+ SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ true);
SideEffects any_read =
- SideEffects::FieldReadOfType(Primitive::kPrimByte, false);
+ SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ false);
EXPECT_FALSE(volatile_write.MayDependOn(any_read));
EXPECT_TRUE(any_read.MayDependOn(volatile_write));
@@ -151,15 +151,15 @@ TEST(SideEffectsTest, VolatileDependences) {
TEST(SideEffectsTest, SameWidthTypes) {
// Type I/F.
testWriteAndReadDependence(
- SideEffects::FieldWriteOfType(Primitive::kPrimInt, false),
- SideEffects::FieldReadOfType(Primitive::kPrimFloat, false));
+ SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false),
+ SideEffects::FieldReadOfType(Primitive::kPrimFloat, /* is_volatile */ false));
testWriteAndReadDependence(
SideEffects::ArrayWriteOfType(Primitive::kPrimInt),
SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
// Type L/D.
testWriteAndReadDependence(
- SideEffects::FieldWriteOfType(Primitive::kPrimLong, false),
- SideEffects::FieldReadOfType(Primitive::kPrimDouble, false));
+ SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false),
+ SideEffects::FieldReadOfType(Primitive::kPrimDouble, /* is_volatile */ false));
testWriteAndReadDependence(
SideEffects::ArrayWriteOfType(Primitive::kPrimLong),
SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
@@ -171,9 +171,9 @@ TEST(SideEffectsTest, AllWritesAndReads) {
for (Primitive::Type type = Primitive::kPrimNot;
type < Primitive::kPrimVoid;
type = Primitive::Type(type + 1)) {
- s = s.Union(SideEffects::FieldWriteOfType(type, false));
+ s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false));
s = s.Union(SideEffects::ArrayWriteOfType(type));
- s = s.Union(SideEffects::FieldReadOfType(type, false));
+ s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false));
s = s.Union(SideEffects::ArrayReadOfType(type));
}
EXPECT_TRUE(s.DoesAllReadWrite());
@@ -225,10 +225,10 @@ TEST(SideEffectsTest, BitStrings) {
"||DJ|||||", // note: DJ alias
SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str());
SideEffects s = SideEffects::None();
- s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, false));
- s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, false));
+ s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, /* is_volatile */ false));
+ s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false));
s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort));
- s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, false));
+ s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, /* is_volatile */ false));
s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat));
s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble));
EXPECT_STREQ(
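The edits to this test are purely mechanical: every boolean literal passed to FieldWriteOfType()/FieldReadOfType() gains an inline /* is_volatile */ annotation so the call sites document themselves. A small sketch of the idiom with a made-up function, not part of the ART code base:

    // Hypothetical example of the annotated-boolean-argument idiom.
    void Connect(bool use_tls, bool retry) { (void)use_tls; (void)retry; }

    void Example() {
      // Without the annotations a reader has to look up the signature:
      //   Connect(true, false);
      // With them, the meaning of each literal is visible at the call site:
      Connect(/* use_tls */ true, /* retry */ false);
    }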
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 4565590bc3..9e6cfbe653 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -22,6 +22,13 @@
namespace art {
+// Returns whether this is a loop header phi which was eagerly created but later
+// found inconsistent due to the vreg being undefined in one of its predecessors.
+// Such a phi is marked dead and should be ignored until its removal in SsaPhiElimination.
+static bool IsUndefinedLoopHeaderPhi(HPhi* phi) {
+ return phi->IsLoopHeaderPhi() && phi->InputCount() != phi->GetBlock()->GetPredecessors().size();
+}
+
/**
  * A debuggable application may require reviving phis to ensure their
* associated DEX register is available to a debugger. This class implements
@@ -165,17 +172,15 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) {
void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) {
for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
HPhi* phi = it.Current()->AsPhi();
+ if (IsUndefinedLoopHeaderPhi(phi)) {
+ DCHECK(phi->IsDead());
+ continue;
+ }
if (phi->IsDead() && phi->HasEnvironmentUses()) {
phi->SetLive();
if (block->IsLoopHeader()) {
- // Give a type to the loop phi to guarantee convergence of the algorithm.
- // Note that the dead phi may already have a type if it is an equivalent
- // generated for a typed LoadLocal. In that case we do not change the
- // type because it could lead to an unsupported PrimNot/Float/Double ->
- // PrimInt/Long transition and create same type equivalents.
- if (phi->GetType() == Primitive::kPrimVoid) {
- phi->SetType(phi->InputAt(0)->GetType());
- }
+ // Loop phis must have a type to guarantee convergence of the algorithm.
+ DCHECK_NE(phi->GetType(), Primitive::kPrimVoid);
AddToWorklist(phi);
} else {
// Because we are doing a reverse post order visit, all inputs of
@@ -220,6 +225,27 @@ void DeadPhiHandling::Run() {
ProcessWorklist();
}
+void SsaBuilder::SetLoopHeaderPhiInputs() {
+ for (size_t i = loop_headers_.size(); i > 0; --i) {
+ HBasicBlock* block = loop_headers_[i - 1];
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HPhi* phi = it.Current()->AsPhi();
+ size_t vreg = phi->GetRegNumber();
+ for (HBasicBlock* predecessor : block->GetPredecessors()) {
+ HInstruction* value = ValueOfLocal(predecessor, vreg);
+ if (value == nullptr) {
+ // The vreg is undefined at this predecessor. Mark the phi dead and leave it with
+ // fewer inputs than predecessors; SsaChecker will fail unless the phi is removed.
+ phi->SetDead();
+ break;
+ } else {
+ phi->AddInput(value);
+ }
+ }
+ }
+ }
+}
+
void SsaBuilder::FixNullConstantType() {
// The order doesn't matter here.
for (HReversePostOrderIterator itb(*GetGraph()); !itb.Done(); itb.Advance()) {
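SetLoopHeaderPhiInputs above stops adding inputs as soon as it finds a predecessor where the vreg has no value, which is exactly the shape IsUndefinedLoopHeaderPhi looks for: fewer inputs than predecessors. A standalone sketch of that rule, with a plain vector of per-predecessor values standing in for ValueOfLocal():

    #include <vector>

    // Illustrative only: each entry is the value of one vreg at one predecessor
    // of a loop header; nullptr means the vreg is undefined there (for example
    // after a wide store clobbered it, see the VisitStoreLocal change below).
    // Returns false when the phi must be marked dead: it keeps fewer inputs
    // than the block has predecessors, which SsaChecker would otherwise reject.
    static bool PopulateLoopPhiInputs(const std::vector<const int*>& value_at_predecessor,
                                      std::vector<const int*>* phi_inputs) {
      for (const int* value : value_at_predecessor) {
        if (value == nullptr) {
          return false;  // undefined at this predecessor; the phi becomes dead
        }
        phi_inputs->push_back(value);
      }
      return true;  // phi fully populated, one input per predecessor
    }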
@@ -283,15 +309,7 @@ void SsaBuilder::BuildSsa() {
}
// 2) Set inputs of loop phis.
- for (HBasicBlock* block : loop_headers_) {
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- HPhi* phi = it.Current()->AsPhi();
- for (HBasicBlock* predecessor : block->GetPredecessors()) {
- HInstruction* input = ValueOfLocal(predecessor, phi->GetRegNumber());
- phi->AddInput(input);
- }
- }
- }
+ SetLoopHeaderPhiInputs();
// 3) Mark dead phis. This will mark phis that are only used by environments:
// at the DEX level, the type of these phis does not need to be consistent, but
@@ -403,8 +421,13 @@ ArenaVector<HInstruction*>* SsaBuilder::GetLocalsFor(HBasicBlock* block) {
for (size_t i = 0; i < vregs; ++i) {
// No point in creating the catch phi if it is already undefined at
// the first throwing instruction.
- if ((*current_locals_)[i] != nullptr) {
- HPhi* phi = new (arena) HPhi(arena, i, 0, Primitive::kPrimVoid);
+ HInstruction* current_local_value = (*current_locals_)[i];
+ if (current_local_value != nullptr) {
+ HPhi* phi = new (arena) HPhi(
+ arena,
+ i,
+ 0,
+ current_local_value->GetType());
block->AddPhi(phi);
(*locals)[i] = phi;
}
@@ -451,7 +474,10 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
HInstruction* incoming = ValueOfLocal(block->GetLoopInformation()->GetPreHeader(), local);
if (incoming != nullptr) {
HPhi* phi = new (GetGraph()->GetArena()) HPhi(
- GetGraph()->GetArena(), local, 0, Primitive::kPrimVoid);
+ GetGraph()->GetArena(),
+ local,
+ 0,
+ incoming->GetType());
block->AddPhi(phi);
(*current_locals_)[local] = phi;
}
@@ -484,8 +510,12 @@ void SsaBuilder::VisitBasicBlock(HBasicBlock* block) {
}
if (is_different) {
+ HInstruction* first_input = ValueOfLocal(block->GetPredecessors()[0], local);
HPhi* phi = new (GetGraph()->GetArena()) HPhi(
- GetGraph()->GetArena(), local, block->GetPredecessors().size(), Primitive::kPrimVoid);
+ GetGraph()->GetArena(),
+ local,
+ block->GetPredecessors().size(),
+ first_input->GetType());
for (size_t i = 0; i < block->GetPredecessors().size(); i++) {
HInstruction* pred_value = ValueOfLocal(block->GetPredecessors()[i], local);
phi->SetRawInputAt(i, pred_value);
@@ -583,8 +613,16 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive:
phi->GetBlock()->InsertPhiAfter(new_phi, phi);
return new_phi;
} else {
- DCHECK_EQ(next->GetType(), type);
- return next->AsPhi();
+ HPhi* next_phi = next->AsPhi();
+ DCHECK_EQ(next_phi->GetType(), type);
+ if (next_phi->IsDead()) {
+ // TODO(dbrazdil): Remove this SetLive (we should not need to revive phis)
+ // once we stop running MarkDeadPhis before PrimitiveTypePropagation. This
+ // cannot revive undefined loop header phis because they cannot have uses.
+ DCHECK(!IsUndefinedLoopHeaderPhi(next_phi));
+ next_phi->SetLive();
+ }
+ return next_phi;
}
}
@@ -638,7 +676,36 @@ void SsaBuilder::VisitLoadLocal(HLoadLocal* load) {
}
void SsaBuilder::VisitStoreLocal(HStoreLocal* store) {
- (*current_locals_)[store->GetLocal()->GetRegNumber()] = store->InputAt(1);
+ uint32_t reg_number = store->GetLocal()->GetRegNumber();
+ HInstruction* stored_value = store->InputAt(1);
+ Primitive::Type stored_type = stored_value->GetType();
+ DCHECK_NE(stored_type, Primitive::kPrimVoid);
+
+ // Storing into vreg `reg_number` may implicitly invalidate the surrounding
+ // registers. Consider the following cases:
+ // (1) Storing a wide value must overwrite previous values in both `reg_number`
+ // and `reg_number+1`. We store `nullptr` in `reg_number+1`.
+ // (2) If vreg `reg_number-1` holds a wide value, writing into `reg_number`
+ // must invalidate it. We store `nullptr` in `reg_number-1`.
+ // Consequently, storing a wide value into the high vreg of another wide value
+ // will invalidate both `reg_number-1` and `reg_number+1`.
+
+ if (reg_number != 0) {
+ HInstruction* local_low = (*current_locals_)[reg_number - 1];
+ if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) {
+ // The vreg we are storing into was previously the high vreg of a pair.
+ // We need to invalidate its low vreg.
+ DCHECK((*current_locals_)[reg_number] == nullptr);
+ (*current_locals_)[reg_number - 1] = nullptr;
+ }
+ }
+
+ (*current_locals_)[reg_number] = stored_value;
+ if (Primitive::Is64BitType(stored_type)) {
+ // We are storing a pair. Invalidate the instruction in the high vreg.
+ (*current_locals_)[reg_number + 1] = nullptr;
+ }
+
store->GetBlock()->RemoveInstruction(store);
}
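The two invalidation rules in the comment above are easiest to see on a toy model of the vreg array. The sketch below is illustrative only: a tagged Value stands in for HInstruction and a plain vector for current_locals_, but the nulling-out logic mirrors the new VisitStoreLocal.

    #include <cstdint>
    #include <vector>

    // Each slot is either nullptr (undefined) or points at the stored value.
    // Wide values occupy the pair (reg, reg + 1), with the value in the low slot.
    struct Value { const char* tag; bool is_wide; };

    static void StoreLocal(std::vector<const Value*>* locals, uint32_t reg, const Value* v) {
      // Rule (2): if reg-1 currently holds a wide value, writing reg clobbers its
      // high half, so the whole pair rooted at reg-1 becomes undefined.
      if (reg != 0u) {
        const Value* low = (*locals)[reg - 1];
        if (low != nullptr && low->is_wide) {
          (*locals)[reg - 1] = nullptr;
        }
      }
      (*locals)[reg] = v;
      // Rule (1): a wide store also owns reg+1, so any previous value there is gone.
      if (v->is_wide) {
        (*locals)[reg + 1] = nullptr;
      }
    }

    // Example: v0/v1 hold a long; storing an int into v1 invalidates the pair.
    //   std::vector<const Value*> locals(4, nullptr);
    //   Value j{"long", true}, i{"int", false};
    //   StoreLocal(&locals, 0, &j);  // locals: {long, null, null, null}
    //   StoreLocal(&locals, 1, &i);  // locals: {null, int,  null, null}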
@@ -660,8 +727,7 @@ void SsaBuilder::VisitInstruction(HInstruction* instruction) {
if (instruction->CanThrowIntoCatchBlock()) {
const HTryBoundary& try_entry =
instruction->GetBlock()->GetTryCatchInformation()->GetTryEntry();
- for (HExceptionHandlerIterator it(try_entry); !it.Done(); it.Advance()) {
- HBasicBlock* catch_block = it.Current();
+ for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) {
ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block);
DCHECK_EQ(handler_locals->size(), current_locals_->size());
for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 79f1a28ac8..dcce5e4c2c 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -81,6 +81,7 @@ class SsaBuilder : public HGraphVisitor {
static constexpr const char* kSsaBuilderPassName = "ssa_builder";
private:
+ void SetLoopHeaderPhiInputs();
void FixNullConstantType();
void EquivalentPhisCleanup();
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index 72f9ddd506..a3219dcc38 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -16,6 +16,8 @@
#include "ssa_phi_elimination.h"
+#include "base/arena_containers.h"
+
namespace art {
void SsaDeadPhiElimination::Run() {
@@ -24,22 +26,36 @@ void SsaDeadPhiElimination::Run() {
}
void SsaDeadPhiElimination::MarkDeadPhis() {
+ // Phis are constructed live and, once marked dead, should not be revived. This
+ // algorithm temporarily breaks that invariant, but we DCHECK that only phis
+ // that were initially live are revived.
+ ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter());
+
// Add to the worklist phis referenced by non-phi instructions.
for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
HBasicBlock* block = it.Current();
for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
HPhi* phi = inst_it.Current()->AsPhi();
- // Set dead ahead of running through uses. The phi may have no use.
- phi->SetDead();
+ if (phi->IsDead()) {
+ continue;
+ }
+
+ bool has_non_phi_use = false;
for (HUseIterator<HInstruction*> use_it(phi->GetUses()); !use_it.Done(); use_it.Advance()) {
- HUseListNode<HInstruction*>* current = use_it.Current();
- HInstruction* user = current->GetUser();
- if (!user->IsPhi()) {
- worklist_.push_back(phi);
- phi->SetLive();
+ if (!use_it.Current()->GetUser()->IsPhi()) {
+ has_non_phi_use = true;
break;
}
}
+
+ if (has_non_phi_use) {
+ worklist_.push_back(phi);
+ } else {
+ phi->SetDead();
+ if (kIsDebugBuild) {
+ initially_live.insert(phi);
+ }
+ }
}
}
@@ -48,10 +64,13 @@ void SsaDeadPhiElimination::MarkDeadPhis() {
HPhi* phi = worklist_.back();
worklist_.pop_back();
for (HInputIterator it(phi); !it.Done(); it.Advance()) {
- HInstruction* input = it.Current();
- if (input->IsPhi() && input->AsPhi()->IsDead()) {
- worklist_.push_back(input->AsPhi());
- input->AsPhi()->SetLive();
+ HPhi* input = it.Current()->AsPhi();
+ if (input != nullptr && input->IsDead()) {
+ // The input is a dead phi. Revive it and add it to the worklist. The DCHECK
+ // below verifies that the phi was initially live (see `initially_live` above).
+ DCHECK(ContainsElement(initially_live, input));
+ input->SetLive();
+ worklist_.push_back(input);
}
}
}
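Taken together, the two hunks above turn MarkDeadPhis into a seed-and-propagate liveness pass: phis with at least one non-phi user seed the worklist, everything else is provisionally dead, and liveness then flows backwards through phi inputs. A standalone sketch under those assumptions (plain structs instead of HPhi; it also ignores phis that arrive already dead from the SsaBuilder, which the real pass skips and never revives):

    #include <vector>

    // Illustrative model only; not the ART data structures.
    struct Phi {
      std::vector<Phi*> phi_inputs;    // inputs that are themselves phis
      bool has_non_phi_use = false;    // used by at least one real instruction
      bool dead = false;
    };

    static void MarkDeadPhis(const std::vector<Phi*>& phis) {
      std::vector<Phi*> worklist;
      for (Phi* phi : phis) {
        if (phi->has_non_phi_use) {
          worklist.push_back(phi);     // seed: definitely live
        } else {
          phi->dead = true;            // provisionally dead; may be revived below
        }
      }
      while (!worklist.empty()) {
        Phi* phi = worklist.back();
        worklist.pop_back();
        for (Phi* input : phi->phi_inputs) {
          if (input->dead) {
            input->dead = false;       // revive: it feeds a live phi
            worklist.push_back(input);
          }
        }
      }
    }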
@@ -118,7 +137,6 @@ void SsaRedundantPhiElimination::Run() {
}
if (phi->InputCount() == 0) {
- DCHECK(phi->IsCatchPhi());
DCHECK(phi->IsDead());
continue;
}