Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/block_builder.cc | 30
-rw-r--r--  compiler/optimizing/bounds_check_elimination.cc | 46
-rw-r--r--  compiler/optimizing/bounds_check_elimination.h | 2
-rw-r--r--  compiler/optimizing/bounds_check_elimination_test.cc | 15
-rw-r--r--  compiler/optimizing/builder.cc | 12
-rw-r--r--  compiler/optimizing/builder.h | 3
-rw-r--r--  compiler/optimizing/cha_guard_optimization.cc | 9
-rw-r--r--  compiler/optimizing/cha_guard_optimization.h | 2
-rw-r--r--  compiler/optimizing/code_generator.cc | 422
-rw-r--r--  compiler/optimizing/code_generator.h | 51
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc | 2365
-rw-r--r--  compiler/optimizing/code_generator_arm64.h | 348
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc | 2419
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h | 336
-rw-r--r--  compiler/optimizing/code_generator_mips.cc | 1118
-rw-r--r--  compiler/optimizing/code_generator_mips.h | 136
-rw-r--r--  compiler/optimizing/code_generator_mips64.cc | 804
-rw-r--r--  compiler/optimizing/code_generator_mips64.h | 133
-rw-r--r--  compiler/optimizing/code_generator_vector_arm64.cc | 240
-rw-r--r--  compiler/optimizing/code_generator_vector_arm_vixl.cc | 136
-rw-r--r--  compiler/optimizing/code_generator_vector_mips.cc | 112
-rw-r--r--  compiler/optimizing/code_generator_vector_mips64.cc | 110
-rw-r--r--  compiler/optimizing/code_generator_vector_x86.cc | 181
-rw-r--r--  compiler/optimizing/code_generator_vector_x86_64.cc | 181
-rw-r--r--  compiler/optimizing/code_generator_x86.cc | 887
-rw-r--r--  compiler/optimizing/code_generator_x86.h | 144
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc | 901
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h | 138
-rw-r--r--  compiler/optimizing/code_sinking.cc | 17
-rw-r--r--  compiler/optimizing/code_sinking.h | 2
-rw-r--r--  compiler/optimizing/codegen_test.cc | 70
-rw-r--r--  compiler/optimizing/codegen_test_utils.h | 76
-rw-r--r--  compiler/optimizing/common_arm.h | 2
-rw-r--r--  compiler/optimizing/common_arm64.h | 40
-rw-r--r--  compiler/optimizing/constant_folding.cc | 49
-rw-r--r--  compiler/optimizing/constant_folding.h | 2
-rw-r--r--  compiler/optimizing/constant_folding_test.cc | 7
-rw-r--r--  compiler/optimizing/constructor_fence_redundancy_elimination.cc | 47
-rw-r--r--  compiler/optimizing/constructor_fence_redundancy_elimination.h | 2
-rw-r--r--  compiler/optimizing/data_type.h | 36
-rw-r--r--  compiler/optimizing/dead_code_elimination.cc | 3
-rw-r--r--  compiler/optimizing/dead_code_elimination.h | 3
-rw-r--r--  compiler/optimizing/dead_code_elimination_test.cc | 7
-rw-r--r--  compiler/optimizing/emit_swap_mips_test.cc | 20
-rw-r--r--  compiler/optimizing/graph_checker.cc | 118
-rw-r--r--  compiler/optimizing/graph_checker.h | 70
-rw-r--r--  compiler/optimizing/graph_visualizer.cc | 159
-rw-r--r--  compiler/optimizing/gvn.cc | 23
-rw-r--r--  compiler/optimizing/gvn.h | 2
-rw-r--r--  compiler/optimizing/induction_var_analysis.cc | 7
-rw-r--r--  compiler/optimizing/induction_var_analysis.h | 2
-rw-r--r--  compiler/optimizing/induction_var_range.cc | 104
-rw-r--r--  compiler/optimizing/induction_var_range.h | 18
-rw-r--r--  compiler/optimizing/induction_var_range_test.cc | 14
-rw-r--r--  compiler/optimizing/inliner.cc | 240
-rw-r--r--  compiler/optimizing/inliner.h | 10
-rw-r--r--  compiler/optimizing/instruction_builder.cc | 821
-rw-r--r--  compiler/optimizing/instruction_builder.h | 85
-rw-r--r--  compiler/optimizing/instruction_simplifier.cc | 445
-rw-r--r--  compiler/optimizing/instruction_simplifier.h | 8
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.cc | 32
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm.h | 2
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.cc | 33
-rw-r--r--  compiler/optimizing/instruction_simplifier_arm64.h | 2
-rw-r--r--  compiler/optimizing/instruction_simplifier_mips.cc | 7
-rw-r--r--  compiler/optimizing/instruction_simplifier_mips.h | 2
-rw-r--r--  compiler/optimizing/instruction_simplifier_shared.cc | 4
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86.cc | 88
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86.h | 44
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_64.cc | 82
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_64.h | 48
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_shared.cc | 137
-rw-r--r--  compiler/optimizing/instruction_simplifier_x86_shared.h | 29
-rw-r--r--  compiler/optimizing/intrinsic_objects.cc | 121
-rw-r--r--  compiler/optimizing/intrinsic_objects.h | 83
-rw-r--r--  compiler/optimizing/intrinsics.cc | 532
-rw-r--r--  compiler/optimizing/intrinsics.h | 102
-rw-r--r--  compiler/optimizing/intrinsics_arm64.cc | 788
-rw-r--r--  compiler/optimizing/intrinsics_arm64.h | 8
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.cc | 775
-rw-r--r--  compiler/optimizing/intrinsics_arm_vixl.h | 8
-rw-r--r--  compiler/optimizing/intrinsics_mips.cc | 878
-rw-r--r--  compiler/optimizing/intrinsics_mips.h | 9
-rw-r--r--  compiler/optimizing/intrinsics_mips64.cc | 527
-rw-r--r--  compiler/optimizing/intrinsics_mips64.h | 10
-rw-r--r--  compiler/optimizing/intrinsics_utils.h | 4
-rw-r--r--  compiler/optimizing/intrinsics_x86.cc | 762
-rw-r--r--  compiler/optimizing/intrinsics_x86.h | 8
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.cc | 574
-rw-r--r--  compiler/optimizing/intrinsics_x86_64.h | 8
-rw-r--r--  compiler/optimizing/licm.cc | 5
-rw-r--r--  compiler/optimizing/licm.h | 2
-rw-r--r--  compiler/optimizing/linearize_test.cc | 8
-rw-r--r--  compiler/optimizing/live_ranges_test.cc | 40
-rw-r--r--  compiler/optimizing/liveness_test.cc | 10
-rw-r--r--  compiler/optimizing/load_store_analysis.cc | 9
-rw-r--r--  compiler/optimizing/load_store_analysis.h | 81
-rw-r--r--  compiler/optimizing/load_store_analysis_test.cc | 119
-rw-r--r--  compiler/optimizing/load_store_elimination.cc | 187
-rw-r--r--  compiler/optimizing/load_store_elimination.h | 2
-rw-r--r--  compiler/optimizing/loop_analysis.cc | 193
-rw-r--r--  compiler/optimizing/loop_analysis.h | 183
-rw-r--r--  compiler/optimizing/loop_optimization.cc | 639
-rw-r--r--  compiler/optimizing/loop_optimization.h | 56
-rw-r--r--  compiler/optimizing/loop_optimization_test.cc | 8
-rw-r--r--  compiler/optimizing/nodes.cc | 200
-rw-r--r--  compiler/optimizing/nodes.h | 1817
-rw-r--r--  compiler/optimizing/nodes_mips.h | 16
-rw-r--r--  compiler/optimizing/nodes_shared.h | 36
-rw-r--r--  compiler/optimizing/nodes_vector.h | 280
-rw-r--r--  compiler/optimizing/nodes_vector_test.cc | 6
-rw-r--r--  compiler/optimizing/nodes_x86.h | 100
-rw-r--r--  compiler/optimizing/optimization.cc | 94
-rw-r--r--  compiler/optimizing/optimization.h | 34
-rw-r--r--  compiler/optimizing/optimizing_cfi_test.cc | 31
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc | 770
-rw-r--r--  compiler/optimizing/optimizing_compiler.h | 15
-rw-r--r--  compiler/optimizing/optimizing_compiler_stats.h | 10
-rw-r--r--  compiler/optimizing/optimizing_unit_test.h | 87
-rw-r--r--  compiler/optimizing/parallel_move_resolver.h | 4
-rw-r--r--  compiler/optimizing/parallel_move_test.cc | 37
-rw-r--r--  compiler/optimizing/pc_relative_fixups_mips.cc | 23
-rw-r--r--  compiler/optimizing/pc_relative_fixups_mips.h | 2
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.cc | 81
-rw-r--r--  compiler/optimizing/pc_relative_fixups_x86.h | 2
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.cc | 49
-rw-r--r--  compiler/optimizing/prepare_for_register_allocation.h | 34
-rw-r--r--  compiler/optimizing/pretty_printer.h | 14
-rw-r--r--  compiler/optimizing/reference_type_propagation.cc | 166
-rw-r--r--  compiler/optimizing/reference_type_propagation.h | 6
-rw-r--r--  compiler/optimizing/register_allocation_resolver.cc | 8
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.cc | 16
-rw-r--r--  compiler/optimizing/register_allocator_graph_color.h | 6
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.cc | 6
-rw-r--r--  compiler/optimizing/register_allocator_linear_scan.h | 6
-rw-r--r--  compiler/optimizing/register_allocator_test.cc | 90
-rw-r--r--  compiler/optimizing/scheduler.cc | 110
-rw-r--r--  compiler/optimizing/scheduler.h | 70
-rw-r--r--  compiler/optimizing/scheduler_arm.cc | 8
-rw-r--r--  compiler/optimizing/scheduler_arm.h | 13
-rw-r--r--  compiler/optimizing/scheduler_arm64.h | 21
-rw-r--r--  compiler/optimizing/scheduler_test.cc | 59
-rw-r--r--  compiler/optimizing/select_generator.cc | 64
-rw-r--r--  compiler/optimizing/select_generator.h | 2
-rw-r--r--  compiler/optimizing/select_generator_test.cc | 96
-rw-r--r--  compiler/optimizing/sharpening.cc | 204
-rw-r--r--  compiler/optimizing/sharpening.h | 48
-rw-r--r--  compiler/optimizing/side_effects_analysis.cc | 3
-rw-r--r--  compiler/optimizing/side_effects_analysis.h | 2
-rw-r--r--  compiler/optimizing/side_effects_test.cc | 27
-rw-r--r--  compiler/optimizing/ssa_builder.cc | 75
-rw-r--r--  compiler/optimizing/ssa_builder.h | 10
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.cc | 68
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis.h | 58
-rw-r--r--  compiler/optimizing/ssa_liveness_analysis_test.cc | 64
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.cc | 8
-rw-r--r--  compiler/optimizing/ssa_phi_elimination.h | 4
-rw-r--r--  compiler/optimizing/ssa_test.cc | 8
-rw-r--r--  compiler/optimizing/stack_map_stream.cc | 868
-rw-r--r--  compiler/optimizing/stack_map_stream.h | 277
-rw-r--r--  compiler/optimizing/stack_map_test.cc | 1108
-rw-r--r--  compiler/optimizing/superblock_cloner.cc | 511
-rw-r--r--  compiler/optimizing/superblock_cloner.h | 156
-rw-r--r--  compiler/optimizing/superblock_cloner_test.cc | 578
-rw-r--r--  compiler/optimizing/x86_memory_gen.cc | 5
-rw-r--r--  compiler/optimizing/x86_memory_gen.h | 2
166 files changed, 17037 insertions, 12575 deletions
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index d9df23fd47..a5f78cafe0 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -68,7 +68,7 @@ bool HBasicBlockBuilder::CreateBranchTargets() {
// places where the program might fall through into/out of the a block and
// where TryBoundary instructions will be inserted later. Other edges which
// enter/exit the try blocks are a result of branches/switches.
- for (const DexFile::TryItem& try_item : code_item_accessor_.TryItems()) {
+ for (const dex::TryItem& try_item : code_item_accessor_.TryItems()) {
uint32_t dex_pc_start = try_item.start_addr_;
uint32_t dex_pc_end = dex_pc_start + try_item.insn_count_;
MaybeCreateBlockAt(dex_pc_start);
@@ -222,9 +222,9 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
}
// Returns the TryItem stored for `block` or nullptr if there is no info for it.
-static const DexFile::TryItem* GetTryItem(
+static const dex::TryItem* GetTryItem(
HBasicBlock* block,
- const ScopedArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
+ const ScopedArenaSafeMap<uint32_t, const dex::TryItem*>& try_block_info) {
auto iterator = try_block_info.find(block->GetBlockId());
return (iterator == try_block_info.end()) ? nullptr : iterator->second;
}
@@ -235,7 +235,7 @@ static const DexFile::TryItem* GetTryItem(
// for a handler.
static void LinkToCatchBlocks(HTryBoundary* try_boundary,
const CodeItemDataAccessor& accessor,
- const DexFile::TryItem* try_item,
+ const dex::TryItem* try_item,
const ScopedArenaSafeMap<uint32_t, HBasicBlock*>& catch_blocks) {
for (CatchHandlerIterator it(accessor.GetCatchHandlerData(try_item->handler_off_));
it.HasNext();
@@ -279,7 +279,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// Keep a map of all try blocks and their respective TryItems. We do not use
// the block's pointer but rather its id to ensure deterministic iteration.
- ScopedArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
+ ScopedArenaSafeMap<uint32_t, const dex::TryItem*> try_block_info(
std::less<uint32_t>(), local_allocator_->Adapter(kArenaAllocGraphBuilder));
// Obtain TryItem information for blocks with throwing instructions, and split
@@ -295,7 +295,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// loop for synchronized blocks.
if (ContainsElement(throwing_blocks_, block)) {
// Try to find a TryItem covering the block.
- const DexFile::TryItem* try_item = code_item_accessor_.FindTryItem(block->GetDexPc());
+ const dex::TryItem* try_item = code_item_accessor_.FindTryItem(block->GetDexPc());
if (try_item != nullptr) {
// Block throwing and in a TryItem. Store the try block information.
try_block_info.Put(block->GetBlockId(), try_item);
@@ -315,8 +315,16 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
CatchHandlerIterator iterator(handlers_ptr);
for (; iterator.HasNext(); iterator.Next()) {
uint32_t address = iterator.GetHandlerAddress();
- if (catch_blocks.find(address) != catch_blocks.end()) {
+ auto existing = catch_blocks.find(address);
+ if (existing != catch_blocks.end()) {
// Catch block already processed.
+ TryCatchInformation* info = existing->second->GetTryCatchInformation();
+ if (iterator.GetHandlerTypeIndex() != info->GetCatchTypeIndex()) {
+ // The handler is for multiple types. We could record all the types, but
+ // doing class resolution here isn't ideal, and it's unclear whether wasting
+ // the space in TryCatchInformation is worth it.
+ info->SetInvalidTypeIndex();
+ }
continue;
}
@@ -337,7 +345,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
catch_blocks.Put(address, catch_block);
catch_block->SetTryCatchInformation(
- new (allocator_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
+ new (allocator_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
}
handlers_ptr = iterator.EndDataPointer();
}
@@ -348,7 +356,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// that all predecessors are relinked to. This preserves loop headers (b/23895756).
for (const auto& entry : try_block_info) {
uint32_t block_id = entry.first;
- const DexFile::TryItem* try_item = entry.second;
+ const dex::TryItem* try_item = entry.second;
HBasicBlock* try_block = graph_->GetBlocks()[block_id];
for (HBasicBlock* predecessor : try_block->GetPredecessors()) {
if (GetTryItem(predecessor, try_block_info) != try_item) {
@@ -367,7 +375,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
// the successor is not in the same TryItem.
for (const auto& entry : try_block_info) {
uint32_t block_id = entry.first;
- const DexFile::TryItem* try_item = entry.second;
+ const dex::TryItem* try_item = entry.second;
HBasicBlock* try_block = graph_->GetBlocks()[block_id];
// NOTE: Do not use iterators because SplitEdge would invalidate them.
for (size_t i = 0, e = try_block->GetSuccessors().size(); i < e; ++i) {
@@ -415,7 +423,7 @@ void HBasicBlockBuilder::BuildIntrinsic() {
// Create blocks.
HBasicBlock* entry_block = new (allocator_) HBasicBlock(graph_, kNoDexPc);
HBasicBlock* exit_block = new (allocator_) HBasicBlock(graph_, kNoDexPc);
- HBasicBlock* body = MaybeCreateBlockAt(/* semantic_dex_pc */ kNoDexPc, /* store_dex_pc */ 0u);
+ HBasicBlock* body = MaybeCreateBlockAt(/* semantic_dex_pc= */ kNoDexPc, /* store_dex_pc= */ 0u);
// Add blocks to the graph.
graph_->AddBlock(entry_block);
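A mechanical change that recurs throughout this patch is the argument-comment style: /* name */ becomes /* name= */, as in the MaybeCreateBlockAt() call above. The trailing '=' follows the convention that tooling such as clang-tidy's bugprone-argument-comment check can match against the declared parameter name, so a stale or misplaced comment becomes a warning instead of silent misdocumentation. A minimal sketch, with the signature abridged for illustration:

    // With the trailing '=', an argument comment can be validated against the
    // parameter it claims to annotate (signature abridged for illustration):
    HBasicBlock* MaybeCreateBlockAt(uint32_t semantic_dex_pc, uint32_t store_dex_pc);

    HBasicBlock* body = MaybeCreateBlockAt(/* semantic_dex_pc= */ kNoDexPc,
                                           /* store_dex_pc= */ 0u);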
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index d893cc88c4..e35d50220e 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -388,10 +388,10 @@ class MonotonicValueRange : public ValueRange {
return induction_variable_->GetBlock();
}
- MonotonicValueRange* AsMonotonicValueRange() OVERRIDE { return this; }
+ MonotonicValueRange* AsMonotonicValueRange() override { return this; }
// If it's certain that this value range fits in other_range.
- bool FitsIn(ValueRange* other_range) const OVERRIDE {
+ bool FitsIn(ValueRange* other_range) const override {
if (other_range == nullptr) {
return true;
}
@@ -402,7 +402,7 @@ class MonotonicValueRange : public ValueRange {
// Try to narrow this MonotonicValueRange given another range.
// Ideally it will return a normal ValueRange. But due to
// possible overflow/underflow, that may not be possible.
- ValueRange* Narrow(ValueRange* range) OVERRIDE {
+ ValueRange* Narrow(ValueRange* range) override {
if (range == nullptr) {
return this;
}
@@ -530,7 +530,7 @@ class BCEVisitor : public HGraphVisitor {
induction_range_(induction_analysis),
next_(nullptr) {}
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ void VisitBasicBlock(HBasicBlock* block) override {
DCHECK(!IsAddedBlock(block));
first_index_bounds_check_map_.clear();
// Visit phis and instructions using a safe iterator. The iteration protects
@@ -820,7 +820,7 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
+ void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
HBasicBlock* block = bounds_check->GetBlock();
HInstruction* index = bounds_check->InputAt(0);
HInstruction* array_length = bounds_check->InputAt(1);
@@ -845,8 +845,10 @@ class BCEVisitor : public HGraphVisitor {
// make one more attempt to get a constant in the array range.
ValueRange* existing_range = LookupValueRange(array_length, block);
if (existing_range != nullptr &&
- existing_range->IsConstantValueRange()) {
- ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower());
+ existing_range->IsConstantValueRange() &&
+ existing_range->GetLower().GetConstant() > 0) {
+ ValueBound constant_upper(nullptr, existing_range->GetLower().GetConstant() - 1);
+ ValueRange constant_array_range(&allocator_, lower, constant_upper);
if (index_range->FitsIn(&constant_array_range)) {
ReplaceInstruction(bounds_check, index);
return;
@@ -945,7 +947,7 @@ class BCEVisitor : public HGraphVisitor {
return true;
}
- void VisitPhi(HPhi* phi) OVERRIDE {
+ void VisitPhi(HPhi* phi) override {
if (phi->IsLoopHeaderPhi()
&& (phi->GetType() == DataType::Type::kInt32)
&& HasSameInputAtBackEdges(phi)) {
@@ -992,14 +994,14 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitIf(HIf* instruction) OVERRIDE {
+ void VisitIf(HIf* instruction) override {
if (instruction->InputAt(0)->IsCondition()) {
HCondition* cond = instruction->InputAt(0)->AsCondition();
HandleIf(instruction, cond->GetLeft(), cond->GetRight(), cond->GetCondition());
}
}
- void VisitAdd(HAdd* add) OVERRIDE {
+ void VisitAdd(HAdd* add) override {
HInstruction* right = add->GetRight();
if (right->IsIntConstant()) {
ValueRange* left_range = LookupValueRange(add->GetLeft(), add->GetBlock());
@@ -1013,7 +1015,7 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitSub(HSub* sub) OVERRIDE {
+ void VisitSub(HSub* sub) override {
HInstruction* left = sub->GetLeft();
HInstruction* right = sub->GetRight();
if (right->IsIntConstant()) {
@@ -1115,19 +1117,19 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitDiv(HDiv* div) OVERRIDE {
+ void VisitDiv(HDiv* div) override {
FindAndHandlePartialArrayLength(div);
}
- void VisitShr(HShr* shr) OVERRIDE {
+ void VisitShr(HShr* shr) override {
FindAndHandlePartialArrayLength(shr);
}
- void VisitUShr(HUShr* ushr) OVERRIDE {
+ void VisitUShr(HUShr* ushr) override {
FindAndHandlePartialArrayLength(ushr);
}
- void VisitAnd(HAnd* instruction) OVERRIDE {
+ void VisitAnd(HAnd* instruction) override {
if (instruction->GetRight()->IsIntConstant()) {
int32_t constant = instruction->GetRight()->AsIntConstant()->GetValue();
if (constant > 0) {
@@ -1142,7 +1144,7 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitRem(HRem* instruction) OVERRIDE {
+ void VisitRem(HRem* instruction) override {
HInstruction* left = instruction->GetLeft();
HInstruction* right = instruction->GetRight();
@@ -1202,7 +1204,7 @@ class BCEVisitor : public HGraphVisitor {
}
}
- void VisitNewArray(HNewArray* new_array) OVERRIDE {
+ void VisitNewArray(HNewArray* new_array) override {
HInstruction* len = new_array->GetLength();
if (!len->IsIntConstant()) {
HInstruction *left;
@@ -1240,7 +1242,7 @@ class BCEVisitor : public HGraphVisitor {
* has occurred (see AddCompareWithDeoptimization()), since in those cases it would be
* unsafe to hoist array references across their deoptimization instruction inside a loop.
*/
- void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+ void VisitArrayGet(HArrayGet* array_get) override {
if (!has_dom_based_dynamic_bce_ && array_get->IsInLoop()) {
HLoopInformation* loop = array_get->GetBlock()->GetLoopInformation();
if (loop->IsDefinedOutOfTheLoop(array_get->InputAt(0)) &&
@@ -1634,7 +1636,7 @@ class BCEVisitor : public HGraphVisitor {
HBasicBlock* block = GetPreHeader(loop, check);
HInstruction* cond =
new (GetGraph()->GetAllocator()) HEqual(array, GetGraph()->GetNullConstant());
- InsertDeoptInLoop(loop, block, cond, /* is_null_check */ true);
+ InsertDeoptInLoop(loop, block, cond, /* is_null_check= */ true);
ReplaceInstruction(check, array);
return true;
}
@@ -1938,9 +1940,9 @@ class BCEVisitor : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
};
-void BoundsCheckElimination::Run() {
+bool BoundsCheckElimination::Run() {
if (!graph_->HasBoundsChecks()) {
- return;
+ return false;
}
// Reverse post order guarantees a node's dominators are visited first.
@@ -1968,6 +1970,8 @@ void BoundsCheckElimination::Run() {
// Perform cleanup.
visitor.Finish();
+
+ return true;
}
} // namespace art
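The -845,8 hunk above is a behavioral fix rather than cleanup: when the array length is a known constant L, valid indices are [0, L-1], but the old code built the comparison range with L itself as the upper bound. The new code uses L - 1 and additionally bails out when L == 0, where no index can be in bounds. Condensed from the hunk:

    // For an array of constant length L, indices [0, L-1] are in bounds;
    // checking FitsIn() against [lower, L] would wrongly accept index == L.
    int32_t len = existing_range->GetLower().GetConstant();
    if (len > 0) {
      ValueBound constant_upper(nullptr, len - 1);
      ValueRange constant_array_range(&allocator_, lower, constant_upper);
      if (index_range->FitsIn(&constant_array_range)) {
        ReplaceInstruction(bounds_check, index);  // Bounds check is redundant.
      }
    }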
diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h
index 79c67a8c7a..ef08877daa 100644
--- a/compiler/optimizing/bounds_check_elimination.h
+++ b/compiler/optimizing/bounds_check_elimination.h
@@ -34,7 +34,7 @@ class BoundsCheckElimination : public HOptimization {
side_effects_(side_effects),
induction_analysis_(induction_analysis) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kBoundsCheckEliminationPassName = "BCE";
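The Run() signature change here, void with OVERRIDE becoming bool with the standard override keyword, repeats across every pass touched by this patch: an optimization now reports whether it actually changed the graph. A minimal sketch of the new contract, using a hypothetical pass:

    // Hypothetical pass illustrating the updated HOptimization contract:
    // Run() returns true iff the pass (potentially) modified the graph, so
    // callers can distinguish no-op runs from effective ones.
    class MyPass : public HOptimization {
     public:
      explicit MyPass(HGraph* graph) : HOptimization(graph, kMyPassName) {}

      bool Run() override {
        if (!graph_->HasBoundsChecks()) {
          return false;  // Precondition not met; graph untouched.
        }
        // ... rewrite the graph ...
        return true;
      }

      static constexpr const char* kMyPassName = "my_pass";
    };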
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 1523478613..5927d681b2 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -43,7 +43,7 @@ class BoundsCheckEliminationTest : public OptimizingUnitTest {
void RunBCE() {
graph_->BuildDominatorTree();
- InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run();
+ InstructionSimplifier(graph_, /* codegen= */ nullptr).Run();
SideEffectsAnalysis side_effects(graph_);
side_effects.Run();
@@ -598,9 +598,10 @@ static HInstruction* BuildSSAGraph3(HGraph* graph,
entry->AddSuccessor(block);
// We pass a bogus constant for the class to avoid mocking one.
HInstruction* new_array = new (allocator) HNewArray(
- constant_10,
- constant_10,
- 0);
+ /* cls= */ constant_10,
+ /* length= */ constant_10,
+ /* dex_pc= */ 0,
+ /* component_size_shift= */ 0);
block->AddInstruction(new_array);
block->AddInstruction(new (allocator) HGoto());
@@ -977,7 +978,11 @@ TEST_F(BoundsCheckEliminationTest, ModArrayBoundsElimination) {
graph_->AddBlock(block);
entry->AddSuccessor(block);
// We pass a bogus constant for the class to avoid mocking one.
- HInstruction* new_array = new (GetAllocator()) HNewArray(constant_10, constant_10, 0);
+ HInstruction* new_array = new (GetAllocator()) HNewArray(
+ /* cls= */ constant_10,
+ /* length= */ constant_10,
+ /* dex_pc= */ 0,
+ /* component_size_shift= */ 0);
block->AddInstruction(new_array);
block->AddInstruction(new (GetAllocator()) HGoto());
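Both test fixes above also reflect a new HNewArray constructor parameter: the node now carries component_size_shift, the log2 of the component size in bytes. The tests pass 0 as a placeholder alongside a bogus class constant; a sketch of what an int-array allocation could look like under the extended signature (the input names here are hypothetical):

    // Illustrative only: building a new int[] node. For 32-bit components the
    // size shift is 2, i.e. each element occupies 1 << 2 = 4 bytes.
    HInstruction* new_array = new (allocator) HNewArray(
        /* cls= */ int_array_class,        // hypothetical class input
        /* length= */ length_instruction,  // hypothetical length input
        /* dex_pc= */ dex_pc,
        /* component_size_shift= */ 2);
    block->AddInstruction(new_array);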
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index a1a5692ef6..64aa1b9358 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -21,6 +21,7 @@
#include "base/bit_vector-inl.h"
#include "base/logging.h"
#include "block_builder.h"
+#include "code_generator.h"
#include "data_type-inl.h"
#include "dex/verified_method.h"
#include "driver/compiler_options.h"
@@ -40,7 +41,6 @@ HGraphBuilder::HGraphBuilder(HGraph* graph,
const CodeItemDebugInfoAccessor& accessor,
const DexCompilationUnit* dex_compilation_unit,
const DexCompilationUnit* outer_compilation_unit,
- CompilerDriver* driver,
CodeGenerator* code_generator,
OptimizingCompilerStats* compiler_stats,
ArrayRef<const uint8_t> interpreter_metadata,
@@ -50,7 +50,6 @@ HGraphBuilder::HGraphBuilder(HGraph* graph,
code_item_accessor_(accessor),
dex_compilation_unit_(dex_compilation_unit),
outer_compilation_unit_(outer_compilation_unit),
- compiler_driver_(driver),
code_generator_(code_generator),
compilation_stats_(compiler_stats),
interpreter_metadata_(interpreter_metadata),
@@ -67,19 +66,18 @@ HGraphBuilder::HGraphBuilder(HGraph* graph,
code_item_accessor_(accessor),
dex_compilation_unit_(dex_compilation_unit),
outer_compilation_unit_(nullptr),
- compiler_driver_(nullptr),
code_generator_(nullptr),
compilation_stats_(nullptr),
handles_(handles),
return_type_(return_type) {}
bool HGraphBuilder::SkipCompilation(size_t number_of_branches) {
- if (compiler_driver_ == nullptr) {
- // Note that the compiler driver is null when unit testing.
+ if (code_generator_ == nullptr) {
+ // Note that the codegen is null when unit testing.
return false;
}
- const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
+ const CompilerOptions& compiler_options = code_generator_->GetCompilerOptions();
CompilerFilter::Filter compiler_filter = compiler_options.GetCompilerFilter();
if (compiler_filter == CompilerFilter::kEverything) {
return false;
@@ -131,7 +129,6 @@ GraphAnalysisResult HGraphBuilder::BuildGraph() {
return_type_,
dex_compilation_unit_,
outer_compilation_unit_,
- compiler_driver_,
code_generator_,
interpreter_metadata_,
compilation_stats_,
@@ -203,7 +200,6 @@ void HGraphBuilder::BuildIntrinsicGraph(ArtMethod* method) {
return_type_,
dex_compilation_unit_,
outer_compilation_unit_,
- compiler_driver_,
code_generator_,
interpreter_metadata_,
compilation_stats_,
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 5a1914ce08..6152740324 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -22,7 +22,6 @@
#include "dex/code_item_accessors.h"
#include "dex/dex_file-inl.h"
#include "dex/dex_file.h"
-#include "driver/compiler_driver.h"
#include "nodes.h"
namespace art {
@@ -38,7 +37,6 @@ class HGraphBuilder : public ValueObject {
const CodeItemDebugInfoAccessor& accessor,
const DexCompilationUnit* dex_compilation_unit,
const DexCompilationUnit* outer_compilation_unit,
- CompilerDriver* driver,
CodeGenerator* code_generator,
OptimizingCompilerStats* compiler_stats,
ArrayRef<const uint8_t> interpreter_metadata,
@@ -70,7 +68,6 @@ class HGraphBuilder : public ValueObject {
// The compilation unit of the enclosing method being compiled.
const DexCompilationUnit* const outer_compilation_unit_;
- CompilerDriver* const compiler_driver_;
CodeGenerator* const code_generator_;
OptimizingCompilerStats* const compilation_stats_;
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index 3addaeecd9..c6232ef661 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -44,9 +44,9 @@ class CHAGuardVisitor : HGraphVisitor {
GetGraph()->SetNumberOfCHAGuards(0);
}
- void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) OVERRIDE;
+ void VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) override;
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+ void VisitBasicBlock(HBasicBlock* block) override;
private:
void RemoveGuard(HShouldDeoptimizeFlag* flag);
@@ -241,14 +241,15 @@ void CHAGuardVisitor::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
GetGraph()->IncrementNumberOfCHAGuards();
}
-void CHAGuardOptimization::Run() {
+bool CHAGuardOptimization::Run() {
if (graph_->GetNumberOfCHAGuards() == 0) {
- return;
+ return false;
}
CHAGuardVisitor visitor(graph_);
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
visitor.VisitBasicBlock(block);
}
+ return true;
}
} // namespace art
diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h
index f14e07bd6c..440d51a969 100644
--- a/compiler/optimizing/cha_guard_optimization.h
+++ b/compiler/optimizing/cha_guard_optimization.h
@@ -30,7 +30,7 @@ class CHAGuardOptimization : public HOptimization {
const char* name = kCHAGuardOptimizationPassName)
: HOptimization(graph, name) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kCHAGuardOptimizationPassName = "cha_guard_optimization";
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 6abda9b302..2bbb570c8d 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -49,8 +49,9 @@
#include "dex/bytecode_utils.h"
#include "dex/code_item_accessors-inl.h"
#include "dex/verified_method.h"
-#include "driver/compiler_driver.h"
#include "graph_visualizer.h"
+#include "image.h"
+#include "gc/space/image_space.h"
#include "intern_table.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
@@ -61,15 +62,13 @@
#include "parallel_move_resolver.h"
#include "scoped_thread_state_change-inl.h"
#include "ssa_liveness_analysis.h"
+#include "stack_map.h"
#include "stack_map_stream.h"
#include "thread-current-inl.h"
#include "utils/assembler.h"
namespace art {
-// If true, we record the static and direct invokes in the invoke infos.
-static constexpr bool kEnableDexLayoutOptimizations = false;
-
// Return whether a location is consistent with a type.
static bool CheckType(DataType::Type type, Location location) {
if (location.IsFpuRegister()
@@ -197,7 +196,7 @@ class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllo
return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots();
}
- void EmitJitRoots(Handle<mirror::ObjectArray<mirror::Object>> roots)
+ void EmitJitRoots(/*out*/std::vector<Handle<mirror::Object>>* roots)
REQUIRES_SHARED(Locks::mutator_lock_);
private:
@@ -230,29 +229,31 @@ class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllo
};
void CodeGenerator::CodeGenerationData::EmitJitRoots(
- Handle<mirror::ObjectArray<mirror::Object>> roots) {
- DCHECK_EQ(static_cast<size_t>(roots->GetLength()), GetNumberOfJitRoots());
+ /*out*/std::vector<Handle<mirror::Object>>* roots) {
+ DCHECK(roots->empty());
+ roots->reserve(GetNumberOfJitRoots());
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
size_t index = 0;
for (auto& entry : jit_string_roots_) {
// Update the `roots` with the string, and replace the address temporarily
// stored to the index in the table.
uint64_t address = entry.second;
- roots->Set(index, reinterpret_cast<StackReference<mirror::String>*>(address)->AsMirrorPtr());
- DCHECK(roots->Get(index) != nullptr);
+ roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
+ DCHECK(roots->back() != nullptr);
+ DCHECK(roots->back()->IsString());
entry.second = index;
// Ensure the string is strongly interned. This is a requirement on how the JIT
// handles strings. b/32995596
- class_linker->GetInternTable()->InternStrong(
- reinterpret_cast<mirror::String*>(roots->Get(index)));
+ class_linker->GetInternTable()->InternStrong(roots->back()->AsString());
++index;
}
for (auto& entry : jit_class_roots_) {
// Update the `roots` with the class, and replace the address temporarily
// stored to the index in the table.
uint64_t address = entry.second;
- roots->Set(index, reinterpret_cast<StackReference<mirror::Class>*>(address)->AsMirrorPtr());
- DCHECK(roots->Get(index) != nullptr);
+ roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
+ DCHECK(roots->back() != nullptr);
+ DCHECK(roots->back()->IsClass());
entry.second = index;
++index;
}
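The EmitJitRoots() rewrite above changes the hand-off to the JIT: instead of filling a preallocated ObjectArray whose length had to match exactly, the method appends Handle<mirror::Object> entries to a caller-provided vector and checks each root's expected type as it goes. The string loop condenses to:

    // Condensed from the hunk above: each map entry temporarily holds the
    // address of a StackReference; wrap it in a Handle, append it, then
    // replace the entry with the index where the root now lives.
    roots->reserve(GetNumberOfJitRoots());
    size_t index = 0;
    for (auto& entry : jit_string_roots_) {
      uint64_t address = entry.second;
      roots->emplace_back(reinterpret_cast<StackReference<mirror::Object>*>(address));
      DCHECK(roots->back()->IsString());
      // The JIT requires strings to stay strongly interned (b/32995596).
      class_linker->GetInternTable()->InternStrong(roots->back()->AsString());
      entry.second = index++;
    }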
@@ -390,6 +391,11 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
HGraphVisitor* instruction_visitor = GetInstructionVisitor();
DCHECK_EQ(current_block_index_, 0u);
+ GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 0 : frame_size_,
+ core_spill_mask_,
+ fpu_spill_mask_,
+ GetGraph()->GetNumberOfVRegs());
+
size_t frame_start = GetAssembler()->CodeSize();
GenerateFrameEntry();
DCHECK_EQ(GetAssembler()->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size_));
@@ -407,7 +413,7 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
// This ensures that we have correct native line mapping for all native instructions.
// It is necessary to make stepping over a statement work. Otherwise, any initial
// instructions (e.g. moves) would be assumed to be the start of next statement.
- MaybeRecordNativeDebugInfo(nullptr /* instruction */, block->GetDexPc());
+ MaybeRecordNativeDebugInfo(/* instruction= */ nullptr, block->GetDexPc());
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* current = it.Current();
if (current->HasEnvironment()) {
@@ -432,6 +438,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
// Finalize instructions in assember;
Finalize(allocator);
+
+ GetStackMapStream()->EndMethod();
}
void CodeGenerator::Finalize(CodeAllocator* allocator) {
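Taken together, the two Compile() hunks above bracket all code emission between explicit stack-map-stream lifecycle calls: frame geometry is declared before the frame entry is generated, and the stream is sealed after Finalize(). Schematically:

    // Condensed shape of CodeGenerator::Compile() after this change:
    GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 0 : frame_size_,
                                     core_spill_mask_,
                                     fpu_spill_mask_,
                                     GetGraph()->GetNumberOfVRegs());
    GenerateFrameEntry();
    // ... emit each block in order, recording stack maps along the way ...
    Finalize(allocator);
    GetStackMapStream()->EndMethod();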
@@ -447,6 +455,18 @@ void CodeGenerator::EmitLinkerPatches(
// No linker patches by default.
}
+bool CodeGenerator::NeedsThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED) const {
+ // Code generators that create patches requiring thunk compilation should override this function.
+ return false;
+}
+
+void CodeGenerator::EmitThunkCode(const linker::LinkerPatch& patch ATTRIBUTE_UNUSED,
+ /*out*/ ArenaVector<uint8_t>* code ATTRIBUTE_UNUSED,
+ /*out*/ std::string* debug_name ATTRIBUTE_UNUSED) {
+ // Code generators that create patches requiring thunk compilation should override this function.
+ LOG(FATAL) << "Unexpected call to EmitThunkCode().";
+}
+
void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots,
size_t maximum_safepoint_spill_size,
size_t number_of_out_slots,
@@ -501,7 +521,7 @@ void CodeGenerator::CreateCommonInvokeLocationSummary(
locations->AddTemp(visitor->GetMethodLocation());
break;
}
- } else {
+ } else if (!invoke->IsInvokePolymorphic()) {
locations->AddTemp(visitor->GetMethodLocation());
}
}
@@ -529,6 +549,7 @@ void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall(
case kVirtual:
case kInterface:
case kPolymorphic:
+ case kCustom:
LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType();
UNREACHABLE();
}
@@ -557,6 +578,7 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok
entrypoint = kQuickInvokeInterfaceTrampolineWithAccessCheck;
break;
case kPolymorphic:
+ case kCustom:
LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType();
UNREACHABLE();
}
@@ -564,11 +586,19 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok
}
void CodeGenerator::GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke) {
- MoveConstant(invoke->GetLocations()->GetTemp(0), static_cast<int32_t>(invoke->GetType()));
+ // invoke-polymorphic does not use a temporary to convey any additional information (e.g. a
+ // method index) since it requires multiple info from the instruction (registers A, B, H). Not
+ // using the reservation has no effect on the registers used in the runtime call.
QuickEntrypointEnum entrypoint = kQuickInvokePolymorphic;
InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr);
}
+void CodeGenerator::GenerateInvokeCustomCall(HInvokeCustom* invoke) {
+ MoveConstant(invoke->GetLocations()->GetTemp(0), invoke->GetCallSiteIndex());
+ QuickEntrypointEnum entrypoint = kQuickInvokeCustom;
+ InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr);
+}
+
void CodeGenerator::CreateUnresolvedFieldLocationSummary(
HInstruction* field_access,
DataType::Type field_type,
@@ -708,20 +738,99 @@ void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) {
DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
+ DCHECK(!cls->MustGenerateClinitCheck());
LocationSummary* locations = cls->GetLocations();
MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
if (cls->NeedsAccessCheck()) {
- CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
- InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
- } else if (cls->MustGenerateClinitCheck()) {
- CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
- InvokeRuntime(kQuickInitializeStaticStorage, cls, cls->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
+ InvokeRuntime(kQuickResolveTypeAndVerifyAccess, cls, cls->GetDexPc());
} else {
- CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
- InvokeRuntime(kQuickInitializeType, cls, cls->GetDexPc());
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ InvokeRuntime(kQuickResolveType, cls, cls->GetDexPc());
}
}
+void CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(
+ HLoadMethodHandle* method_handle,
+ Location runtime_proto_index_location,
+ Location runtime_return_location) {
+ DCHECK_EQ(method_handle->InputCount(), 1u);
+ LocationSummary* locations =
+ new (method_handle->GetBlock()->GetGraph()->GetAllocator()) LocationSummary(
+ method_handle, LocationSummary::kCallOnMainOnly);
+ locations->SetInAt(0, Location::NoLocation());
+ locations->AddTemp(runtime_proto_index_location);
+ locations->SetOut(runtime_return_location);
+}
+
+void CodeGenerator::GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle) {
+ LocationSummary* locations = method_handle->GetLocations();
+ MoveConstant(locations->GetTemp(0), method_handle->GetMethodHandleIndex());
+ CheckEntrypointTypes<kQuickResolveMethodHandle, void*, uint32_t>();
+ InvokeRuntime(kQuickResolveMethodHandle, method_handle, method_handle->GetDexPc());
+}
+
+void CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(
+ HLoadMethodType* method_type,
+ Location runtime_proto_index_location,
+ Location runtime_return_location) {
+ DCHECK_EQ(method_type->InputCount(), 1u);
+ LocationSummary* locations =
+ new (method_type->GetBlock()->GetGraph()->GetAllocator()) LocationSummary(
+ method_type, LocationSummary::kCallOnMainOnly);
+ locations->SetInAt(0, Location::NoLocation());
+ locations->AddTemp(runtime_proto_index_location);
+ locations->SetOut(runtime_return_location);
+}
+
+void CodeGenerator::GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type) {
+ LocationSummary* locations = method_type->GetLocations();
+ MoveConstant(locations->GetTemp(0), method_type->GetProtoIndex().index_);
+ CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>();
+ InvokeRuntime(kQuickResolveMethodType, method_type, method_type->GetDexPc());
+}
+
+static uint32_t GetBootImageOffsetImpl(const void* object, ImageHeader::ImageSections section) {
+ Runtime* runtime = Runtime::Current();
+ DCHECK(runtime->IsAotCompiler());
+ const std::vector<gc::space::ImageSpace*>& boot_image_spaces =
+ runtime->GetHeap()->GetBootImageSpaces();
+ // Check that the `object` is in the expected section of one of the boot image files.
+ DCHECK(std::any_of(boot_image_spaces.begin(),
+ boot_image_spaces.end(),
+ [object, section](gc::space::ImageSpace* space) {
+ uintptr_t begin = reinterpret_cast<uintptr_t>(space->Begin());
+ uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin;
+ return space->GetImageHeader().GetImageSection(section).Contains(offset);
+ }));
+ uintptr_t begin = reinterpret_cast<uintptr_t>(boot_image_spaces.front()->Begin());
+ uintptr_t offset = reinterpret_cast<uintptr_t>(object) - begin;
+ return dchecked_integral_cast<uint32_t>(offset);
+}
+
+// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image classes are non-moveable.
+uint32_t CodeGenerator::GetBootImageOffset(HLoadClass* load_class) NO_THREAD_SAFETY_ANALYSIS {
+ DCHECK_EQ(load_class->GetLoadKind(), HLoadClass::LoadKind::kBootImageRelRo);
+ ObjPtr<mirror::Class> klass = load_class->GetClass().Get();
+ DCHECK(klass != nullptr);
+ return GetBootImageOffsetImpl(klass.Ptr(), ImageHeader::kSectionObjects);
+}
+
+// NO_THREAD_SAFETY_ANALYSIS: Avoid taking the mutator lock, boot image strings are non-moveable.
+uint32_t CodeGenerator::GetBootImageOffset(HLoadString* load_string) NO_THREAD_SAFETY_ANALYSIS {
+ DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kBootImageRelRo);
+ ObjPtr<mirror::String> string = load_string->GetString().Get();
+ DCHECK(string != nullptr);
+ return GetBootImageOffsetImpl(string.Ptr(), ImageHeader::kSectionObjects);
+}
+
+uint32_t CodeGenerator::GetBootImageOffset(HInvokeStaticOrDirect* invoke) {
+ DCHECK_EQ(invoke->GetMethodLoadKind(), HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo);
+ ArtMethod* method = invoke->GetResolvedMethod();
+ DCHECK(method != nullptr);
+ return GetBootImageOffsetImpl(method, ImageHeader::kSectionArtMethods);
+}
+
void CodeGenerator::BlockIfInRegister(Location location, bool is_out) const {
// The DCHECKS below check that a register is not specified twice in
// the summary. The out location can overlap with an input, so we need
@@ -771,53 +880,45 @@ void CodeGenerator::AllocateLocations(HInstruction* instruction) {
}
std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph,
- InstructionSet instruction_set,
- const InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats) {
ArenaAllocator* allocator = graph->GetAllocator();
- switch (instruction_set) {
+ switch (compiler_options.GetInstructionSet()) {
#ifdef ART_ENABLE_CODEGEN_arm
case InstructionSet::kArm:
case InstructionSet::kThumb2: {
return std::unique_ptr<CodeGenerator>(
- new (allocator) arm::CodeGeneratorARMVIXL(
- graph, *isa_features.AsArmInstructionSetFeatures(), compiler_options, stats));
+ new (allocator) arm::CodeGeneratorARMVIXL(graph, compiler_options, stats));
}
#endif
#ifdef ART_ENABLE_CODEGEN_arm64
case InstructionSet::kArm64: {
return std::unique_ptr<CodeGenerator>(
- new (allocator) arm64::CodeGeneratorARM64(
- graph, *isa_features.AsArm64InstructionSetFeatures(), compiler_options, stats));
+ new (allocator) arm64::CodeGeneratorARM64(graph, compiler_options, stats));
}
#endif
#ifdef ART_ENABLE_CODEGEN_mips
case InstructionSet::kMips: {
return std::unique_ptr<CodeGenerator>(
- new (allocator) mips::CodeGeneratorMIPS(
- graph, *isa_features.AsMipsInstructionSetFeatures(), compiler_options, stats));
+ new (allocator) mips::CodeGeneratorMIPS(graph, compiler_options, stats));
}
#endif
#ifdef ART_ENABLE_CODEGEN_mips64
case InstructionSet::kMips64: {
return std::unique_ptr<CodeGenerator>(
- new (allocator) mips64::CodeGeneratorMIPS64(
- graph, *isa_features.AsMips64InstructionSetFeatures(), compiler_options, stats));
+ new (allocator) mips64::CodeGeneratorMIPS64(graph, compiler_options, stats));
}
#endif
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86: {
return std::unique_ptr<CodeGenerator>(
- new (allocator) x86::CodeGeneratorX86(
- graph, *isa_features.AsX86InstructionSetFeatures(), compiler_options, stats));
+ new (allocator) x86::CodeGeneratorX86(graph, compiler_options, stats));
}
#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
case InstructionSet::kX86_64: {
return std::unique_ptr<CodeGenerator>(
- new (allocator) x86_64::CodeGeneratorX86_64(
- graph, *isa_features.AsX86_64InstructionSetFeatures(), compiler_options, stats));
+ new (allocator) x86_64::CodeGeneratorX86_64(graph, compiler_options, stats));
}
#endif
default:
@@ -861,15 +962,6 @@ CodeGenerator::CodeGenerator(HGraph* graph,
CodeGenerator::~CodeGenerator() {}
-void CodeGenerator::ComputeStackMapAndMethodInfoSize(size_t* stack_map_size,
- size_t* method_info_size) {
- DCHECK(stack_map_size != nullptr);
- DCHECK(method_info_size != nullptr);
- StackMapStream* stack_map_stream = GetStackMapStream();
- *stack_map_size = stack_map_stream->PrepareForFillIn();
- *method_info_size = stack_map_stream->ComputeMethodInfoSize();
-}
-
size_t CodeGenerator::GetNumberOfJitRoots() const {
DCHECK(code_generation_data_ != nullptr);
return code_generation_data_->GetNumberOfJitRoots();
@@ -880,11 +972,10 @@ static void CheckCovers(uint32_t dex_pc,
const CodeInfo& code_info,
const ArenaVector<HSuspendCheck*>& loop_headers,
ArenaVector<size_t>* covered) {
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
for (size_t i = 0; i < loop_headers.size(); ++i) {
if (loop_headers[i]->GetDexPc() == dex_pc) {
if (graph.IsCompilingOsr()) {
- DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid());
+ DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc).IsValid());
}
++(*covered)[i];
}
@@ -895,7 +986,7 @@ static void CheckCovers(uint32_t dex_pc,
// dex branch instructions.
static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
const CodeInfo& code_info,
- const DexFile::CodeItem& code_item) {
+ const dex::CodeItem& code_item) {
if (graph.HasTryCatch()) {
// One can write loops through try/catch, which we do not support for OSR anyway.
return;
@@ -937,20 +1028,18 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
}
}
-void CodeGenerator::BuildStackMaps(MemoryRegion stack_map_region,
- MemoryRegion method_info_region,
- const DexFile::CodeItem* code_item_for_osr_check) {
- StackMapStream* stack_map_stream = GetStackMapStream();
- stack_map_stream->FillInCodeInfo(stack_map_region);
- stack_map_stream->FillInMethodInfo(method_info_region);
- if (kIsDebugBuild && code_item_for_osr_check != nullptr) {
- CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), *code_item_for_osr_check);
+ScopedArenaVector<uint8_t> CodeGenerator::BuildStackMaps(const dex::CodeItem* code_item) {
+ ScopedArenaVector<uint8_t> stack_map = GetStackMapStream()->Encode();
+ if (kIsDebugBuild && code_item != nullptr) {
+ CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map.data()), *code_item);
}
+ return stack_map;
}
void CodeGenerator::RecordPcInfo(HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path) {
+ SlowPathCode* slow_path,
+ bool native_debug_info) {
if (instruction != nullptr) {
// The code generated for some type conversions
// may call the runtime, thus normally requiring a subsequent
@@ -981,7 +1070,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
if (instruction == nullptr) {
// For stack overflow checks and native-debug-info entries without dex register
// mapping (i.e. start of basic block or start of slow path).
- stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, 0, 0, 0, 0);
+ stack_map_stream->BeginStackMapEntry(dex_pc, native_pc);
stack_map_stream->EndStackMapEntry();
return;
}
@@ -995,7 +1084,7 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
// call). Therefore register_mask contains both callee-save and caller-save
// registers that hold objects. We must remove the spilled caller-save from the
// mask, since they will be overwritten by the callee.
- uint32_t spills = GetSlowPathSpills(locations, /* core_registers */ true);
+ uint32_t spills = GetSlowPathSpills(locations, /* core_registers= */ true);
register_mask &= ~spills;
} else {
// The register mask must be a subset of callee-save registers.
@@ -1015,37 +1104,28 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
outer_dex_pc = outer_environment->GetDexPc();
outer_environment_size = outer_environment->Size();
}
+
+ HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
+ bool osr =
+ instruction->IsSuspendCheck() &&
+ (info != nullptr) &&
+ graph_->IsCompilingOsr() &&
+ (inlining_depth == 0);
+ StackMap::Kind kind = native_debug_info
+ ? StackMap::Kind::Debug
+ : (osr ? StackMap::Kind::OSR : StackMap::Kind::Default);
stack_map_stream->BeginStackMapEntry(outer_dex_pc,
native_pc,
register_mask,
locations->GetStackMask(),
- outer_environment_size,
- inlining_depth);
+ kind);
EmitEnvironment(environment, slow_path);
- // Record invoke info, the common case for the trampoline is super and static invokes. Only
- // record these to reduce oat file size.
- if (kEnableDexLayoutOptimizations) {
- if (instruction->IsInvokeStaticOrDirect()) {
- HInvoke* const invoke = instruction->AsInvokeStaticOrDirect();
- DCHECK(environment != nullptr);
- stack_map_stream->AddInvoke(invoke->GetInvokeType(), invoke->GetDexMethodIndex());
- }
- }
stack_map_stream->EndStackMapEntry();
- HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
- if (instruction->IsSuspendCheck() &&
- (info != nullptr) &&
- graph_->IsCompilingOsr() &&
- (inlining_depth == 0)) {
+ if (osr) {
DCHECK_EQ(info->GetSuspendCheck(), instruction);
- // We duplicate the stack map as a marker that this stack map can be an OSR entry.
- // Duplicating it avoids having the runtime recognize and skip an OSR stack map.
DCHECK(info->IsIrreducible());
- stack_map_stream->BeginStackMapEntry(
- dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0);
- EmitEnvironment(instruction->GetEnvironment(), slow_path);
- stack_map_stream->EndStackMapEntry();
+ DCHECK(environment != nullptr);
if (kIsDebugBuild) {
for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
HInstruction* in_environment = environment->GetInstructionAt(i);
@@ -1062,14 +1142,6 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
}
}
}
- } else if (kIsDebugBuild) {
- // Ensure stack maps are unique, by checking that the native pc in the stack map
- // last emitted is different than the native pc of the stack map just emitted.
- size_t number_of_stack_maps = stack_map_stream->GetNumberOfStackMaps();
- if (number_of_stack_maps > 1) {
- DCHECK_NE(stack_map_stream->GetStackMap(number_of_stack_maps - 1).native_pc_code_offset,
- stack_map_stream->GetStackMap(number_of_stack_maps - 2).native_pc_code_offset);
- }
}
}
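The RecordPcInfo() rewrite above retires two ad-hoc mechanisms, duplicated stack maps marking OSR entries and indistinguishable native-debug entries, in favor of an explicit kind stored on each stack map. The selection logic reduces to:

    // Condensed from the hunk above: one stack map per site, tagged by kind.
    // (Catch handlers, recorded separately, use StackMap::Kind::Catch.)
    bool osr = instruction->IsSuspendCheck() &&
               (info != nullptr) &&
               graph_->IsCompilingOsr() &&
               (inlining_depth == 0);
    StackMap::Kind kind = native_debug_info
        ? StackMap::Kind::Debug
        : (osr ? StackMap::Kind::OSR : StackMap::Kind::Default);
    stack_map_stream->BeginStackMapEntry(outer_dex_pc,
                                         native_pc,
                                         register_mask,
                                         locations->GetStackMask(),
                                         kind);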
@@ -1080,8 +1152,7 @@ bool CodeGenerator::HasStackMapAtCurrentPc() {
if (count == 0) {
return false;
}
- CodeOffset native_pc_offset = stack_map_stream->GetStackMap(count - 1).native_pc_code_offset;
- return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc);
+ return stack_map_stream->GetStackMapNativePcOffset(count - 1) == pc;
}
void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction,
@@ -1092,12 +1163,11 @@ void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction,
// Ensure that we do not collide with the stack map of the previous instruction.
GenerateNop();
}
- RecordPcInfo(instruction, dex_pc, slow_path);
+ RecordPcInfo(instruction, dex_pc, slow_path, /* native_debug_info= */ true);
}
}
void CodeGenerator::RecordCatchBlockInfo() {
- ArenaAllocator* allocator = graph_->GetAllocator();
StackMapStream* stack_map_stream = GetStackMapStream();
for (HBasicBlock* block : *block_order_) {
@@ -1107,30 +1177,23 @@ void CodeGenerator::RecordCatchBlockInfo() {
uint32_t dex_pc = block->GetDexPc();
uint32_t num_vregs = graph_->GetNumberOfVRegs();
- uint32_t inlining_depth = 0; // Inlining of catch blocks is not supported at the moment.
uint32_t native_pc = GetAddressOf(block);
- uint32_t register_mask = 0; // Not used.
-
- // The stack mask is not used, so we leave it empty.
- ArenaBitVector* stack_mask =
- ArenaBitVector::Create(allocator, 0, /* expandable */ true, kArenaAllocCodeGenerator);
stack_map_stream->BeginStackMapEntry(dex_pc,
native_pc,
- register_mask,
- stack_mask,
- num_vregs,
- inlining_depth);
+ /* register_mask= */ 0,
+ /* sp_mask= */ nullptr,
+ StackMap::Kind::Catch);
HInstruction* current_phi = block->GetFirstPhi();
for (size_t vreg = 0; vreg < num_vregs; ++vreg) {
- while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
- HInstruction* next_phi = current_phi->GetNext();
- DCHECK(next_phi == nullptr ||
- current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
- << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
- current_phi = next_phi;
- }
+ while (current_phi != nullptr && current_phi->AsPhi()->GetRegNumber() < vreg) {
+ HInstruction* next_phi = current_phi->GetNext();
+ DCHECK(next_phi == nullptr ||
+ current_phi->AsPhi()->GetRegNumber() <= next_phi->AsPhi()->GetRegNumber())
+ << "Phis need to be sorted by vreg number to keep this a linear-time loop.";
+ current_phi = next_phi;
+ }
if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) {
stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
@@ -1190,50 +1253,45 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo
continue;
}
+ using Kind = DexRegisterLocation::Kind;
Location location = environment->GetLocationAt(i);
switch (location.GetKind()) {
case Location::kConstant: {
DCHECK_EQ(current, location.GetConstant());
if (current->IsLongConstant()) {
int64_t value = current->AsLongConstant()->GetValue();
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kConstant, Low32Bits(value));
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kConstant, High32Bits(value));
+ stack_map_stream->AddDexRegisterEntry(Kind::kConstant, Low32Bits(value));
+ stack_map_stream->AddDexRegisterEntry(Kind::kConstant, High32Bits(value));
++i;
DCHECK_LT(i, environment_size);
} else if (current->IsDoubleConstant()) {
int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue());
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kConstant, Low32Bits(value));
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kConstant, High32Bits(value));
+ stack_map_stream->AddDexRegisterEntry(Kind::kConstant, Low32Bits(value));
+ stack_map_stream->AddDexRegisterEntry(Kind::kConstant, High32Bits(value));
++i;
DCHECK_LT(i, environment_size);
} else if (current->IsIntConstant()) {
int32_t value = current->AsIntConstant()->GetValue();
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value);
+ stack_map_stream->AddDexRegisterEntry(Kind::kConstant, value);
} else if (current->IsNullConstant()) {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, 0);
+ stack_map_stream->AddDexRegisterEntry(Kind::kConstant, 0);
} else {
DCHECK(current->IsFloatConstant()) << current->DebugName();
int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue());
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value);
+ stack_map_stream->AddDexRegisterEntry(Kind::kConstant, value);
}
break;
}
case Location::kStackSlot: {
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, location.GetStackIndex());
break;
}
case Location::kDoubleStackSlot: {
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, location.GetStackIndex());
stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, location.GetStackIndex());
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize));
+ Kind::kInStack, location.GetHighStackIndex(kVRegSize));
++i;
DCHECK_LT(i, environment_size);
break;
@@ -1243,17 +1301,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo
int id = location.reg();
if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) {
uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id);
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset);
if (current->GetType() == DataType::Type::kInt64) {
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, offset + kVRegSize);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset + kVRegSize);
++i;
DCHECK_LT(i, environment_size);
}
} else {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, id);
if (current->GetType() == DataType::Type::kInt64) {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegisterHigh, id);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInRegisterHigh, id);
++i;
DCHECK_LT(i, environment_size);
}
@@ -1265,18 +1322,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo
int id = location.reg();
if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) {
uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id);
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset);
if (current->GetType() == DataType::Type::kFloat64) {
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInStack, offset + kVRegSize);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset + kVRegSize);
++i;
DCHECK_LT(i, environment_size);
}
} else {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, id);
if (current->GetType() == DataType::Type::kFloat64) {
- stack_map_stream->AddDexRegisterEntry(
- DexRegisterLocation::Kind::kInFpuRegisterHigh, id);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegisterHigh, id);
++i;
DCHECK_LT(i, environment_size);
}
@@ -1289,16 +1344,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo
int high = location.high();
if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) {
uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(low);
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset);
} else {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, low);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, low);
}
if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(high)) {
uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(high);
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset);
++i;
} else {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, high);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInFpuRegister, high);
++i;
}
DCHECK_LT(i, environment_size);
@@ -1310,15 +1365,15 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo
int high = location.high();
if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) {
uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(low);
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset);
} else {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, low);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, low);
}
if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(high)) {
uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(high);
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInStack, offset);
} else {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, high);
+ stack_map_stream->AddDexRegisterEntry(Kind::kInRegister, high);
}
++i;
DCHECK_LT(i, environment_size);
@@ -1326,7 +1381,7 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo
}
case Location::kInvalid: {
- stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0);
+ stack_map_stream->AddDexRegisterEntry(Kind::kNone, 0);
break;
}
@@ -1341,37 +1396,12 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo
}
bool CodeGenerator::CanMoveNullCheckToUser(HNullCheck* null_check) {
- HInstruction* first_next_not_move = null_check->GetNextDisregardingMoves();
-
- return (first_next_not_move != nullptr)
- && first_next_not_move->CanDoImplicitNullCheckOn(null_check->InputAt(0));
+ return null_check->IsEmittedAtUseSite();
}
void CodeGenerator::MaybeRecordImplicitNullCheck(HInstruction* instr) {
- if (!compiler_options_.GetImplicitNullChecks()) {
- return;
- }
-
- // If we come from a static path, don't record the pc, as we can't throw an NPE.
- // NB: having the checks here makes the code much less verbose in the
- // arch-specific code generators.
- if (instr->IsStaticFieldSet() || instr->IsStaticFieldGet()) {
- return;
- }
-
- if (!instr->CanDoImplicitNullCheckOn(instr->InputAt(0))) {
- return;
- }
-
- // Find the first previous instruction which is not a move.
- HInstruction* first_prev_not_move = instr->GetPreviousDisregardingMoves();
-
- // If the instruction is a null check it means that `instr` is the first user
- // and needs to record the pc.
- if (first_prev_not_move != nullptr && first_prev_not_move->IsNullCheck()) {
- HNullCheck* null_check = first_prev_not_move->AsNullCheck();
- // TODO: The parallel moves modify the environment. Their changes need to be
- // reverted otherwise the stack maps at the throw point will not be correct.
+ HNullCheck* null_check = instr->GetImplicitNullCheck();
+ if (null_check != nullptr) {
RecordPcInfo(null_check, null_check->GetDexPc());
}
}
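The rewrite moves the "which null check does this instruction implicitly perform" question from codegen pattern matching onto the instruction itself. For comparison, the removed lookup (ignoring the early-outs that moved elsewhere) amounted to this helper, using names from the removed code:

    HNullCheck* FindImplicitNullCheck(HInstruction* instr) {
      // Skip parallel moves inserted by the register allocator.
      HInstruction* prev = instr->GetPreviousDisregardingMoves();
      if (prev != nullptr && prev->IsNullCheck()) {
        return prev->AsNullCheck();  // `instr` is the null check's first user.
      }
      return nullptr;
    }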
@@ -1461,7 +1491,12 @@ void CodeGenerator::ValidateInvokeRuntime(QuickEntrypointEnum entrypoint,
<< " instruction->GetSideEffects().ToString()="
<< instruction->GetSideEffects().ToString();
} else {
- DCHECK(instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
+ // The 'CanTriggerGC' side effect is used to restrict optimization of instructions which
+ // depend on GC (e.g. IntermediateAddress), ensuring they are not live across GC points.
+ // However, if execution never returns to the compiled code from a GC point, this
+ // restriction is unnecessary; in particular, fatal slow paths may trigger GC.
+ DCHECK((slow_path->IsFatal() && !instruction->GetLocations()->WillCall()) ||
+ instruction->GetSideEffects().Includes(SideEffects::CanTriggerGC()) ||
// When (non-Baker) read barriers are enabled, some instructions
// use a slow path to emit a read barrier, which does not trigger
// GC.
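The newly added first disjunct reads as a standalone predicate: a fatal slow path never returns to the compiled code, so values constrained by CanTriggerGC need not survive a GC it may trigger. A hedged paraphrase of just that disjunct (not ART code):

    // True when the CanTriggerGC liveness restriction may be waived.
    bool GcRestrictionUnnecessary(const SlowPathCode* slow_path,
                                  const HInstruction* instruction) {
      return slow_path->IsFatal() && !instruction->GetLocations()->WillCall();
    }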
@@ -1519,7 +1554,7 @@ void CodeGenerator::ValidateInvokeRuntimeWithoutRecordingPcInfo(HInstruction* in
void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
- const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
for (uint32_t i : LowToHighBits(core_spills)) {
// If the register holds an object, update the stack mask.
if (locations->RegisterContainsObject(i)) {
@@ -1531,7 +1566,7 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo
stack_offset += codegen->SaveCoreRegister(stack_offset, i);
}
- const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
for (uint32_t i : LowToHighBits(fp_spills)) {
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
@@ -1543,14 +1578,14 @@ void SlowPathCode::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* lo
void SlowPathCode::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
- const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
for (uint32_t i : LowToHighBits(core_spills)) {
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
stack_offset += codegen->RestoreCoreRegister(stack_offset, i);
}
- const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
for (uint32_t i : LowToHighBits(fp_spills)) {
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
@@ -1612,28 +1647,21 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) {
}
void CodeGenerator::EmitJitRoots(uint8_t* code,
- Handle<mirror::ObjectArray<mirror::Object>> roots,
- const uint8_t* roots_data) {
+ const uint8_t* roots_data,
+ /*out*/std::vector<Handle<mirror::Object>>* roots) {
code_generation_data_->EmitJitRoots(roots);
EmitJitRootPatches(code, roots_data);
}
-QuickEntrypointEnum CodeGenerator::GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass) {
- ScopedObjectAccess soa(Thread::Current());
- if (array_klass == nullptr) {
- // This can only happen for non-primitive arrays, as primitive arrays can always
- // be resolved.
- return kQuickAllocArrayResolved32;
- }
-
- switch (array_klass->GetComponentSize()) {
- case 1: return kQuickAllocArrayResolved8;
- case 2: return kQuickAllocArrayResolved16;
- case 4: return kQuickAllocArrayResolved32;
- case 8: return kQuickAllocArrayResolved64;
+QuickEntrypointEnum CodeGenerator::GetArrayAllocationEntrypoint(HNewArray* new_array) {
+ switch (new_array->GetComponentSizeShift()) {
+ case 0: return kQuickAllocArrayResolved8;
+ case 1: return kQuickAllocArrayResolved16;
+ case 2: return kQuickAllocArrayResolved32;
+ case 3: return kQuickAllocArrayResolved64;
}
LOG(FATAL) << "Unreachable";
- return kQuickAllocArrayResolved;
+ UNREACHABLE();
}
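The switch now keys on the component size shift rather than the size itself; the two are interchangeable because every primitive component size is a power of two. The correspondence, as a one-line illustrative check:

    static_assert((1u << 0) == 1u && (1u << 1) == 2u && (1u << 2) == 4u && (1u << 3) == 8u,
                  "shifts 0..3 correspond to component sizes 1, 2, 4 and 8 bytes");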
} // namespace art
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index f784a1a857..f70ecb612d 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -21,20 +21,20 @@
#include "arch/instruction_set_features.h"
#include "base/arena_containers.h"
#include "base/arena_object.h"
+#include "base/array_ref.h"
#include "base/bit_field.h"
#include "base/bit_utils.h"
#include "base/enums.h"
+#include "base/globals.h"
+#include "base/memory_region.h"
#include "dex/string_reference.h"
#include "dex/type_reference.h"
-#include "globals.h"
#include "graph_visualizer.h"
#include "locations.h"
-#include "memory_region.h"
#include "nodes.h"
#include "optimizing_compiler_stats.h"
#include "read_barrier_option.h"
#include "stack.h"
-#include "stack_map.h"
#include "utils/label.h"
namespace art {
@@ -59,7 +59,6 @@ static constexpr ReadBarrierOption kCompilerReadBarrierOption =
class Assembler;
class CodeGenerator;
-class CompilerDriver;
class CompilerOptions;
class StackMapStream;
class ParallelMoveResolver;
@@ -74,6 +73,7 @@ class CodeAllocator {
virtual ~CodeAllocator() {}
virtual uint8_t* Allocate(size_t size) = 0;
+ virtual ArrayRef<const uint8_t> GetMemory() const = 0;
private:
DISALLOW_COPY_AND_ASSIGN(CodeAllocator);
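The new pure virtual GetMemory() lets Finalize() read back the emitted code (see the Baker read barrier verification in code_generator_arm64.cc below). A minimal conforming allocator might look like this sketch; VectorCodeAllocator is a hypothetical name, not the class ART uses:

    class VectorCodeAllocator final : public CodeAllocator {
     public:
      uint8_t* Allocate(size_t size) override {
        memory_.resize(size);
        return memory_.data();
      }
      ArrayRef<const uint8_t> GetMemory() const override {
        return ArrayRef<const uint8_t>(memory_.data(), memory_.size());
      }

     private:
      std::vector<uint8_t> memory_;
    };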
@@ -187,8 +187,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// Compiles the graph to executable instructions.
void Compile(CodeAllocator* allocator);
static std::unique_ptr<CodeGenerator> Create(HGraph* graph,
- InstructionSet instruction_set,
- const InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
virtual ~CodeGenerator();
@@ -210,6 +208,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual void Initialize() = 0;
virtual void Finalize(CodeAllocator* allocator);
virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches);
+ virtual bool NeedsThunkCode(const linker::LinkerPatch& patch) const;
+ virtual void EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name);
virtual void GenerateFrameEntry() = 0;
virtual void GenerateFrameExit() = 0;
virtual void Bind(HBasicBlock* block) = 0;
@@ -318,7 +320,10 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
}
// Record native to dex mapping for a suspend point. Required by runtime.
- void RecordPcInfo(HInstruction* instruction, uint32_t dex_pc, SlowPathCode* slow_path = nullptr);
+ void RecordPcInfo(HInstruction* instruction,
+ uint32_t dex_pc,
+ SlowPathCode* slow_path = nullptr,
+ bool native_debug_info = false);
// Check whether we have already recorded mapping at this PC.
bool HasStackMapAtCurrentPc();
// Record extra stack maps if we support native debugging.
@@ -344,17 +349,14 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
void AddSlowPath(SlowPathCode* slow_path);
- void BuildStackMaps(MemoryRegion stack_map_region,
- MemoryRegion method_info_region,
- const DexFile::CodeItem* code_item_for_osr_check);
- void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
+ ScopedArenaVector<uint8_t> BuildStackMaps(const dex::CodeItem* code_item_for_osr_check);
size_t GetNumberOfJitRoots() const;
// Fills the `literals` array with literals collected during code generation.
// Also emits literal patches.
void EmitJitRoots(uint8_t* code,
- Handle<mirror::ObjectArray<mirror::Object>> roots,
- const uint8_t* roots_data)
+ const uint8_t* roots_data,
+ /*out*/std::vector<Handle<mirror::Object>>* roots)
REQUIRES_SHARED(Locks::mutator_lock_);
bool IsLeafMethod() const {
@@ -438,6 +440,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
case TypeCheckKind::kArrayCheck:
case TypeCheckKind::kUnresolvedCheck:
return false;
+ case TypeCheckKind::kBitstringCheck:
+ return true;
}
LOG(FATAL) << "Unreachable";
UNREACHABLE();
@@ -535,10 +539,13 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
void GenerateInvokeStaticOrDirectRuntimeCall(
HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
+
void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);
+ void GenerateInvokeCustomCall(HInvokeCustom* invoke);
+
void CreateUnresolvedFieldLocationSummary(
HInstruction* field_access,
DataType::Type field_type,
@@ -556,6 +563,20 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
Location runtime_return_location);
void GenerateLoadClassRuntimeCall(HLoadClass* cls);
+ static void CreateLoadMethodHandleRuntimeCallLocationSummary(HLoadMethodHandle* method_handle,
+ Location runtime_handle_index_location,
+ Location runtime_return_location);
+ void GenerateLoadMethodHandleRuntimeCall(HLoadMethodHandle* method_handle);
+
+ static void CreateLoadMethodTypeRuntimeCallLocationSummary(HLoadMethodType* method_type,
+ Location runtime_type_index_location,
+ Location runtime_return_location);
+ void GenerateLoadMethodTypeRuntimeCall(HLoadMethodType* method_type);
+
+ uint32_t GetBootImageOffset(HLoadClass* load_class);
+ uint32_t GetBootImageOffset(HLoadString* load_string);
+ uint32_t GetBootImageOffset(HInvokeStaticOrDirect* invoke);
+
static void CreateSystemArrayCopyLocationSummary(HInvoke* invoke);
void SetDisassemblyInformation(DisassemblyInformation* info) { disasm_info_ = info; }
@@ -600,7 +621,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
// otherwise return a fall-back info that should be used instead.
virtual HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke) = 0;
+ ArtMethod* method) = 0;
// Generate a call to a static or direct method.
virtual void GenerateStaticOrDirectCall(
@@ -614,7 +635,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> {
virtual void GenerateNop() = 0;
- static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);
+ static QuickEntrypointEnum GetArrayAllocationEntrypoint(HNewArray* new_array);
protected:
// Patch info used for recording locations of required linker patches and their targets,
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 60f8f98757..3086882678 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -27,10 +27,10 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_arm64.h"
-#include "linker/arm64/relative_patcher_arm64.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
@@ -64,12 +64,11 @@ using helpers::DRegisterFrom;
using helpers::FPRegisterFrom;
using helpers::HeapOperand;
using helpers::HeapOperandFrom;
-using helpers::InputCPURegisterAt;
using helpers::InputCPURegisterOrZeroRegAt;
using helpers::InputFPRegisterAt;
using helpers::InputOperandAt;
using helpers::InputRegisterAt;
-using helpers::Int64ConstantFrom;
+using helpers::Int64FromLocation;
using helpers::IsConstantZeroBitPattern;
using helpers::LocationFrom;
using helpers::OperandFromMemOperand;
@@ -90,25 +89,10 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
// Reference load (except object array loads) is using LDR Wt, [Xn, #offset] which can handle
// offset < 16KiB. For offsets >= 16KiB, the load shall be emitted as two or more instructions.
-// For the Baker read barrier implementation using link-generated thunks we need to split
+// For the Baker read barrier implementation using link-time generated thunks, we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 16 * KB;
-// Flags controlling the use of link-time generated thunks for Baker read barriers.
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
-
-// Some instructions have special requirements for a temporary, for example
-// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
-// temp that's not an R0 (to avoid an extra move) and Baker read barrier field
-// loads with large offsets need a fixed register to limit the number of link-time
-// thunks we generate. For these and similar cases, we want to reserve a specific
-// register that's neither callee-save nor an argument register. We choose x15.
-inline Location FixedTempLocation() {
- return Location::RegisterLocation(x15.GetCode());
-}
-
inline Condition ARM64Condition(IfCondition cond) {
switch (cond) {
case kCondEQ: return eq;
@@ -165,6 +149,16 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return
return ARM64ReturnLocation(return_type);
}
+static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+ DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
+ RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
+ DataType::Type::kReference).GetCode());
+ return caller_saves;
+}
+
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler()-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArm64PointerSize, x).Int32Value()
@@ -174,8 +168,8 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
LocationSummary* locations,
int64_t spill_offset,
bool is_save) {
- const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
- const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
DCHECK(ArtVixlRegCodeCoherentForRegSet(core_spills,
codegen->GetNumberOfCoreRegisters(),
fp_spills,
@@ -218,7 +212,7 @@ static void SaveRestoreLiveRegistersHelper(CodeGenerator* codegen,
void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
- const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
for (uint32_t i : LowToHighBits(core_spills)) {
// If the register holds an object, update the stack mask.
if (locations->RegisterContainsObject(i)) {
@@ -230,7 +224,7 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar
stack_offset += kXRegSizeInBytes;
}
- const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
for (uint32_t i : LowToHighBits(fp_spills)) {
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
@@ -240,20 +234,20 @@ void SlowPathCodeARM64::SaveLiveRegisters(CodeGenerator* codegen, LocationSummar
SaveRestoreLiveRegistersHelper(codegen,
locations,
- codegen->GetFirstRegisterSlotInSlowPath(), true /* is_save */);
+ codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ true);
}
void SlowPathCodeARM64::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
SaveRestoreLiveRegistersHelper(codegen,
locations,
- codegen->GetFirstRegisterSlotInSlowPath(), false /* is_save */);
+ codegen->GetFirstRegisterSlotInSlowPath(), /* is_save= */ false);
}
class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
explicit BoundsCheckSlowPathARM64(HBoundsCheck* instruction) : SlowPathCodeARM64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
@@ -279,9 +273,9 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 {
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARM64"; }
+ const char* GetDescription() const override { return "BoundsCheckSlowPathARM64"; }
private:
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARM64);
@@ -291,16 +285,16 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
explicit DivZeroCheckSlowPathARM64(HDivZeroCheck* instruction) : SlowPathCodeARM64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
arm64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARM64"; }
+ const char* GetDescription() const override { return "DivZeroCheckSlowPathARM64"; }
private:
DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM64);
@@ -308,35 +302,41 @@ class DivZeroCheckSlowPathARM64 : public SlowPathCodeARM64 {
class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
public:
- LoadClassSlowPathARM64(HLoadClass* cls,
- HInstruction* at,
- uint32_t dex_pc,
- bool do_clinit)
- : SlowPathCodeARM64(at),
- cls_(cls),
- dex_pc_(dex_pc),
- do_clinit_(do_clinit) {
+ LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at)
+ : SlowPathCodeARM64(at), cls_(cls) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Location out = locations->Out();
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ const uint32_t dex_pc = instruction_->GetDexPc();
+ bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
+ bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
- dex::TypeIndex type_index = cls_->GetTypeIndex();
- __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
- QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
- : kQuickInitializeType;
- arm64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
- if (do_clinit_) {
- CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ if (must_resolve_type) {
+ DCHECK(IsSameDexFile(cls_->GetDexFile(), arm64_codegen->GetGraph()->GetDexFile()));
+ dex::TypeIndex type_index = cls_->GetTypeIndex();
+ __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_);
+ arm64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ // If we also must_do_clinit, the resolved type is now in the correct register.
} else {
- CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ DCHECK(must_do_clinit);
+ Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
+ arm64_codegen->MoveLocation(LocationFrom(calling_convention.GetRegisterAt(0)),
+ source,
+ cls_->GetType());
+ }
+ if (must_do_clinit) {
+ arm64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
}
// Move the class to the desired location.
@@ -349,18 +349,12 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARM64"; }
+ const char* GetDescription() const override { return "LoadClassSlowPathARM64"; }
private:
// The class this slow path will load.
HLoadClass* const cls_;
- // The dex PC of `at_`.
- const uint32_t dex_pc_;
-
- // Whether to initialize the class.
- const bool do_clinit_;
-
DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64);
};
@@ -369,7 +363,7 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
explicit LoadStringSlowPathARM64(HLoadString* instruction)
: SlowPathCodeARM64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
@@ -390,7 +384,7 @@ class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
+ const char* GetDescription() const override { return "LoadStringSlowPathARM64"; }
private:
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
@@ -400,7 +394,7 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
public:
explicit NullCheckSlowPathARM64(HNullCheck* instr) : SlowPathCodeARM64(instr) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
@@ -414,9 +408,9 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARM64"; }
+ const char* GetDescription() const override { return "NullCheckSlowPathARM64"; }
private:
DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
@@ -427,7 +421,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
SuspendCheckSlowPathARM64(HSuspendCheck* instruction, HBasicBlock* successor)
: SlowPathCodeARM64(instruction), successor_(successor) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
@@ -451,7 +445,7 @@ class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
return successor_;
}
- const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARM64"; }
+ const char* GetDescription() const override { return "SuspendCheckSlowPathARM64"; }
private:
// If not null, the block to branch to after the suspend check.
@@ -468,7 +462,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
TypeCheckSlowPathARM64(HInstruction* instruction, bool is_fatal)
: SlowPathCodeARM64(instruction), is_fatal_(is_fatal) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(instruction_->IsCheckCast()
@@ -509,8 +503,8 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 {
}
}
- const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARM64"; }
- bool IsFatal() const OVERRIDE { return is_fatal_; }
+ const char* GetDescription() const override { return "TypeCheckSlowPathARM64"; }
+ bool IsFatal() const override { return is_fatal_; }
private:
const bool is_fatal_;
@@ -523,7 +517,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
explicit DeoptimizationSlowPathARM64(HDeoptimize* instruction)
: SlowPathCodeARM64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
__ Bind(GetEntryLabel());
LocationSummary* locations = instruction_->GetLocations();
@@ -535,7 +529,7 @@ class DeoptimizationSlowPathARM64 : public SlowPathCodeARM64 {
CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
- const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARM64"; }
+ const char* GetDescription() const override { return "DeoptimizationSlowPathARM64"; }
private:
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARM64);
@@ -545,7 +539,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
public:
explicit ArraySetSlowPathARM64(HInstruction* instruction) : SlowPathCodeARM64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -576,7 +570,7 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARM64"; }
+ const char* GetDescription() const override { return "ArraySetSlowPathARM64"; }
private:
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM64);
@@ -605,503 +599,6 @@ void JumpTableARM64::EmitTable(CodeGeneratorARM64* codegen) {
}
}
-// Abstract base class for read barrier slow paths marking a reference
-// `ref`.
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathBaseARM64 : public SlowPathCodeARM64 {
- protected:
- ReadBarrierMarkSlowPathBaseARM64(HInstruction* instruction, Location ref, Location entrypoint)
- : SlowPathCodeARM64(instruction), ref_(ref), entrypoint_(entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARM64"; }
-
- // Generate assembly code calling the read barrier marking runtime
- // entry point (ReadBarrierMarkRegX).
- void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- DCHECK_NE(ref_.reg(), LR);
- DCHECK_NE(ref_.reg(), WSP);
- DCHECK_NE(ref_.reg(), WZR);
- // IP0 is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK_NE(ref_.reg(), IP0);
- DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in W0):
- //
- // W0 <- ref
- // W0 <- ReadBarrierMark(W0)
- // ref <- W0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ Blr(XRegisterFrom(entrypoint_));
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
- // This runtime call does not require a stack map.
- arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
- }
-
- // The location (register) of the marked object reference.
- const Location ref_;
-
- // The location of the entrypoint if it is already loaded.
- const Location entrypoint_;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARM64);
-};
-
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
- public:
- ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
- Location ref,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARM64"; }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
-
- __ Bind(GetEntryLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
- __ B(GetExitLabel());
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). The field `obj.field` in the object `obj` holding
-// this reference does not get updated by this slow path after marking
-// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
-// below for that).
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class LoadReferenceWithBakerReadBarrierSlowPathARM64 : public ReadBarrierMarkSlowPathBaseARM64 {
- public:
- LoadReferenceWithBakerReadBarrierSlowPathARM64(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- bool needs_null_check,
- bool use_load_acquire,
- Register temp,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- use_load_acquire_(use_load_acquire),
- temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierSlowPathARM64";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(obj_.IsW());
- DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
- DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsStaticFieldGet() ||
- instruction_->IsArrayGet() ||
- instruction_->IsArraySet() ||
- instruction_->IsInstanceOf() ||
- instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
- (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
- // The read barrier instrumentation of object ArrayGet
- // instructions does not support the HIntermediateAddress
- // instruction.
- DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
-
- // Temporary register `temp_`, used to store the lock word, must
- // not be IP0 nor IP1, as we may use them to emit the reference
- // load (in the call to GenerateRawReferenceLoad below), and we
- // need the lock word to still be in `temp_` after the reference
- // load.
- DCHECK_NE(LocationFrom(temp_).reg(), IP0);
- DCHECK_NE(LocationFrom(temp_).reg(), IP1);
-
- __ Bind(GetEntryLabel());
-
- // When using MaybeGenerateReadBarrierSlow, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The slow path (for Baker's algorithm) should look like:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- //
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including rb_state,
- // to prevent load-load reordering, and without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- arm64_codegen->GenerateRawReferenceLoad(instruction_,
- ref_,
- obj_,
- offset_,
- index_,
- scale_factor_,
- /* needs_null_check */ false,
- use_load_acquire_);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- __ B(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- size_t scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // Should this reference load use Load-Acquire semantics?
- bool use_load_acquire_;
- // A temporary register used to hold the lock word of `obj_`.
- Register temp_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARM64);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). If needed, this slow path also atomically updates
-// the field `obj.field` in the object `obj` holding this reference
-// after marking (contrary to
-// LoadReferenceWithBakerReadBarrierSlowPathARM64 above, which never
-// tries to update `obj.field`).
-//
-// This means that after the execution of this slow path, both `ref`
-// and `obj.field` will be up-to-date; i.e., after the flip, both will
-// hold the same to-space reference (unless another thread installed
-// another object reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64
- : public ReadBarrierMarkSlowPathBaseARM64 {
- public:
- LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
- HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- bool needs_null_check,
- bool use_load_acquire,
- Register temp,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARM64(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- use_load_acquire_(use_load_acquire),
- temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- Register ref_reg = WRegisterFrom(ref_);
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(obj_.IsW());
- DCHECK_NE(ref_.reg(), LocationFrom(temp_).reg());
-
- // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
- DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking and field updating slow path: "
- << instruction_->DebugName();
- DCHECK(instruction_->GetLocations()->Intrinsified());
- DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK_EQ(offset_, 0u);
- DCHECK_EQ(scale_factor_, 0u);
- DCHECK_EQ(use_load_acquire_, false);
- // The location of the offset of the marked reference field within `obj_`.
- Location field_offset = index_;
- DCHECK(field_offset.IsRegister()) << field_offset;
-
- // Temporary register `temp_`, used to store the lock word, must
- // not be IP0 nor IP1, as we may use them to emit the reference
- // load (in the call to GenerateRawReferenceLoad below), and we
- // need the lock word to still be in `temp_` after the reference
- // load.
- DCHECK_NE(LocationFrom(temp_).reg(), IP0);
- DCHECK_NE(LocationFrom(temp_).reg(), IP1);
-
- __ Bind(GetEntryLabel());
-
- // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARM64's:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // old_ref = ref;
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // compareAndSwapObject(obj, field_offset, old_ref, ref);
- // }
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- __ Ldr(temp_, HeapOperand(obj_, monitor_offset));
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including rb_state,
- // to prevent load-load reordering, and without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_.X(), obj_.X(), Operand(temp_.X(), LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
- arm64_codegen->GenerateRawReferenceLoad(instruction_,
- ref_,
- obj_,
- offset_,
- index_,
- scale_factor_,
- /* needs_null_check */ false,
- use_load_acquire_);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Tbz(temp_, LockWord::kReadBarrierStateShift, GetExitLabel());
-
- // Save the old value of the reference before marking it.
- // Note that we cannot use IP to save the old reference, as IP is
- // used internally by the ReadBarrierMarkRegX entry point, and we
- // need the old reference after the call to that entry point.
- DCHECK_NE(LocationFrom(temp_).reg(), IP0);
- __ Mov(temp_.W(), ref_reg);
-
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- // If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset)`).
- //
- // Note that this field could also hold a different object, if
- // another thread had concurrently changed it. In that case, the
- // LDXR/CMP/BNE sequence of instructions in the compare-and-set
- // (CAS) operation below would abort the CAS, leaving the field
- // as-is.
- __ Cmp(temp_.W(), ref_reg);
- __ B(eq, GetExitLabel());
-
- // Update the holder's field atomically. This may fail if the
- // mutator updates it before us, but that's OK. This is achieved
- // using a strong compare-and-set (CAS) operation with relaxed
- // memory synchronization ordering, where the expected value is
- // the old reference and the desired value is the new reference.
-
- MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
- UseScratchRegisterScope temps(masm);
-
- // Convenience aliases.
- Register base = obj_.W();
- Register offset = XRegisterFrom(field_offset);
- Register expected = temp_.W();
- Register value = ref_reg;
- Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
- Register tmp_value = temps.AcquireW(); // Value in memory.
-
- __ Add(tmp_ptr, base.X(), Operand(offset));
-
- if (kPoisonHeapReferences) {
- arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not poison `value`, as it is the same register as
- // `expected`, which has just been poisoned.
- } else {
- arm64_codegen->GetAssembler()->PoisonHeapReference(value);
- }
- }
-
- // do {
- // tmp_value = [tmp_ptr] - expected;
- // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
-
- vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
- __ Bind(&loop_head);
- __ Ldxr(tmp_value, MemOperand(tmp_ptr));
- __ Cmp(tmp_value, expected);
- __ B(&comparison_failed, ne);
- __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
- __ Cbnz(tmp_value, &loop_head);
- __ B(&exit_loop);
- __ Bind(&comparison_failed);
- __ Clrex();
- __ Bind(&exit_loop);
-
- if (kPoisonHeapReferences) {
- arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not unpoison `value`, as it is the same register as
- // `expected`, which has just been unpoisoned.
- } else {
- arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
- }
- }
-
- __ B(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- const Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- size_t scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // Should this reference load use Load-Acquire semantics?
- bool use_load_acquire_;
- // A temporary register used to hold the lock word of `obj_`; and
- // also to hold the original reference value, when the reference is
- // marked.
- const Register temp_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64);
-};
-
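The removed field-updating slow path boils down to: reload the lock word, mark `ref`, and, if marking relocated the object, CAS the holder's field from the old reference to the marked one. Stripped of the ARM64 LDXR/STXR details, the update step is roughly this portable sketch (hypothetical names):

    #include <atomic>

    template <typename T>
    void UpdateFieldAfterMark(std::atomic<T>* field, T old_ref, T marked_ref) {
      if (old_ref == marked_ref) {
        return;  // Marking did not move the object; the field is already correct.
      }
      // Strong CAS, relaxed ordering: if another thread already installed a
      // different reference, the CAS fails and the field is left as-is.
      field->compare_exchange_strong(old_ref, marked_ref, std::memory_order_relaxed);
    }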
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
public:
@@ -1131,7 +628,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
DataType::Type type = DataType::Type::kReference;
@@ -1257,7 +754,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
+ const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathARM64"; }
private:
Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
@@ -1297,7 +794,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(kEmitCompilerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DataType::Type type = DataType::Type::kReference;
DCHECK(locations->CanCall());
@@ -1334,7 +831,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARM64"; }
+ const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARM64"; }
private:
const Location out_;
@@ -1373,7 +870,6 @@ Location InvokeDexCallingConventionVisitorARM64::GetMethodLocation() const {
}
CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
- const Arm64InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
: CodeGenerator(graph,
@@ -1389,8 +885,8 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetAllocator(), this),
- assembler_(graph->GetAllocator()),
- isa_features_(isa_features),
+ assembler_(graph->GetAllocator(),
+ compiler_options.GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()),
uint32_literals_(std::less<uint32_t>(),
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
uint64_literals_(std::less<uint64_t>(),
@@ -1401,11 +897,14 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph,
type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
// Save the link register (containing the return address) to mimic Quick.
AddAllocatedRegister(LocationFrom(lr));
}
@@ -1420,10 +919,86 @@ void CodeGeneratorARM64::EmitJumpTables() {
void CodeGeneratorARM64::Finalize(CodeAllocator* allocator) {
EmitJumpTables();
+
+ // Emit JIT baker read barrier slow paths.
+ DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
+ for (auto& entry : jit_baker_read_barrier_slow_paths_) {
+ uint32_t encoded_data = entry.first;
+ vixl::aarch64::Label* slow_path_entry = &entry.second.label;
+ __ Bind(slow_path_entry);
+ CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
+ }
+
// Ensure we emit the literal pool.
__ FinalizeCode();
CodeGenerator::Finalize(allocator);
+
+ // Verify Baker read barrier linker patches.
+ if (kIsDebugBuild) {
+ ArrayRef<const uint8_t> code = allocator->GetMemory();
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ DCHECK(info.label.IsBound());
+ uint32_t literal_offset = info.label.GetLocation();
+ DCHECK_ALIGNED(literal_offset, 4u);
+
+ auto GetInsn = [&code](uint32_t offset) {
+ DCHECK_ALIGNED(offset, 4u);
+ return
+ (static_cast<uint32_t>(code[offset + 0]) << 0) +
+ (static_cast<uint32_t>(code[offset + 1]) << 8) +
+ (static_cast<uint32_t>(code[offset + 2]) << 16) +
+ (static_cast<uint32_t>(code[offset + 3]) << 24);
+ };
+
+ const uint32_t encoded_data = info.custom_data;
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ // Check that the next instruction matches the expected LDR.
+ switch (kind) {
+ case BakerReadBarrierKind::kField:
+ case BakerReadBarrierKind::kAcquire: {
+ DCHECK_GE(code.size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn(literal_offset + 4u);
+ CheckValidReg(next_insn & 0x1fu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ if (kind == BakerReadBarrierKind::kField) {
+ // LDR (immediate) with correct base_reg.
+ CHECK_EQ(next_insn & 0xffc003e0u, 0xb9400000u | (base_reg << 5));
+ } else {
+ DCHECK(kind == BakerReadBarrierKind::kAcquire);
+ // LDAR with correct base_reg.
+ CHECK_EQ(next_insn & 0xffffffe0u, 0x88dffc00u | (base_reg << 5));
+ }
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ DCHECK_GE(code.size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn(literal_offset + 4u);
+ // LDR (register) with the correct base_reg, size=10 (32-bit), option=011 (extend = LSL),
+ // and S=1 (shift amount = 2 for 32-bit version), i.e. LDR Wt, [Xn, Xm, LSL #2].
+ CheckValidReg(next_insn & 0x1fu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffe0ffe0u, 0xb8607800u | (base_reg << 5));
+ CheckValidReg((next_insn >> 16) & 0x1fu); // Check index register.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn(literal_offset - 4u);
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ // Usually LDR (immediate) with correct root_reg but
+ // we may have a "MOV marked, old_value" for UnsafeCASObject.
+ if ((prev_insn & 0xffe0ffff) != (0x2a0003e0 | root_reg)) { // MOV?
+ CHECK_EQ(prev_insn & 0xffc0001fu, 0xb9400000u | root_reg); // LDR?
+ }
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
+ }
+ }
}
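For reference, a sketch of how one of the checked constants decodes under the A64 encoding rules:

// 0xb9400000u is the fixed part of "LDR Wt, [Xn, #pimm]" (32-bit, unsigned offset):
// size=10, opc=01, imm12 in bits [21:10], Rn in bits [9:5], Rt in bits [4:0].
// Masking with 0xffc003e0u zeroes imm12 and Rt, so the CHECK pins the opcode
// and the base register (base_reg << 5) while allowing any offset/destination.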
void ParallelMoveResolverARM64::PrepareForEmitNativeCode() {
@@ -1543,7 +1118,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
}
}
- MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
void CodeGeneratorARM64::GenerateFrameExit() {
@@ -1600,8 +1175,24 @@ void CodeGeneratorARM64::MarkGCCard(Register object, Register value, bool value_
if (value_can_be_null) {
__ Cbz(value, &done);
}
+ // Load the address of the card table into `card`.
__ Ldr(card, MemOperand(tr, Thread::CardTableOffset<kArm64PointerSize>().Int32Value()));
+ // Calculate the offset (in the card table) of the card corresponding to
+ // `object`.
__ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
+ // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
+ // `object`'s card.
+ //
+ // Register `card` contains the address of the card table. Note that the card
+ // table's base is biased during its creation so that it always starts at an
+ // address whose least-significant byte is equal to `kCardDirty` (see
+ // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
+ // below writes the `kCardDirty` (byte) value into the `object`'s card
+ // (located at `card + object >> kCardShift`).
+ //
+ // This dual use of the value in register `card` (1. to calculate the location
+ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
+ // (no need to explicitly load `kCardDirty` as an immediate value).
__ Strb(card, MemOperand(card, temp.X()));
if (value_can_be_null) {
__ Bind(&done);
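In C terms the marking sequence boils down to the following sketch (variable and field names are illustrative, not ART's):

// uint8_t* card_base = self->card_table;  // biased so its low byte == kCardDirty
// card_base[reinterpret_cast<uintptr_t>(object) >> CardTable::kCardShift] =
//     static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_base));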
@@ -1615,6 +1206,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const {
// mr : Runtime reserved.
// ip1 : VIXL core temp.
// ip0 : VIXL core temp.
+ // x18 : Platform register.
//
// Blocked fp registers:
// d31 : VIXL fp temp.
@@ -1623,6 +1215,7 @@ void CodeGeneratorARM64::SetupBlockedRegisters() const {
while (!reserved_core_registers.IsEmpty()) {
blocked_core_registers_[reserved_core_registers.PopLowestIndex().GetCode()] = true;
}
+ blocked_core_registers_[X18] = true;
CPURegList reserved_fp_registers = vixl_reserved_fp_registers;
while (!reserved_fp_registers.IsEmpty()) {
@@ -1672,6 +1265,10 @@ void CodeGeneratorARM64::DumpFloatingPointRegister(std::ostream& stream, int reg
stream << DRegister(reg);
}
+const Arm64InstructionSetFeatures& CodeGeneratorARM64::GetInstructionSetFeatures() const {
+ return *GetCompilerOptions().GetInstructionSetFeatures()->AsArm64InstructionSetFeatures();
+}
+
void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* constant) {
if (constant->IsIntConstant()) {
__ Mov(Register(destination), constant->AsIntConstant()->GetValue());
@@ -2128,6 +1725,26 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorARM64::GenerateBitstringTypeCheckCompare(
+ HTypeCheckInstruction* check, vixl::aarch64::Register temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ Ldrh(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ Ldr(temp, HeapOperand(temp, mirror::Class::StatusOffset()));
+ // Extract the bitstring bits.
+ __ Ubfx(temp, temp, 0, mask_bits);
+ }
+ // Compare the bitstring bits to `path_to_root`.
+ __ Cmp(temp, path_to_root);
+}
+
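The compare reduces subtype testing to two integer operations; a minimal sketch of the invariant (helper name hypothetical):

// Each class stores a bitstring encoding its path from the hierarchy root in the
// low bits of its status word; "obj instanceof C" holds iff the masked bits equal
// C's path_to_root.
static bool MatchesBitstring(uint32_t status, uint32_t path_to_root, uint32_t mask) {
  return (status & mask) == path_to_root;
}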
void CodeGeneratorARM64::GenerateMemoryBarrier(MemBarrierKind kind) {
BarrierType type = BarrierAll;
@@ -2224,18 +1841,12 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction,
: LocationSummary::kNoCall);
if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier.
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !Runtime::Current()->UseJitCompilation() &&
- !field_info.IsVolatile()) {
- // If link-time thunks for the Baker read barrier are enabled, for AOT
- // non-volatile loads we need a temporary only if the offset is too big.
- if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
- locations->AddTemp(FixedTempLocation());
- }
- } else {
- locations->AddTemp(Location::RequiresRegister());
+ // We need a temporary register for the read barrier load in
+ // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
+ // only if the field is volatile or the offset is too big.
+ if (field_info.IsVolatile() ||
+ field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
+ locations->AddTemp(FixedTempLocation());
}
}
locations->SetInAt(0, Location::RequiresRegister());
@@ -2277,7 +1888,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
base,
offset,
maybe_temp,
- /* needs_null_check */ true,
+ /* needs_null_check= */ true,
field_info.IsVolatile());
} else {
// General case.
@@ -2286,7 +1897,7 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction,
// CodeGeneratorARM64::LoadAcquire call.
// NB: LoadAcquire will record the pc info if needed.
codegen_->LoadAcquire(
- instruction, OutputCPURegister(instruction), field, /* needs_null_check */ true);
+ instruction, OutputCPURegister(instruction), field, /* needs_null_check= */ true);
} else {
// Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
@@ -2341,7 +1952,7 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction,
if (field_info.IsVolatile()) {
codegen_->StoreRelease(
- instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check */ true);
+ instruction, field_type, source, HeapOperand(obj, offset), /* needs_null_check= */ true);
} else {
// Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted.
EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
@@ -2383,6 +1994,9 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
// all & reg_bits - 1.
__ Ror(dst, lhs, RegisterFrom(instr->GetLocations()->InAt(1), type));
}
+ } else if (instr->IsMin() || instr->IsMax()) {
+ __ Cmp(lhs, rhs);
+ __ Csel(dst, lhs, rhs, instr->IsMin() ? lt : gt);
} else {
DCHECK(instr->IsXor());
__ Eor(dst, lhs, rhs);
@@ -2398,6 +2012,10 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) {
__ Fadd(dst, lhs, rhs);
} else if (instr->IsSub()) {
__ Fsub(dst, lhs, rhs);
+ } else if (instr->IsMin()) {
+ __ Fmin(dst, lhs, rhs);
+ } else if (instr->IsMax()) {
+ __ Fmax(dst, lhs, rhs);
} else {
LOG(FATAL) << "Unexpected floating-point binary operation";
}
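For reference, the Min/Max lowerings above rely on the select semantics of CSEL and on FMIN/FMAX:

// After "Cmp lhs, rhs", "Csel dst, lhs, rhs, lt" yields dst = (lhs < rhs) ? lhs : rhs,
// i.e. Min; Max uses gt. The FP case uses Fmin/Fmax directly because they already
// provide the NaN propagation and -0.0 < +0.0 ordering Java requires, which a
// plain Fcmp+Fcsel pair would get wrong.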
@@ -2618,7 +2236,7 @@ void LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIn
void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex(
HIntermediateAddressIndex* instruction) {
Register index_reg = InputRegisterAt(instruction, 0);
- uint32_t shift = Int64ConstantFrom(instruction->GetLocations()->InAt(2));
+ uint32_t shift = Int64FromLocation(instruction->GetLocations()->InAt(2));
uint32_t offset = instruction->GetOffset()->AsIntConstant()->GetValue();
if (shift == 0) {
@@ -2691,21 +2309,21 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) {
: LocationSummary::kNoCall);
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !Runtime::Current()->UseJitCompilation() &&
- instruction->GetIndex()->IsConstant()) {
+ if (instruction->GetIndex()->IsConstant()) {
// Array loads with constant index are treated as field loads.
- // If link-time thunks for the Baker read barrier are enabled, for AOT
- // constant index loads we need a temporary only if the offset is too big.
+ // We need a temporary register for the read barrier load in
+ // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier()
+ // only if the offset is too big.
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
offset += index << DataType::SizeShift(DataType::Type::kReference);
if (offset >= kReferenceLoadMinFarOffset) {
locations->AddTemp(FixedTempLocation());
}
- } else {
+ } else if (!instruction->GetArray()->IsIntermediateAddress()) {
+ // We need a non-scratch temporary for the array data pointer in
+ // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier() for the case with no
+ // intermediate address.
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -2735,11 +2353,12 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
MacroAssembler* masm = GetVIXLAssembler();
UseScratchRegisterScope temps(masm);
- // The read barrier instrumentation of object ArrayGet instructions
+ // The non-Baker read barrier instrumentation of object ArrayGet instructions
// does not support the HIntermediateAddress instruction.
DCHECK(!((type == DataType::Type::kReference) &&
instruction->GetArray()->IsIntermediateAddress() &&
- kEmitCompilerReadBarrier));
+ kEmitCompilerReadBarrier &&
+ !kUseBakerReadBarrier));
if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// Object ArrayGet with Baker's read barrier case.
@@ -2747,8 +2366,9 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
// CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
if (index.IsConstant()) {
+ DCHECK(!instruction->GetArray()->IsIntermediateAddress());
// Array load with a constant index can be treated as a field load.
- offset += Int64ConstantFrom(index) << DataType::SizeShift(type);
+ offset += Int64FromLocation(index) << DataType::SizeShift(type);
Location maybe_temp =
(locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation();
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
@@ -2756,12 +2376,11 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
obj.W(),
offset,
maybe_temp,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
} else {
- Register temp = WRegisterFrom(locations->GetTemp(0));
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out, obj.W(), offset, index, temp, /* needs_null_check */ false);
+ instruction, out, obj.W(), offset, index, /* needs_null_check= */ false);
}
} else {
// General case.
@@ -2793,14 +2412,14 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
"Expecting 0=compressed, 1=uncompressed");
__ Tbnz(length.W(), 0, &uncompressed_load);
__ Ldrb(Register(OutputCPURegister(instruction)),
- HeapOperand(obj, offset + Int64ConstantFrom(index)));
+ HeapOperand(obj, offset + Int64FromLocation(index)));
__ B(&done);
__ Bind(&uncompressed_load);
__ Ldrh(Register(OutputCPURegister(instruction)),
- HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1)));
+ HeapOperand(obj, offset + (Int64FromLocation(index) << 1)));
__ Bind(&done);
} else {
- offset += Int64ConstantFrom(index) << DataType::SizeShift(type);
+ offset += Int64FromLocation(index) << DataType::SizeShift(type);
source = HeapOperand(obj, offset);
}
} else {
@@ -2810,8 +2429,8 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) {
// input instruction has done it already. See the comment in
// `TryExtractArrayAccessAddress()`.
if (kIsDebugBuild) {
- HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
- DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
+ HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
+ DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), offset);
}
temp = obj;
} else {
@@ -2913,7 +2532,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
if (!needs_write_barrier) {
DCHECK(!may_need_runtime_call_for_type_check);
if (index.IsConstant()) {
- offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type);
+ offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
destination = HeapOperand(array, offset);
} else {
UseScratchRegisterScope temps(masm);
@@ -2923,8 +2542,8 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
// input instruction has done it already. See the comment in
// `TryExtractArrayAccessAddress()`.
if (kIsDebugBuild) {
- HIntermediateAddress* tmp = instruction->GetArray()->AsIntermediateAddress();
- DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
+ HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
+ DCHECK(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64() == offset);
}
temp = array;
} else {
@@ -2951,7 +2570,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) {
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireSameSizeAs(array);
if (index.IsConstant()) {
- offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type);
+ offset += Int64FromLocation(index) << DataType::SizeShift(value_type);
destination = HeapOperand(array, offset);
} else {
destination = HeapOperand(temp,
@@ -3093,12 +2712,14 @@ void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) {
if (check->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
+ // Rely on the type initialization to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
}
void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) {
// We assume the class is not null.
- SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(
- check->GetLoadClass(), check, check->GetDexPc(), true);
+ SlowPathCodeARM64* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(check->GetLoadClass(), check);
codegen_->AddSlowPath(slow_path);
GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
}
@@ -3260,61 +2881,30 @@ FOR_EACH_CONDITION_INSTRUCTION(DEFINE_CONDITION_VISITORS)
#undef DEFINE_CONDITION_VISITORS
#undef FOR_EACH_CONDITION_INSTRUCTION
-void InstructionCodeGeneratorARM64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
- DCHECK(instruction->IsDiv() || instruction->IsRem());
-
- LocationSummary* locations = instruction->GetLocations();
- Location second = locations->InAt(1);
- DCHECK(second.IsConstant());
+void InstructionCodeGeneratorARM64::GenerateIntDivForPower2Denom(HDiv* instruction) {
+ int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
+ uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
+ DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
Register out = OutputRegister(instruction);
Register dividend = InputRegisterAt(instruction, 0);
- int64_t imm = Int64FromConstant(second.GetConstant());
- DCHECK(imm == 1 || imm == -1);
- if (instruction->IsRem()) {
- __ Mov(out, 0);
+ if (abs_imm == 2) {
+ int bits = DataType::Size(instruction->GetResultType()) * kBitsPerByte;
+ __ Add(out, dividend, Operand(dividend, LSR, bits - 1));
} else {
- if (imm == 1) {
- __ Mov(out, dividend);
- } else {
- __ Neg(out, dividend);
- }
- }
-}
-
-void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruction) {
- DCHECK(instruction->IsDiv() || instruction->IsRem());
-
- LocationSummary* locations = instruction->GetLocations();
- Location second = locations->InAt(1);
- DCHECK(second.IsConstant());
-
- Register out = OutputRegister(instruction);
- Register dividend = InputRegisterAt(instruction, 0);
- int64_t imm = Int64FromConstant(second.GetConstant());
- uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
- int ctz_imm = CTZ(abs_imm);
-
- UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp = temps.AcquireSameSizeAs(out);
-
- if (instruction->IsDiv()) {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
__ Add(temp, dividend, abs_imm - 1);
__ Cmp(dividend, 0);
__ Csel(out, temp, dividend, lt);
- if (imm > 0) {
- __ Asr(out, out, ctz_imm);
- } else {
- __ Neg(out, Operand(out, ASR, ctz_imm));
- }
+ }
+
+ int ctz_imm = CTZ(abs_imm);
+ if (imm > 0) {
+ __ Asr(out, out, ctz_imm);
} else {
- int bits = instruction->GetResultType() == DataType::Type::kInt32 ? 32 : 64;
- __ Asr(temp, dividend, bits - 1);
- __ Lsr(temp, temp, bits - ctz_imm);
- __ Add(out, dividend, temp);
- __ And(out, out, abs_imm - 1);
- __ Sub(out, out, temp);
+ __ Neg(out, Operand(out, ASR, ctz_imm));
}
}
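A quick arithmetic check of the rounding adjustment, as a sketch assuming 32-bit operands and a hypothetical helper name:

// Truncating signed division by a power of two rounds toward zero, so negative
// dividends get abs_imm - 1 added before the arithmetic shift:
//   -7 / 4: (-7 + 3) >> 2 == -4 >> 2 == -1   (plain -7 >> 2 would yield -2)
//    7 / 4:        7 >> 2 ==  1
// The abs_imm == 2 case adds the sign bit (dividend LSR bits-1) instead,
// achieving the same correction in one instruction.
int32_t DivByPow2(int32_t dividend, int32_t abs_imm) {  // abs_imm: power of two
  int32_t rounded = (dividend < 0) ? dividend + (abs_imm - 1) : dividend;
  return rounded >> __builtin_ctz(abs_imm);  // caller negates if the divisor < 0
}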
@@ -3335,7 +2925,7 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati
int64_t magic;
int shift;
CalculateMagicAndShiftForDivRem(
- imm, type == DataType::Type::kInt64 /* is_long */, &magic, &shift);
+ imm, /* is_long= */ type == DataType::Type::kInt64, &magic, &shift);
UseScratchRegisterScope temps(GetVIXLAssembler());
Register temp = temps.AcquireSameSizeAs(out);
@@ -3370,39 +2960,34 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati
}
}
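For context, GenerateDivRemWithAnyConstant applies the classic magic-number transformation (Hacker's Delight, ch. 10); a worked 32-bit instance for divisor 3, shaped like the emitted code but not ART's actual helper:

int32_t MagicDivBy3(int32_t x) {
  int64_t prod = static_cast<int64_t>(x) * INT64_C(0x55555556);  // magic for 3
  int32_t q = static_cast<int32_t>(prod >> 32);                  // multiply-high, like SMULH
  return q + (static_cast<uint32_t>(x) >> 31);                   // add 1 for negative x
}
// MagicDivBy3(7) == 2 and MagicDivBy3(-7) == -2, matching truncating division.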
-void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
- DCHECK(instruction->IsDiv() || instruction->IsRem());
- DataType::Type type = instruction->GetResultType();
- DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
+void InstructionCodeGeneratorARM64::GenerateIntDivForConstDenom(HDiv* instruction) {
+ int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
- LocationSummary* locations = instruction->GetLocations();
- Register out = OutputRegister(instruction);
- Location second = locations->InAt(1);
+ if (imm == 0) {
+ // Do not generate anything. DivZeroCheck would prevent any code from being executed.
+ return;
+ }
- if (second.IsConstant()) {
- int64_t imm = Int64FromConstant(second.GetConstant());
+ if (IsPowerOfTwo(AbsOrMin(imm))) {
+ GenerateIntDivForPower2Denom(instruction);
+ } else {
+ // Cases imm == -1 or imm == 1 are handled by InstructionSimplifier.
+ DCHECK(imm < -2 || imm > 2) << imm;
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+}
- if (imm == 0) {
- // Do not generate anything. DivZeroCheck would prevent any code to be executed.
- } else if (imm == 1 || imm == -1) {
- DivRemOneOrMinusOne(instruction);
- } else if (IsPowerOfTwo(AbsOrMin(imm))) {
- DivRemByPowerOfTwo(instruction);
- } else {
- DCHECK(imm <= -2 || imm >= 2);
- GenerateDivRemWithAnyConstant(instruction);
- }
+void InstructionCodeGeneratorARM64::GenerateIntDiv(HDiv* instruction) {
+ DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
+ << instruction->GetResultType();
+
+ if (instruction->GetLocations()->InAt(1).IsConstant()) {
+ GenerateIntDivForConstDenom(instruction);
} else {
+ Register out = OutputRegister(instruction);
Register dividend = InputRegisterAt(instruction, 0);
Register divisor = InputRegisterAt(instruction, 1);
- if (instruction->IsDiv()) {
- __ Sdiv(out, dividend, divisor);
- } else {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp = temps.AcquireSameSizeAs(out);
- __ Sdiv(temp, dividend, divisor);
- __ Msub(out, temp, divisor, dividend);
- }
+ __ Sdiv(out, dividend, divisor);
}
}
@@ -3434,7 +3019,7 @@ void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) {
switch (type) {
case DataType::Type::kInt32:
case DataType::Type::kInt64:
- GenerateDivRemIntegral(div);
+ GenerateIntDiv(div);
break;
case DataType::Type::kFloat32:
@@ -3462,11 +3047,11 @@ void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction
if (!DataType::IsIntegralType(type)) {
LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
- return;
+ UNREACHABLE();
}
if (value.IsConstant()) {
- int64_t divisor = Int64ConstantFrom(value);
+ int64_t divisor = Int64FromLocation(value);
if (divisor == 0) {
__ B(slow_path->GetEntryLabel());
} else {
@@ -3531,7 +3116,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s
}
if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
if (!codegen_->GoesToNextBlock(block, successor)) {
__ B(codegen_->GetLabelOf(successor));
@@ -3681,7 +3266,7 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) {
if (codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor)) {
false_target = nullptr;
}
- GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
+ GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
@@ -3700,9 +3285,9 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCodeARM64* slow_path =
deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARM64>(deoptimize);
GenerateTestAndBranch(deoptimize,
- /* condition_input_index */ 0,
+ /* condition_input_index= */ 0,
slow_path->GetEntryLabel(),
- /* false_target */ nullptr);
+ /* false_target= */ nullptr);
}
void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
@@ -3865,6 +3450,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -3873,7 +3460,13 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The "out" register is used as a temporary, so it overlaps with the inputs.
// Note that TypeCheckSlowPathARM64 uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -3886,7 +3479,9 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
- Register cls = InputRegisterAt(instruction, 1);
+ Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Register()
+ : InputRegisterAt(instruction, 1);
Location out_loc = locations->Out();
Register out = OutputRegister(instruction);
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -4032,7 +3627,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
__ Cmp(out, cls);
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ B(ne, slow_path->GetEntryLabel());
__ Mov(out, 1);
@@ -4064,7 +3659,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
// This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ B(slow_path->GetEntryLabel());
if (zero.IsLinked()) {
@@ -4072,6 +3667,23 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Cset(out, eq);
+ if (zero.IsLinked()) {
+ __ B(&done);
+ }
+ break;
+ }
}
if (zero.IsLinked()) {
@@ -4094,7 +3706,13 @@ void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -4104,7 +3722,9 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = InputRegisterAt(instruction, 0);
- Register cls = InputRegisterAt(instruction, 1);
+ Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? Register()
+ : InputRegisterAt(instruction, 1);
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
DCHECK_GE(num_temps, 1u);
DCHECK_LE(num_temps, 3u);
@@ -4285,6 +3905,20 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
__ B(ne, &start_loop);
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ B(ne, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
__ Bind(&done);
@@ -4318,7 +3952,7 @@ void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) {
@@ -4388,7 +4022,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4424,7 +4058,7 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM64* codege
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARM64::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+ ArtMethod* method ATTRIBUTE_UNUSED) {
// On ARM64 we support all dispatch types.
return desired_dispatch_info;
}
@@ -4455,21 +4089,32 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall(
EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
break;
}
- case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- // Load method address from literal pool.
- __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_offset);
+ EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
+ vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_offset, adrp_label);
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ EmitLdrOffsetPlaceholder(ldr_label, WRegisterFrom(temp), XRegisterFrom(temp));
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
- // Add ADRP with its PC-relative DexCache access patch.
+ // Add ADRP with its PC-relative .bss entry patch.
MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
- // Add LDR with its PC-relative DexCache access patch.
+ // Add LDR with its PC-relative .bss entry patch.
vixl::aarch64::Label* ldr_label =
NewMethodBssEntryPatch(target_method, adrp_label);
EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
break;
}
+ case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
+ // Load method address from literal pool.
+ __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
+ break;
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
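The ADRP-based pairs used by these load kinds all follow one pattern, roughly:

// ADRP xN, target          ; materialize the target's 4 KiB page (+/-4 GiB, PC-relative)
// LDR  wN, [xN, #pageoff]  ; or "ADD xN, xN, #pageoff" when an address is wanted
// Both instructions carry a patch label so the linker can fill in the real page
// and page offset once the target's address is known.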
@@ -4556,7 +4201,30 @@ void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
codegen_->GenerateInvokePolymorphicCall(invoke);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
+}
+
+void LocationsBuilderARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
+ HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARM64::VisitInvokeCustom(HInvokeCustom* invoke) {
+ codegen_->GenerateInvokeCustomCall(invoke);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
+}
+
+vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageIntrinsicPatch(
+ uint32_t intrinsic_data,
+ vixl::aarch64::Label* adrp_label) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, intrinsic_data, adrp_label, &boot_image_intrinsic_patches_);
+}
+
+vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset,
+ vixl::aarch64::Label* adrp_label) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, boot_image_offset, adrp_label, &boot_image_method_patches_);
}
vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch(
@@ -4602,9 +4270,18 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch(
return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_);
}
-vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
- baker_read_barrier_patches_.emplace_back(custom_data);
- return &baker_read_barrier_patches_.back().label;
+void CodeGeneratorARM64::EmitBakerReadBarrierCbnz(uint32_t custom_data) {
+ DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
+ if (Runtime::Current()->UseJitCompilation()) {
+ auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
+ vixl::aarch64::Label* slow_path_entry = &it->second.label;
+ __ cbnz(mr, slow_path_entry);
+ } else {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ vixl::aarch64::Label* cbnz_label = &baker_read_barrier_patches_.back().label;
+ __ bind(cbnz_label);
+ __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
+ }
}
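The two branches above choose how the marking check reaches its slow path; in outline:

// JIT:  CBNZ mr, <slow-path entry>  ; one entry per custom_data, emitted at the
//       end of the method (see the loop in Finalize above).
// AOT:  CBNZ mr, <0>                ; a placeholder recorded in
//       baker_read_barrier_patches_, retargeted by the linker to a shared thunk.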
vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch(
@@ -4631,7 +4308,7 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLitera
ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
return jit_string_patches_.GetOrCreate(
StringReference(&dex_file, string_index),
- [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
+ [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
}
vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral(
@@ -4639,7 +4316,7 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral
ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
return jit_class_patches_.GetOrCreate(
TypeReference(&dex_file, type_index),
- [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); });
+ [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u); });
}
void CodeGeneratorARM64::EmitAdrpPlaceholder(vixl::aarch64::Label* fixup_label,
@@ -4669,6 +4346,55 @@ void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_la
__ ldr(out, MemOperand(base, /* offset placeholder */ 0));
}
+void CodeGeneratorARM64::LoadBootImageAddress(vixl::aarch64::Register reg,
+ uint32_t boot_image_reference) {
+ if (GetCompilerOptions().IsBootImage()) {
+ // Add ADRP with its PC-relative intrinsic patch.
+ vixl::aarch64::Label* adrp_label = NewBootImageIntrinsicPatch(boot_image_reference);
+ EmitAdrpPlaceholder(adrp_label, reg.X());
+ // Add ADD with its PC-relative intrinsic patch.
+ vixl::aarch64::Label* add_label = NewBootImageIntrinsicPatch(boot_image_reference, adrp_label);
+ EmitAddPlaceholder(add_label, reg.X(), reg.X());
+ } else if (GetCompilerOptions().GetCompilePic()) {
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ vixl::aarch64::Label* adrp_label = NewBootImageRelRoPatch(boot_image_reference);
+ EmitAdrpPlaceholder(adrp_label, reg.X());
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
+ vixl::aarch64::Label* ldr_label = NewBootImageRelRoPatch(boot_image_reference, adrp_label);
+ EmitLdrOffsetPlaceholder(ldr_label, reg.W(), reg.X());
+ } else {
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ DCHECK(!heap->GetBootImageSpaces().empty());
+ const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
+ __ Ldr(reg.W(), DeduplicateBootImageAddressLiteral(reinterpret_cast<uintptr_t>(address)));
+ }
+}
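LoadBootImageAddress thus selects among three addressing strategies, matching the branches above:

// 1. Compiling the boot image itself: ADRP+ADD against an intrinsic patch,
//    resolved at link time to the final in-image address.
// 2. PIC AOT app code: ADRP+LDR through the 32-bit .data.bimg.rel.ro entry.
// 3. JIT: the boot image is already mapped, so load the absolute address
//    (boot image begin + offset) from a deduplicated literal.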
+
+void CodeGeneratorARM64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+ uint32_t boot_image_offset) {
+ DCHECK(invoke->IsStatic());
+ InvokeRuntimeCallingConvention calling_convention;
+ Register argument = calling_convention.GetRegisterAt(0);
+ if (GetCompilerOptions().IsBootImage()) {
+ DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+ // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ MethodReference target_method = invoke->GetTargetMethod();
+ dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+ // Add ADRP with its PC-relative type patch.
+ vixl::aarch64::Label* adrp_label = NewBootImageTypePatch(*target_method.dex_file, type_idx);
+ EmitAdrpPlaceholder(adrp_label, argument.X());
+ // Add ADD with its PC-relative type patch.
+ vixl::aarch64::Label* add_label =
+ NewBootImageTypePatch(*target_method.dex_file, type_idx, adrp_label);
+ EmitAddPlaceholder(add_label, argument.X(), argument.X());
+ } else {
+ LoadBootImageAddress(argument, boot_image_offset);
+ }
+ InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+}
+
template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
const ArenaDeque<PcRelativePatchInfo>& infos,
@@ -4681,6 +4407,15 @@ inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches(
}
}
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -4690,6 +4425,7 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin
type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
string_bss_entry_patches_.size() +
+ boot_image_intrinsic_patches_.size() +
baker_read_barrier_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
@@ -4699,12 +4435,14 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin
boot_image_type_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+ boot_image_intrinsic_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
+ DCHECK(boot_image_intrinsic_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -4719,6 +4457,44 @@ void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* lin
DCHECK_EQ(size, linker_patches->size());
}
+bool CodeGeneratorARM64::NeedsThunkCode(const linker::LinkerPatch& patch) const {
+ return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
+ patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
+}
+
+void CodeGeneratorARM64::EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name) {
+ Arm64Assembler assembler(GetGraph()->GetAllocator());
+ switch (patch.GetType()) {
+ case linker::LinkerPatch::Type::kCallRelative: {
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ Offset offset(ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64PointerSize).Int32Value());
+ assembler.JumpTo(ManagedRegister(arm64::X0), offset, ManagedRegister(arm64::IP0));
+ if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+ *debug_name = "MethodCallThunk";
+ }
+ break;
+ }
+ case linker::LinkerPatch::Type::kBakerReadBarrierBranch: {
+ DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
+ CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected patch type " << patch.GetType();
+ UNREACHABLE();
+ }
+
+ // Ensure we emit the literal pool if any.
+ assembler.FinalizeCode();
+ code->resize(assembler.CodeSize());
+ MemoryRegion code_region(code->data(), code->size());
+ assembler.FinalizeInstructions(code_region);
+}
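In effect, the kCallRelative thunk is a tail-jump through the ArtMethod in x0, letting the linker retarget out-of-range BL instructions here; a sketch of the emitted pair:

// ldr xIP0, [x0, #entry_point_from_quick_compiled_code_offset]
// br  xIP0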
+
vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
return uint32_literals_.GetOrCreate(
value,
@@ -4737,7 +4513,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
return;
}
@@ -4750,12 +4526,12 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
return;
}
@@ -4767,7 +4543,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
DCHECK(!codegen_->IsLeafMethod());
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
@@ -4779,14 +4555,14 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadClass::LoadKind::kJitBootImageAddress:
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kRuntimeCall:
break;
}
@@ -4822,13 +4598,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConvention calling_convention;
- caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
- DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
- RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
- DataType::Type::kReference).GetCode());
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -4841,7 +4611,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -4859,12 +4629,12 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
DCHECK(!cls->MustGenerateClinitCheck());
// /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
Register current_method = InputRegisterAt(cls, 0);
- GenerateGcRootFieldLoad(cls,
- out_loc,
- current_method,
- ArtMethod::DeclaringClassOffset().Int32Value(),
- /* fixup_label */ nullptr,
- read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls,
+ out_loc,
+ current_method,
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ /* fixup_label= */ nullptr,
+ read_barrier_option);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
@@ -4880,31 +4650,16 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
break;
}
- case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
- DCHECK_NE(address, 0u);
- __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
- break;
- }
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- // Add ADRP with its PC-relative type patch.
- const DexFile& dex_file = cls->GetDexFile();
- dex::TypeIndex type_index = cls->GetTypeIndex();
- vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index);
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
- // Add LDR with its PC-relative type patch.
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
vixl::aarch64::Label* ldr_label =
- codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index)));
- if (masked_hash != 0) {
- __ Sub(out.W(), out.W(), Operand(masked_hash));
- }
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -4914,29 +4669,36 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
vixl::aarch64::Register temp = XRegisterFrom(out_loc);
vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
- // Add LDR with its PC-relative Class patch.
+ // Add LDR with its PC-relative Class .bss entry patch.
vixl::aarch64::Label* ldr_label =
codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label);
// /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */
- GenerateGcRootFieldLoad(cls,
- out_loc,
- temp,
- /* offset placeholder */ 0u,
- ldr_label,
- read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls,
+ out_loc,
+ temp,
+ /* offset placeholder */ 0u,
+ ldr_label,
+ read_barrier_option);
generate_null_check = true;
break;
}
+ case HLoadClass::LoadKind::kJitBootImageAddress: {
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
+ DCHECK_NE(address, 0u);
+ __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+ break;
+ }
case HLoadClass::LoadKind::kJitTableAddress: {
__ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
cls->GetTypeIndex(),
cls->GetClass()));
- GenerateGcRootFieldLoad(cls,
- out_loc,
- out.X(),
- /* offset */ 0,
- /* fixup_label */ nullptr,
- read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls,
+ out_loc,
+ out.X(),
+ /* offset= */ 0,
+ /* fixup_label= */ nullptr,
+ read_barrier_option);
break;
}
case HLoadClass::LoadKind::kRuntimeCall:
@@ -4948,8 +4710,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
bool do_clinit = cls->MustGenerateClinitCheck();
if (generate_null_check || do_clinit) {
DCHECK(cls->CanCallRuntime());
- SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(
- cls, cls, cls->GetDexPc(), do_clinit);
+ SlowPathCodeARM64* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64(cls, cls);
codegen_->AddSlowPath(slow_path);
if (generate_null_check) {
__ Cbz(out, slow_path->GetEntryLabel());
@@ -4959,10 +4721,30 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA
} else {
__ Bind(slow_path->GetExitLabel());
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
}
+void LocationsBuilderARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location location = LocationFrom(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
+}
+
+void InstructionCodeGeneratorARM64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ codegen_->GenerateLoadMethodHandleRuntimeCall(load);
+}
+
+void LocationsBuilderARM64::VisitLoadMethodType(HLoadMethodType* load) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location location = LocationFrom(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
+}
+
+void InstructionCodeGeneratorARM64::VisitLoadMethodType(HLoadMethodType* load) {
+ codegen_->GenerateLoadMethodTypeRuntimeCall(load);
+}
+
static MemOperand GetExceptionTlsAddress() {
return MemOperand(tr, Thread::ExceptionOffset<kArm64PointerSize>().Int32Value());
}
@@ -4989,14 +4771,14 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadString::LoadKind::kJitBootImageAddress:
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kRuntimeCall:
break;
}
@@ -5014,13 +4796,7 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConvention calling_convention;
- caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
- DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
- RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference),
- DataType::Type::kReference).GetCode());
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -5048,23 +4824,15 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
codegen_->EmitAddPlaceholder(add_label, out.X(), out.X());
return;
}
- case HLoadString::LoadKind::kBootImageAddress: {
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(load->GetString().Get()));
- DCHECK_NE(address, 0u);
- __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
- return;
- }
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- // Add ADRP with its PC-relative String patch.
- const DexFile& dex_file = load->GetDexFile();
- const dex::StringIndex string_index = load->GetStringIndex();
- vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index);
+ // Add ADRP with its PC-relative .data.bimg.rel.ro patch.
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
+ vixl::aarch64::Label* adrp_label = codegen_->NewBootImageRelRoPatch(boot_image_offset);
codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
- // Add LDR with its PC-relative String patch.
+ // Add LDR with its PC-relative .data.bimg.rel.ro patch.
vixl::aarch64::Label* ldr_label =
- codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, adrp_label);
codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X());
return;
}
@@ -5072,38 +4840,43 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
// Add ADRP with its PC-relative String .bss entry patch.
const DexFile& dex_file = load->GetDexFile();
const dex::StringIndex string_index = load->GetStringIndex();
- DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
Register temp = XRegisterFrom(out_loc);
vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index);
codegen_->EmitAdrpPlaceholder(adrp_label, temp);
- // Add LDR with its .bss entry String patch.
+ // Add LDR with its PC-relative String .bss entry patch.
vixl::aarch64::Label* ldr_label =
codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label);
// /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */
- GenerateGcRootFieldLoad(load,
- out_loc,
- temp,
- /* offset placeholder */ 0u,
- ldr_label,
- kCompilerReadBarrierOption);
+ codegen_->GenerateGcRootFieldLoad(load,
+ out_loc,
+ temp,
+ /* offset placeholder */ 0u,
+ ldr_label,
+ kCompilerReadBarrierOption);
SlowPathCodeARM64* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load);
codegen_->AddSlowPath(slow_path);
__ Cbz(out.X(), slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
+ return;
+ }
+ case HLoadString::LoadKind::kJitBootImageAddress: {
+ uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
+ DCHECK_NE(address, 0u);
+ __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
return;
}
case HLoadString::LoadKind::kJitTableAddress: {
__ Ldr(out, codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
load->GetStringIndex(),
load->GetString()));
- GenerateGcRootFieldLoad(load,
- out_loc,
- out.X(),
- /* offset */ 0,
- /* fixup_label */ nullptr,
- kCompilerReadBarrierOption);
+ codegen_->GenerateGcRootFieldLoad(load,
+ out_loc,
+ out.X(),
+ /* offset= */ 0,
+ /* fixup_label= */ nullptr,
+ kCompilerReadBarrierOption);
return;
}
default:
@@ -5116,7 +4889,7 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD
__ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_);
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) {
@@ -5144,7 +4917,7 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins
} else {
CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
void LocationsBuilderARM64::VisitMul(HMul* mul) {
@@ -5235,50 +5008,25 @@ void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- QuickEntrypointEnum entrypoint =
- CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(LocationFrom(kArtMethodRegister));
- } else {
- locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
}
void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- Location temp = instruction->GetLocations()->GetTemp(0);
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
- __ Ldr(XRegisterFrom(temp), MemOperand(tr, QUICK_ENTRY_POINT(pNewEmptyString)));
- __ Ldr(lr, MemOperand(XRegisterFrom(temp), code_offset.Int32Value()));
-
- {
- // Ensure the pc position is recorded immediately after the `blr` instruction.
- ExactAssemblyScope eas(GetVIXLAssembler(),
- kInstructionSize,
- CodeBufferCheckScope::kExactSize);
- __ blr(lr);
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- }
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- }
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
void LocationsBuilderARM64::VisitNot(HNot* instruction) {
@@ -5319,7 +5067,7 @@ void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) {
return;
}
{
- // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
+ // Ensure that between load and RecordPcInfo there are no pools emitted.
EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
Location obj = instruction->GetLocations()->InAt(0);
__ Ldr(wzr, HeapOperandFrom(obj, Offset(0)));
@@ -5433,13 +5181,75 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) {
}
}
+void InstructionCodeGeneratorARM64::GenerateIntRemForPower2Denom(HRem* instruction) {
+ int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
+ uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm));
+ DCHECK(IsPowerOfTwo(abs_imm)) << abs_imm;
+
+ Register out = OutputRegister(instruction);
+ Register dividend = InputRegisterAt(instruction, 0);
+
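+  // For a power-of-2 divisor d, the remainder is
+  //   dividend >= 0 ? dividend & (d - 1) : -((-dividend) & (d - 1)),
+  // computed branch-free below with CSNEG; the d == 2 case needs no scratch register.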
+ if (abs_imm == 2) {
+ __ Cmp(dividend, 0);
+ __ And(out, dividend, 1);
+ __ Csneg(out, out, out, ge);
+ } else {
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
+
+ __ Negs(temp, dividend);
+ __ And(out, dividend, abs_imm - 1);
+ __ And(temp, temp, abs_imm - 1);
+ __ Csneg(out, out, temp, mi);
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateIntRemForConstDenom(HRem* instruction) {
+ int64_t imm = Int64FromLocation(instruction->GetLocations()->InAt(1));
+
+ if (imm == 0) {
+    // Do not generate anything: DivZeroCheck would prevent any code
+    // from being executed.
+ return;
+ }
+
+ if (IsPowerOfTwo(AbsOrMin(imm))) {
+    // The cases imm == -1 and imm == 1 are handled in constant folding by
+    // InstructionWithAbsorbingInputSimplifier.
+    // If they survive until code generation, they are handled here by
+    // GenerateIntRemForPower2Denom because -1 and 1 are powers of 2 (2^0).
+    // The generated code is still correct, just longer.
+ GenerateIntRemForPower2Denom(instruction);
+ } else {
+ DCHECK(imm < -2 || imm > 2) << imm;
+ GenerateDivRemWithAnyConstant(instruction);
+ }
+}
+
+void InstructionCodeGeneratorARM64::GenerateIntRem(HRem* instruction) {
+ DCHECK(DataType::IsIntOrLongType(instruction->GetResultType()))
+ << instruction->GetResultType();
+
+ if (instruction->GetLocations()->InAt(1).IsConstant()) {
+ GenerateIntRemForConstDenom(instruction);
+ } else {
+ Register out = OutputRegister(instruction);
+ Register dividend = InputRegisterAt(instruction, 0);
+ Register divisor = InputRegisterAt(instruction, 1);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ Register temp = temps.AcquireSameSizeAs(out);
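+    // Generic case: rem = dividend - (dividend / divisor) * divisor,
+    // i.e. an SDIV followed by an MSUB.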
+ __ Sdiv(temp, dividend, divisor);
+ __ Msub(out, temp, divisor, dividend);
+ }
+}
+
void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
DataType::Type type = rem->GetResultType();
switch (type) {
case DataType::Type::kInt32:
case DataType::Type::kInt64: {
- GenerateDivRemIntegral(rem);
+ GenerateIntRem(rem);
break;
}
@@ -5462,6 +5272,62 @@ void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) {
}
}
+void LocationsBuilderARM64::VisitMin(HMin* min) {
+ HandleBinaryOp(min);
+}
+
+void InstructionCodeGeneratorARM64::VisitMin(HMin* min) {
+ HandleBinaryOp(min);
+}
+
+void LocationsBuilderARM64::VisitMax(HMax* max) {
+ HandleBinaryOp(max);
+}
+
+void InstructionCodeGeneratorARM64::VisitMax(HMax* max) {
+ HandleBinaryOp(max);
+}
+
+void LocationsBuilderARM64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitAbs(HAbs* abs) {
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64: {
+ Register in_reg = InputRegisterAt(abs, 0);
+ Register out_reg = OutputRegister(abs);
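+      // abs(x) = (x < 0) ? -x : x, done branch-free with CMP + CNEG.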
+ __ Cmp(in_reg, Operand(0));
+ __ Cneg(out_reg, in_reg, lt);
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64: {
+ FPRegister in_reg = InputFPRegisterAt(abs, 0);
+ FPRegister out_reg = OutputFPRegister(abs);
+ __ Fabs(out_reg, in_reg);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
+ }
+}
+
void LocationsBuilderARM64::VisitConstructorFence(HConstructorFence* constructor_fence) {
constructor_fence->SetLocations(nullptr);
}
@@ -5635,7 +5501,7 @@ void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction
return;
}
GenerateSuspendCheck(instruction, nullptr);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
void LocationsBuilderARM64::VisitThrow(HThrow* instruction) {
@@ -5848,8 +5714,8 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
out_reg,
offset,
maybe_temp,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
@@ -5889,8 +5755,8 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
obj_reg,
offset,
maybe_temp,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -5905,7 +5771,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
}
}
-void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
+void CodeGeneratorARM64::GenerateGcRootFieldLoad(
HInstruction* instruction,
Location root,
Register obj,
@@ -5919,77 +5785,39 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
- !Runtime::Current()->UseJitCompilation()) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
- // the Marking Register) to decide whether we need to enter
- // the slow path to mark the GC root.
- //
- // We use link-time generated thunks for the slow path. That thunk
- // checks the reference and jumps to the entrypoint if needed.
- //
- // lr = &return_address;
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto gc_root_thunk<root_reg>(lr)
- // }
- // return_address:
-
- UseScratchRegisterScope temps(GetVIXLAssembler());
- DCHECK(temps.IsAvailable(ip0));
- DCHECK(temps.IsAvailable(ip1));
- temps.Exclude(ip0, ip1);
- uint32_t custom_data =
- linker::Arm64RelativePatcher::EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
- vixl::aarch64::Label* cbnz_label = codegen_->NewBakerReadBarrierPatch(custom_data);
-
- EmissionCheckScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
- vixl::aarch64::Label return_address;
- __ adr(lr, &return_address);
- if (fixup_label != nullptr) {
- __ Bind(fixup_label);
- }
- static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
- "GC root LDR must be 2 instruction (8B) before the return address label.");
- __ ldr(root_reg, MemOperand(obj.X(), offset));
- __ Bind(cbnz_label);
- __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
- __ Bind(&return_address);
- } else {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
- // the Marking Register) to decide whether we need to enter
- // the slow path to mark the GC root.
- //
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call.
- // }
-
- // Slow path marking the GC root `root`. The entrypoint will
- // be loaded by the slow path code.
- SlowPathCodeARM64* slow_path =
- new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root);
- codegen_->AddSlowPath(slow_path);
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- if (fixup_label == nullptr) {
- __ Ldr(root_reg, MemOperand(obj, offset));
- } else {
- codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj);
- }
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- __ Cbnz(mr, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+ // the Marking Register) to decide whether we need to enter
+ // the slow path to mark the GC root.
+ //
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the reference
+ // and jumps to the entrypoint if needed.
+ //
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode());
+
+ ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ if (fixup_label != nullptr) {
+ __ bind(fixup_label);
}
+ static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+ "GC root LDR must be 2 instructions (8B) before the return address label.");
+ __ ldr(root_reg, MemOperand(obj.X(), offset));
+ EmitBakerReadBarrierCbnz(custom_data);
+ __ bind(&return_address);
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
@@ -5997,10 +5825,10 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
if (fixup_label == nullptr) {
__ Add(root_reg.X(), obj.X(), offset);
} else {
- codegen_->EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
+ EmitAddPlaceholder(fixup_label, root_reg.X(), obj.X());
}
// /* mirror::Object* */ root = root->Read()
- codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ GenerateReadBarrierForRootSlow(instruction, root, root);
}
} else {
// Plain GC root load with no read barrier.
@@ -6008,108 +5836,134 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
if (fixup_label == nullptr) {
__ Ldr(root_reg, MemOperand(obj, offset));
} else {
- codegen_->EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
+ EmitLdrOffsetPlaceholder(fixup_label, root_reg, obj.X());
}
// Note that GC roots are not affected by heap poisoning, thus we
// do not have to unpoison `root_reg` here.
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
+ MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
+}
+
+void CodeGeneratorARM64::GenerateUnsafeCasOldValueMovWithBakerReadBarrier(
+ vixl::aarch64::Register marked,
+ vixl::aarch64::Register old_value) {
+ DCHECK(kEmitCompilerReadBarrier);
+ DCHECK(kUseBakerReadBarrier);
+
+ // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with a MOV instead of LDR.
+ uint32_t custom_data = EncodeBakerReadBarrierGcRootData(marked.GetCode());
+
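+  // The encoded data carries `marked`'s register code, so the thunk marks the
+  // value in that register in place, just as it would a freshly loaded GC root.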
+ ExactAssemblyScope guard(GetVIXLAssembler(), 3 * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ static_assert(BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET == -8,
+ "GC root LDR must be 2 instructions (8B) before the return address label.");
+ __ mov(marked, old_value);
+ EmitBakerReadBarrierCbnz(custom_data);
+ __ bind(&return_address);
}
void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
- Register obj,
- uint32_t offset,
- Location maybe_temp,
+ vixl::aarch64::Register obj,
+ const vixl::aarch64::MemOperand& src,
bool needs_null_check,
bool use_load_acquire) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !use_load_acquire &&
- !Runtime::Current()->UseJitCompilation()) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to mark the reference. Then, in the slow path, check the
- // gray bit in the lock word of the reference's holder (`obj`) to
- // decide whether to mark `ref` or not.
- //
- // We use link-time generated thunks for the slow path. That thunk checks
- // the holder and jumps to the entrypoint if needed. If the holder is not
- // gray, it creates a fake dependency and returns to the LDR instruction.
- //
- // lr = &gray_return_address;
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto field_thunk<holder_reg, base_reg>(lr)
- // }
- // not_gray_return_address:
- // // Original reference load. If the offset is too large to fit
- // // into LDR, we use an adjusted base register here.
- // HeapReference<mirror::Object> reference = *(obj+offset);
- // gray_return_address:
-
- DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
- Register base = obj;
- if (offset >= kReferenceLoadMinFarOffset) {
- DCHECK(maybe_temp.IsRegister());
- base = WRegisterFrom(maybe_temp);
- static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
- __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
- offset &= (kReferenceLoadMinFarOffset - 1u);
- }
- UseScratchRegisterScope temps(GetVIXLAssembler());
- DCHECK(temps.IsAvailable(ip0));
- DCHECK(temps.IsAvailable(ip1));
- temps.Exclude(ip0, ip1);
- uint32_t custom_data = linker::Arm64RelativePatcher::EncodeBakerReadBarrierFieldData(
- base.GetCode(),
- obj.GetCode());
- vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
+ //
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the holder
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it creates a fake dependency and returns to the LDR instruction.
+ //
+ // lr = &gray_return_address;
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto field_thunk<holder_reg, base_reg, use_load_acquire>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
- {
- EmissionCheckScope guard(GetVIXLAssembler(),
- (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
- vixl::aarch64::Label return_address;
- __ adr(lr, &return_address);
- __ Bind(cbnz_label);
- __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
- static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Field LDR must be 1 instruction (4B) before the return address label; "
- " 2 instructions (8B) for heap poisoning.");
- Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
- __ ldr(ref_reg, MemOperand(base.X(), offset));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
- __ Bind(&return_address);
+ DCHECK(src.GetAddrMode() == vixl::aarch64::Offset);
+ DCHECK_ALIGNED(src.GetOffset(), sizeof(mirror::HeapReference<mirror::Object>));
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+ uint32_t custom_data = use_load_acquire
+ ? EncodeBakerReadBarrierAcquireData(src.GetBaseRegister().GetCode(), obj.GetCode())
+ : EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode());
+
+ {
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ EmitBakerReadBarrierCbnz(custom_data);
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 instruction (4B) before the return address label; "
+                  "2 instructions (8B) for heap poisoning.");
+ Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+ if (use_load_acquire) {
+ DCHECK_EQ(src.GetOffset(), 0);
+ __ ldar(ref_reg, src);
+ } else {
+ __ ldr(ref_reg, src);
}
- MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
- return;
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
+ // macro instructions disallowed in ExactAssemblyScope.
+ if (kPoisonHeapReferences) {
+ __ neg(ref_reg, Operand(ref_reg));
+ }
+ __ bind(&return_address);
}
-
- // /* HeapReference<Object> */ ref = *(obj + offset)
- Register temp = WRegisterFrom(maybe_temp);
- Location no_index = Location::NoLocation();
- size_t no_scale_factor = 0u;
- GenerateReferenceLoadWithBakerReadBarrier(instruction,
- ref,
- obj,
- offset,
- no_index,
- no_scale_factor,
- temp,
- needs_null_check,
- use_load_acquire);
+ MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
}
-void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ Register obj,
+ uint32_t offset,
+ Location maybe_temp,
+ bool needs_null_check,
+ bool use_load_acquire) {
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ Register base = obj;
+ if (use_load_acquire) {
+ DCHECK(maybe_temp.IsRegister());
+ base = WRegisterFrom(maybe_temp);
+ __ Add(base, obj, offset);
+ offset = 0u;
+ } else if (offset >= kReferenceLoadMinFarOffset) {
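+    // Split the large offset into an aligned base ADD plus a small remainder,
+    // keeping the LDR immediate small enough for the thunk to introspect.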
+ DCHECK(maybe_temp.IsRegister());
+ base = WRegisterFrom(maybe_temp);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ }
+ MemOperand src(base.X(), offset);
+ GenerateFieldLoadWithBakerReadBarrier(
+ instruction, ref, obj, src, needs_null_check, use_load_acquire);
+}
+
+void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
Location ref,
Register obj,
uint32_t data_offset,
Location index,
- Register temp,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
@@ -6119,267 +5973,72 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
size_t scale_factor = DataType::SizeShift(DataType::Type::kReference);
- if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
- !Runtime::Current()->UseJitCompilation()) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to mark the reference. Then, in the slow path, check the
- // gray bit in the lock word of the reference's holder (`obj`) to
- // decide whether to mark `ref` or not.
- //
- // We use link-time generated thunks for the slow path. That thunk checks
- // the holder and jumps to the entrypoint if needed. If the holder is not
- // gray, it creates a fake dependency and returns to the LDR instruction.
- //
- // lr = &gray_return_address;
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto array_thunk<base_reg>(lr)
- // }
- // not_gray_return_address:
- // // Original reference load. If the offset is too large to fit
- // // into LDR, we use an adjusted base register here.
- // HeapReference<mirror::Object> reference = data[index];
- // gray_return_address:
-
- DCHECK(index.IsValid());
- Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
- Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
-
- UseScratchRegisterScope temps(GetVIXLAssembler());
- DCHECK(temps.IsAvailable(ip0));
- DCHECK(temps.IsAvailable(ip1));
- temps.Exclude(ip0, ip1);
- uint32_t custom_data =
- linker::Arm64RelativePatcher::EncodeBakerReadBarrierArrayData(temp.GetCode());
- vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data);
-
- __ Add(temp.X(), obj.X(), Operand(data_offset));
- {
- EmissionCheckScope guard(GetVIXLAssembler(),
- (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
- vixl::aarch64::Label return_address;
- __ adr(lr, &return_address);
- __ Bind(cbnz_label);
- __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time.
- static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
- "Array LDR must be 1 instruction (4B) before the return address label; "
- " 2 instructions (8B) for heap poisoning.");
- __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
- DCHECK(!needs_null_check); // The thunk cannot handle the null check.
- GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
- __ Bind(&return_address);
- }
- MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1));
- return;
- }
-
- // Array cells are never volatile variables, therefore array loads
- // never use Load-Acquire instructions on ARM64.
- const bool use_load_acquire = false;
-
- // /* HeapReference<Object> */ ref =
- // *(obj + data_offset + index * sizeof(HeapReference<Object>))
- GenerateReferenceLoadWithBakerReadBarrier(instruction,
- ref,
- obj,
- data_offset,
- index,
- scale_factor,
- temp,
- needs_null_check,
- use_load_acquire);
-}
-
-void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- Register temp,
- bool needs_null_check,
- bool use_load_acquire) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- // If we are emitting an array load, we should not be using a
- // Load Acquire instruction. In other words:
- // `instruction->IsArrayGet()` => `!use_load_acquire`.
- DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
-
// Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
// Marking Register) to decide whether we need to enter the slow
// path to mark the reference. Then, in the slow path, check the
// gray bit in the lock word of the reference's holder (`obj`) to
// decide whether to mark `ref` or not.
//
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // }
-
- // Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will be loaded by the slow path code.
- SlowPathCodeARM64* slow_path =
- new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARM64(
- instruction,
- ref,
- obj,
- offset,
- index,
- scale_factor,
- needs_null_check,
- use_load_acquire,
- temp);
- AddSlowPath(slow_path);
-
- __ Cbnz(mr, slow_path->GetEntryLabel());
- // Fast path: the GC is not marking: just load the reference.
- GenerateRawReferenceLoad(
- instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire);
- __ Bind(slow_path->GetExitLabel());
- MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
-}
-
-void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- Register obj,
- Location field_offset,
- Register temp,
- bool needs_null_check,
- bool use_load_acquire) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- // If we are emitting an array load, we should not be using a
- // Load Acquire instruction. In other words:
- // `instruction->IsArrayGet()` => `!use_load_acquire`.
- DCHECK(!instruction->IsArrayGet() || !use_load_acquire);
-
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to update the reference field within `obj`. Then, in the
- // slow path, check the gray bit in the lock word of the reference's
- // holder (`obj`) to decide whether to mark `ref` and update the
- // field or not.
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the holder
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it creates a fake dependency and returns to the LDR instruction.
//
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // old_ref = ref;
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // lr = &gray_return_address;
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto array_thunk<base_reg>(lr)
// }
- // }
-
- // Slow path updating the object reference at address `obj + field_offset`
- // when the GC is marking. The entrypoint will be loaded by the slow path code.
- SlowPathCodeARM64* slow_path =
- new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64(
- instruction,
- ref,
- obj,
- /* offset */ 0u,
- /* index */ field_offset,
- /* scale_factor */ 0u /* "times 1" */,
- needs_null_check,
- use_load_acquire,
- temp);
- AddSlowPath(slow_path);
-
- __ Cbnz(mr, slow_path->GetEntryLabel());
- // Fast path: the GC is not marking: nothing to do (the field is
- // up-to-date, and we don't need to load the reference).
- __ Bind(slow_path->GetExitLabel());
- MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__);
-}
-
-void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction,
- Location ref,
- Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- bool needs_null_check,
- bool use_load_acquire) {
- DCHECK(obj.IsW());
- DataType::Type type = DataType::Type::kReference;
- Register ref_reg = RegisterFrom(ref, type);
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
- // If needed, vixl::EmissionCheckScope guards are used to ensure
- // that no pools are emitted between the load (macro) instruction
- // and MaybeRecordImplicitNullCheck.
+ DCHECK(index.IsValid());
+ Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
+ Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
- if (index.IsValid()) {
- // Load types involving an "index": ArrayGet,
- // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
- // intrinsics.
- if (use_load_acquire) {
- // UnsafeGetObjectVolatile intrinsic case.
- // Register `index` is not an index in an object array, but an
- // offset to an object reference field within object `obj`.
- DCHECK(instruction->IsInvoke()) << instruction->DebugName();
- DCHECK(instruction->GetLocations()->Intrinsified());
- DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
- << instruction->AsInvoke()->GetIntrinsic();
- DCHECK_EQ(offset, 0u);
- DCHECK_EQ(scale_factor, 0u);
- DCHECK_EQ(needs_null_check, false);
- // /* HeapReference<mirror::Object> */ ref = *(obj + index)
- MemOperand field = HeapOperand(obj, XRegisterFrom(index));
- LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
- } else {
- // ArrayGet and UnsafeGetObject and UnsafeCASObject intrinsics cases.
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
- if (index.IsConstant()) {
- uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
- EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- Load(type, ref_reg, HeapOperand(obj, computed_offset));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- } else {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp = temps.AcquireW();
- __ Add(temp, obj, offset);
- {
- EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- Load(type, ref_reg, HeapOperand(temp, XRegisterFrom(index), LSL, scale_factor));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- }
- }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ DCHECK(temps.IsAvailable(ip0));
+ DCHECK(temps.IsAvailable(ip1));
+ temps.Exclude(ip0, ip1);
+
+ Register temp;
+ if (instruction->GetArray()->IsIntermediateAddress()) {
+ // We do not need to compute the intermediate address from the array: the
+ // input instruction has done it already. See the comment in
+ // `TryExtractArrayAccessAddress()`.
+ if (kIsDebugBuild) {
+ HIntermediateAddress* interm_addr = instruction->GetArray()->AsIntermediateAddress();
+ DCHECK_EQ(interm_addr->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
}
+ temp = obj;
} else {
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
- MemOperand field = HeapOperand(obj, offset);
- if (use_load_acquire) {
- // Implicit null checks are handled by CodeGeneratorARM64::LoadAcquire.
- LoadAcquire(instruction, ref_reg, field, needs_null_check);
- } else {
- EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- Load(type, ref_reg, field);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- }
+ temp = WRegisterFrom(instruction->GetLocations()->GetTemp(0));
+ __ Add(temp.X(), obj.X(), Operand(data_offset));
}
- // Object* ref = ref_addr->AsMirrorPtr()
- GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ uint32_t custom_data = EncodeBakerReadBarrierArrayData(temp.GetCode());
+
+ {
+ ExactAssemblyScope guard(GetVIXLAssembler(),
+ (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize);
+ vixl::aarch64::Label return_address;
+ __ adr(lr, &return_address);
+ EmitBakerReadBarrierCbnz(custom_data);
+ static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Array LDR must be 1 instruction (4B) before the return address label; "
+                  "2 instructions (8B) for heap poisoning.");
+ __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ // Unpoison the reference explicitly if needed. MaybeUnpoisonHeapReference() uses
+ // macro instructions disallowed in ExactAssemblyScope.
+ if (kPoisonHeapReferences) {
+ __ neg(ref_reg, Operand(ref_reg));
+ }
+ __ bind(&return_address);
+ }
+ MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__, /* temp_loc= */ LocationFrom(ip1));
}
void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
@@ -6510,5 +6169,193 @@ void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_
#undef __
#undef QUICK_ENTRY_POINT
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(arm64::Arm64Assembler& assembler,
+ vixl::aarch64::Register base_reg,
+ vixl::aarch64::MemOperand& lock_word,
+ vixl::aarch64::Label* slow_path,
+ vixl::aarch64::Label* throw_npe = nullptr) {
+ // Load the lock word containing the rb_state.
+ __ Ldr(ip0.W(), lock_word);
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tbnz(ip0.W(), LockWord::kReadBarrierStateShift, slow_path);
+ static_assert(
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET,
+ "Field and array LDR offsets must be the same to reuse the same code.");
+ // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
+ if (throw_npe != nullptr) {
+ __ Bind(throw_npe);
+ }
+ // Adjust the return address back to the LDR (1 instruction; 2 for heap poisoning).
+ static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4),
+ "Field LDR must be 1 instruction (4B) before the return address label; "
+                "2 instructions (8B) for heap poisoning.");
+ __ Add(lr, lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ __ Add(base_reg, base_reg, Operand(ip0, LSR, 32));
+ __ Br(lr); // And return back to the function.
+ // Note: The fake dependency is unnecessary for the slow path.
+}
+
+// Load the read barrier introspection entrypoint in register `entrypoint`.
+static void LoadReadBarrierMarkIntrospectionEntrypoint(arm64::Arm64Assembler& assembler,
+ vixl::aarch64::Register entrypoint) {
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg16, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip0.GetCode(), 16u);
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ip0.GetCode());
+ __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+}
+
+void CodeGeneratorARM64::CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
+ uint32_t encoded_data,
+ /*out*/ std::string* debug_name) {
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ switch (kind) {
+ case BakerReadBarrierKind::kField:
+ case BakerReadBarrierKind::kAcquire: {
+ auto base_reg =
+ Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ auto holder_reg =
+ Register::GetXRegFromCode(BakerReadBarrierSecondRegField::Decode(encoded_data));
+ CheckValidReg(holder_reg.GetCode());
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ // If base_reg differs from holder_reg, the offset was too large and we must have emitted
+ // an explicit null check before the load. Otherwise, for implicit null checks, we need to
+ // null-check the holder as we do not necessarily do that check before going to the thunk.
+ vixl::aarch64::Label throw_npe_label;
+ vixl::aarch64::Label* throw_npe = nullptr;
+ if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
+ throw_npe = &throw_npe_label;
+ __ Cbz(holder_reg.W(), throw_npe);
+ }
+ // Check if the holder is gray and, if not, add fake dependency to the base register
+ // and return to the LDR instruction to load the reference. Otherwise, use introspection
+ // to load the reference and call the entrypoint that performs further checks on the
+ // reference and marks it if needed.
+ vixl::aarch64::Label slow_path;
+ MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, throw_npe);
+ __ Bind(&slow_path);
+ if (kind == BakerReadBarrierKind::kField) {
+ MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET);
+ __ Ldr(ip0.W(), ldr_address); // Load the LDR (immediate) unsigned offset.
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+ __ Ubfx(ip0.W(), ip0.W(), 10, 12); // Extract the offset.
+ __ Ldr(ip0.W(), MemOperand(base_reg, ip0, LSL, 2)); // Load the reference.
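+        // The LDR (immediate) encoding holds an unsigned imm12 in bits [21:10],
+        // scaled by the 4-byte access size, hence the UBFX and LSL #2 above.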
+ } else {
+ DCHECK(kind == BakerReadBarrierKind::kAcquire);
+ DCHECK(!base_reg.Is(holder_reg));
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+ __ Ldar(ip0.W(), MemOperand(base_reg));
+ }
+ // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+ __ Br(ip1); // Jump to the entrypoint.
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ auto base_reg =
+ Register::GetXRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ vixl::aarch64::Label slow_path;
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+ DCHECK_LT(lock_word.GetOffset(), 0);
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path);
+ __ Bind(&slow_path);
+ MemOperand ldr_address(lr, BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
+ __ Ldr(ip0.W(), ldr_address); // Load the LDR (register) unsigned offset.
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+ __ Ubfx(ip0, ip0, 16, 6); // Extract the index register, plus 32 (bit 21 is set).
+      __ Bfi(ip1, ip0, 3, 6);           // Insert ip0 into the entrypoint address to create
+                                        // a switch case target based on the index register.
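+      // (Inserting the register number at bit 3 spaces the entrypoint's
+      // per-register switch cases 8 bytes apart.)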
+ __ Mov(ip0, base_reg); // Move the base register to ip0.
+ __ Br(ip1); // Jump to the entrypoint's array switch case.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
+ // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+ // and it does not have a forwarding address), call the correct introspection entrypoint;
+ // otherwise return the reference (or the extracted forwarding address).
+ // There is no gray bit check for GC roots.
+ auto root_reg =
+ Register::GetWRegFromCode(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(root_reg.GetCode());
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip0, ip1);
+ vixl::aarch64::Label return_label, not_marked, forwarding_address;
+ __ Cbz(root_reg, &return_label);
+ MemOperand lock_word(root_reg.X(), mirror::Object::MonitorOffset().Int32Value());
+ __ Ldr(ip0.W(), lock_word);
+ __ Tbz(ip0.W(), LockWord::kMarkBitStateShift, &not_marked);
+ __ Bind(&return_label);
+ __ Br(lr);
+ __ Bind(&not_marked);
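+      // A forwarding-address lock word has both top state bits set; ANDing the
+      // lock word with itself shifted left by one makes N = bit31 & bit30, so
+      // the MI branch below takes the forwarding-address path.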
+ __ Tst(ip0.W(), Operand(ip0.W(), LSL, 1));
+ __ B(&forwarding_address, mi);
+ LoadReadBarrierMarkIntrospectionEntrypoint(assembler, ip1);
+ // Adjust the art_quick_read_barrier_mark_introspection address in IP1 to
+ // art_quick_read_barrier_mark_introspection_gc_roots.
+ __ Add(ip1, ip1, Operand(BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET));
+ __ Mov(ip0.W(), root_reg);
+ __ Br(ip1);
+ __ Bind(&forwarding_address);
+ __ Lsl(root_reg, ip0.W(), LockWord::kForwardingAddressShift);
+ __ Br(lr);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
+
+ // For JIT, the slow path is considered part of the compiled method,
+ // so JIT should pass null as `debug_name`. Tests may not have a runtime.
+ DCHECK(Runtime::Current() == nullptr ||
+ !Runtime::Current()->UseJitCompilation() ||
+ debug_name == nullptr);
+ if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
+ std::ostringstream oss;
+ oss << "BakerReadBarrierThunk";
+ switch (kind) {
+ case BakerReadBarrierKind::kField:
+ oss << "Field_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
+ << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
+ break;
+ case BakerReadBarrierKind::kAcquire:
+ oss << "Acquire_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
+ << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
+ break;
+ case BakerReadBarrierKind::kArray:
+ oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ break;
+ case BakerReadBarrierKind::kGcRoot:
+ oss << "GcRoot_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ break;
+ }
+ *debug_name = oss.str();
+ }
+}
+
+#undef __
+
} // namespace arm64
} // namespace art
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 0654046de5..ada5742fc0 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -17,7 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
#define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM64_H_
-#include "arch/arm64/quick_method_frame_info_arm64.h"
+#include "base/bit_field.h"
#include "code_generator.h"
#include "common_arm64.h"
#include "dex/dex_file_types.h"
@@ -36,6 +36,11 @@
#pragma GCC diagnostic pop
namespace art {
+
+namespace linker {
+class Arm64RelativePatcherTest;
+} // namespace linker
+
namespace arm64 {
class CodeGeneratorARM64;
@@ -87,6 +92,16 @@ const vixl::aarch64::CPURegList runtime_reserved_core_registers =
((kEmitCompilerReadBarrier && kUseBakerReadBarrier) ? mr : vixl::aarch64::NoCPUReg),
vixl::aarch64::lr);
+// Some instructions have special requirements for a temporary. For example,
+// LoadClass/kBssEntry and LoadString/kBssEntry for Baker read barrier require
+// a temp that's not R0 (to avoid an extra move), and Baker read barrier field
+// loads with large offsets need a fixed register to limit the number of link-time
+// thunks we generate. For these and similar cases, we want to reserve a specific
+// register that's neither callee-save nor an argument register. We choose x15.
+inline Location FixedTempLocation() {
+ return Location::RegisterLocation(vixl::aarch64::x15.GetCode());
+}
+
// Callee-save registers AAPCS64, without x19 (Thread Register) (nor
// x20 (Marking Register) when emitting Baker read barriers).
const vixl::aarch64::CPURegList callee_saved_core_registers(
@@ -110,8 +125,8 @@ class SlowPathCodeARM64 : public SlowPathCode {
vixl::aarch64::Label* GetEntryLabel() { return &entry_label_; }
vixl::aarch64::Label* GetExitLabel() { return &exit_label_; }
- void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
- void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
+ void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;
+ void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;
private:
vixl::aarch64::Label entry_label_;
@@ -201,11 +216,11 @@ class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConvention
InvokeDexCallingConventionVisitorARM64() {}
virtual ~InvokeDexCallingConventionVisitorARM64() {}
- Location GetNextLocation(DataType::Type type) OVERRIDE;
- Location GetReturnLocation(DataType::Type return_type) const OVERRIDE {
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type return_type) const override {
return calling_convention.GetReturnLocation(return_type);
}
- Location GetMethodLocation() const OVERRIDE;
+ Location GetMethodLocation() const override;
private:
InvokeDexCallingConvention calling_convention;
@@ -217,22 +232,22 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionARM64() {}
- Location GetObjectLocation() const OVERRIDE {
+ Location GetObjectLocation() const override {
return helpers::LocationFrom(vixl::aarch64::x1);
}
- Location GetFieldIndexLocation() const OVERRIDE {
+ Location GetFieldIndexLocation() const override {
return helpers::LocationFrom(vixl::aarch64::x0);
}
- Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
return helpers::LocationFrom(vixl::aarch64::x0);
}
Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED,
- bool is_instance) const OVERRIDE {
+ bool is_instance) const override {
return is_instance
? helpers::LocationFrom(vixl::aarch64::x2)
: helpers::LocationFrom(vixl::aarch64::x1);
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
return helpers::LocationFrom(vixl::aarch64::d0);
}
@@ -245,7 +260,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
InstructionCodeGeneratorARM64(HGraph* graph, CodeGeneratorARM64* codegen);
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
@@ -253,7 +268,7 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -264,6 +279,8 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeARM64* slow_path,
vixl::aarch64::Register class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ vixl::aarch64::Register temp);
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* instr);
@@ -303,17 +320,6 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
uint32_t offset,
Location maybe_temp,
ReadBarrierOption read_barrier_option);
- // Generate a GC root reference load:
- //
- // root <- *(obj + offset)
- //
- // while honoring read barriers based on read_barrier_option.
- void GenerateGcRootFieldLoad(HInstruction* instruction,
- Location root,
- vixl::aarch64::Register obj,
- uint32_t offset,
- vixl::aarch64::Label* fixup_label,
- ReadBarrierOption read_barrier_option);
// Generate a floating-point comparison.
void GenerateFcmp(HInstruction* instruction);
@@ -326,7 +332,12 @@ class InstructionCodeGeneratorARM64 : public InstructionCodeGenerator {
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivRemByPowerOfTwo(HBinaryOperation* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
- void GenerateDivRemIntegral(HBinaryOperation* instruction);
+ void GenerateIntDiv(HDiv* instruction);
+  void GenerateIntDivForConstDenom(HDiv* instruction);
+  void GenerateIntDivForPower2Denom(HDiv* instruction);
+  void GenerateIntRem(HRem* instruction);
+  void GenerateIntRemForConstDenom(HRem* instruction);
+  void GenerateIntRemForPower2Denom(HRem* instruction);
void HandleGoto(HInstruction* got, HBasicBlock* successor);
vixl::aarch64::MemOperand VecAddress(
@@ -349,7 +360,7 @@ class LocationsBuilderARM64 : public HGraphVisitor {
: HGraphVisitor(graph), codegen_(codegen) {}
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
@@ -357,7 +368,7 @@ class LocationsBuilderARM64 : public HGraphVisitor {
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -382,11 +393,11 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
: ParallelMoveResolverNoSwap(allocator), codegen_(codegen), vixl_temps_() {}
protected:
- void PrepareForEmitNativeCode() OVERRIDE;
- void FinishEmitNativeCode() OVERRIDE;
- Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE;
- void FreeScratchLocation(Location loc) OVERRIDE;
- void EmitMove(size_t index) OVERRIDE;
+ void PrepareForEmitNativeCode() override;
+ void FinishEmitNativeCode() override;
+ Location AllocateScratchLocationFor(Location::Kind kind) override;
+ void FreeScratchLocation(Location loc) override;
+ void EmitMove(size_t index) override;
private:
Arm64Assembler* GetAssembler() const;
@@ -403,44 +414,43 @@ class ParallelMoveResolverARM64 : public ParallelMoveResolverNoSwap {
class CodeGeneratorARM64 : public CodeGenerator {
public:
CodeGeneratorARM64(HGraph* graph,
- const Arm64InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
virtual ~CodeGeneratorARM64() {}
- void GenerateFrameEntry() OVERRIDE;
- void GenerateFrameExit() OVERRIDE;
+ void GenerateFrameEntry() override;
+ void GenerateFrameExit() override;
vixl::aarch64::CPURegList GetFramePreservedCoreRegisters() const;
vixl::aarch64::CPURegList GetFramePreservedFPRegisters() const;
- void Bind(HBasicBlock* block) OVERRIDE;
+ void Bind(HBasicBlock* block) override;
vixl::aarch64::Label* GetLabelOf(HBasicBlock* block) {
block = FirstNonEmptyBlock(block);
return &(block_labels_[block->GetBlockId()]);
}
- size_t GetWordSize() const OVERRIDE {
+ size_t GetWordSize() const override {
return kArm64WordSize;
}
- size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ size_t GetFloatingPointSpillSlotSize() const override {
return GetGraph()->HasSIMD()
? 2 * kArm64WordSize // 16 bytes == 2 arm64 words for each spill
        : 1 * kArm64WordSize; // 8 bytes == 1 arm64 word for each spill
}
- uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+ uintptr_t GetAddressOf(HBasicBlock* block) override {
vixl::aarch64::Label* block_entry_label = GetLabelOf(block);
DCHECK(block_entry_label->IsBound());
return block_entry_label->GetLocation();
}
- HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
- HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
- Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
- const Arm64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
+ HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
+ HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; }
+ Arm64Assembler* GetAssembler() override { return &assembler_; }
+ const Arm64Assembler& GetAssembler() const override { return assembler_; }
vixl::aarch64::MacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
// Emit a write barrier.
@@ -452,12 +462,12 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Register allocation.
- void SetupBlockedRegisters() const OVERRIDE;
+ void SetupBlockedRegisters() const override;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
// The number of registers that can be allocated. The register allocator may
// decide to reserve and not use a few of them.
@@ -469,37 +479,35 @@ class CodeGeneratorARM64 : public CodeGenerator {
static const int kNumberOfAllocatableFPRegisters = vixl::aarch64::kNumberOfFPRegisters;
static constexpr int kNumberOfAllocatableRegisterPairs = 0;
- void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
- void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void DumpCoreRegister(std::ostream& stream, int reg) const override;
+ void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
- InstructionSet GetInstructionSet() const OVERRIDE {
+ InstructionSet GetInstructionSet() const override {
return InstructionSet::kArm64;
}
- const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const {
- return isa_features_;
- }
+ const Arm64InstructionSetFeatures& GetInstructionSetFeatures() const;
- void Initialize() OVERRIDE {
+ void Initialize() override {
block_labels_.resize(GetGraph()->GetBlocks().size());
}
// We want to use the STP and LDP instructions to spill and restore registers for slow paths.
// These instructions can only encode offsets that are multiples of the register size accessed.
- uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return vixl::aarch64::kXRegSizeInBytes; }
+ uint32_t GetPreferredSlotsAlignment() const override { return vixl::aarch64::kXRegSizeInBytes; }
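[Sketch, not part of the patch: the alignment constraint behind GetPreferredSlotsAlignment(). STP/LDP of X registers encode a signed 7-bit immediate scaled by the access size, hence the 8-byte slot alignment.]

    // Offsets STP can and cannot encode for 64-bit registers:
    //   stp x19, x20, [sp, #16]   // encodable: 16 is a multiple of 8
    //   stp x19, x20, [sp, #12]   // not encodable: 12 is not a multiple of 8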
JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) {
jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARM64(switch_instr));
return jump_tables_.back().get();
}
- void Finalize(CodeAllocator* allocator) OVERRIDE;
+ void Finalize(CodeAllocator* allocator) override;
// Code generation helpers.
void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant);
- void MoveConstant(Location destination, int32_t value) OVERRIDE;
- void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE;
- void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+ void MoveConstant(Location destination, int32_t value) override;
+ void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
+ void AddLocationAsTemp(Location location, LocationSummary* locations) override;
void Load(DataType::Type type,
vixl::aarch64::CPURegister dst,
@@ -521,7 +529,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path = nullptr) OVERRIDE;
+ SlowPathCode* slow_path = nullptr) override;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -529,39 +537,53 @@ class CodeGeneratorARM64 : public CodeGenerator {
HInstruction* instruction,
SlowPathCode* slow_path);
- ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; }
+ ParallelMoveResolverARM64* GetMoveResolver() override { return &move_resolver_; }
- bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override {
return false;
}
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadString::LoadKind GetSupportedLoadStringKind(
- HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+ HLoadString::LoadKind desired_string_load_kind) override;
// Check if the desired_class_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadClass::LoadKind GetSupportedLoadClassKind(
- HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+ HLoadClass::LoadKind desired_class_load_kind) override;
// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke) OVERRIDE;
+ ArtMethod* method) override;
void GenerateStaticOrDirectCall(
- HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
void GenerateVirtualCall(
- HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
- DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE {
+ DataType::Type type ATTRIBUTE_UNUSED) override {
UNIMPLEMENTED(FATAL);
}
- // Add a new PC-relative method patch for an instruction and return the label
+ // Add a new boot image intrinsic patch for an instruction and return the label
+ // to be bound before the instruction. The instruction will be either the
+ // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+ // to the associated ADRP patch label).
+ vixl::aarch64::Label* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
+ vixl::aarch64::Label* adrp_label = nullptr);
+
+ // Add a new boot image relocation patch for an instruction and return the label
+ // to be bound before the instruction. The instruction will be either the
+ // ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label` pointing
+ // to the associated ADRP patch label).
+ vixl::aarch64::Label* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ vixl::aarch64::Label* adrp_label = nullptr);
+
+ // Add a new boot image method patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
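[Sketch, not part of the patch, of how these paired labels are typically consumed; `codegen`, `target_method` and `reg` are placeholders, and the EmitAdrpPlaceholder/EmitAddPlaceholder helpers are the ones declared elsewhere in this class.]

    // The first call creates the ADRP patch label; the second ties the ADD to it.
    vixl::aarch64::Label* adrp_label = codegen->NewBootImageMethodPatch(target_method);
    codegen->EmitAdrpPlaceholder(adrp_label, reg);
    vixl::aarch64::Label* add_label =
        codegen->NewBootImageMethodPatch(target_method, adrp_label);
    codegen->EmitAddPlaceholder(add_label, reg, reg);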
@@ -575,7 +597,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method,
vixl::aarch64::Label* adrp_label = nullptr);
- // Add a new PC-relative type patch for an instruction and return the label
+ // Add a new boot image type patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
@@ -591,7 +613,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
dex::TypeIndex type_index,
vixl::aarch64::Label* adrp_label = nullptr);
- // Add a new PC-relative string patch for an instruction and return the label
+ // Add a new boot image string patch for an instruction and return the label
// to be bound before the instruction. The instruction will be either the
// ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
// to the associated ADRP patch label).
@@ -607,9 +629,9 @@ class CodeGeneratorARM64 : public CodeGenerator {
dex::StringIndex string_index,
vixl::aarch64::Label* adrp_label = nullptr);
- // Add a new baker read barrier patch and return the label to be bound
- // before the CBNZ instruction.
- vixl::aarch64::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+ // Emit the CBNZ instruction for baker read barrier and record
+ // the associated patch for AOT or slow path for JIT.
+ void EmitBakerReadBarrierCbnz(uint32_t custom_data);
vixl::aarch64::Literal<uint32_t>* DeduplicateBootImageAddressLiteral(uint64_t address);
vixl::aarch64::Literal<uint32_t>* DeduplicateJitStringLiteral(const DexFile& dex_file,
@@ -627,10 +649,40 @@ class CodeGeneratorARM64 : public CodeGenerator {
vixl::aarch64::Register out,
vixl::aarch64::Register base);
- void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
+ void LoadBootImageAddress(vixl::aarch64::Register reg, uint32_t boot_image_reference);
+ void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
- void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
+ bool NeedsThunkCode(const linker::LinkerPatch& patch) const override;
+ void EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name) override;
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
+
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers based on read_barrier_option.
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ vixl::aarch64::Register obj,
+ uint32_t offset,
+ vixl::aarch64::Label* fixup_label,
+ ReadBarrierOption read_barrier_option);
+ // Generate MOV for the `old_value` in UnsafeCASObject and mark it with Baker read barrier.
+ void GenerateUnsafeCasOldValueMovWithBakerReadBarrier(vixl::aarch64::Register marked,
+ vixl::aarch64::Register old_value);
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ // Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch64::Register obj,
+ const vixl::aarch64::MemOperand& src,
+ bool needs_null_check,
+ bool use_load_acquire);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -642,58 +694,12 @@ class CodeGeneratorARM64 : public CodeGenerator {
bool use_load_acquire);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
- void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+ void GenerateArrayLoadWithBakerReadBarrier(HArrayGet* instruction,
Location ref,
vixl::aarch64::Register obj,
uint32_t data_offset,
Location index,
- vixl::aarch64::Register temp,
bool needs_null_check);
- // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
- // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
- //
- // Load the object reference located at the address
- // `obj + offset + (index << scale_factor)`, held by object `obj`, into
- // `ref`, and mark it if needed.
- void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl::aarch64::Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- vixl::aarch64::Register temp,
- bool needs_null_check,
- bool use_load_acquire);
-
- // Generate code checking whether the the reference field at the
- // address `obj + field_offset`, held by object `obj`, needs to be
- // marked, and if so, marking it and updating the field within `obj`
- // with the marked value.
- //
- // This routine is used for the implementation of the
- // UnsafeCASObject intrinsic with Baker read barriers.
- //
- // This method has a structure similar to
- // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
- // `ref` is only as a temporary here, and thus its value should not
- // be used afterwards.
- void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl::aarch64::Register obj,
- Location field_offset,
- vixl::aarch64::Register temp,
- bool needs_null_check,
- bool use_load_acquire);
-
- // Generate a heap reference load (with no read barrier).
- void GenerateRawReferenceLoad(HInstruction* instruction,
- Location ref,
- vixl::aarch64::Register obj,
- uint32_t offset,
- Location index,
- size_t scale_factor,
- bool needs_null_check,
- bool use_load_acquire);
// Emit code checking the status of the Marking Register, and
// aborting the program if MR does not match the value stored in the
@@ -759,12 +765,78 @@ class CodeGeneratorARM64 : public CodeGenerator {
// artReadBarrierForRootSlow.
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
- void GenerateNop() OVERRIDE;
+ void GenerateNop() override;
- void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
- void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) override;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) override;
private:
+ // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
+
+ enum class BakerReadBarrierKind : uint8_t {
+ kField, // Field get or array get with constant offset (i.e. constant index).
+ kAcquire, // Volatile field get.
+ kArray, // Array get with index in register.
+ kGcRoot, // GC root load.
+ kLast = kGcRoot
+ };
+
+ static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* sp/zr is invalid */ 31u;
+
+ static constexpr size_t kBitsForBakerReadBarrierKind =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+ static constexpr size_t kBakerReadBarrierBitsForRegister =
+ MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg);
+ using BakerReadBarrierKindField =
+ BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+ using BakerReadBarrierFirstRegField =
+ BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>;
+ using BakerReadBarrierSecondRegField =
+ BitField<uint32_t,
+ kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister,
+ kBakerReadBarrierBitsForRegister>;
+
+ static void CheckValidReg(uint32_t reg) {
+ DCHECK(reg < vixl::aarch64::lr.GetCode() &&
+ reg != vixl::aarch64::ip0.GetCode() &&
+ reg != vixl::aarch64::ip1.GetCode()) << reg;
+ }
+
+ static inline uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg, uint32_t holder_reg) {
+ CheckValidReg(base_reg);
+ CheckValidReg(holder_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(holder_reg);
+ }
+
+ static inline uint32_t EncodeBakerReadBarrierAcquireData(uint32_t base_reg, uint32_t holder_reg) {
+ CheckValidReg(base_reg);
+ CheckValidReg(holder_reg);
+ DCHECK_NE(base_reg, holder_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kAcquire) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(holder_reg);
+ }
+
+ static inline uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+ CheckValidReg(base_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
+ }
+
+ static inline uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg) {
+ CheckValidReg(root_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+ BakerReadBarrierFirstRegField::Encode(root_reg) |
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg);
+ }
+
+ void CompileBakerReadBarrierThunk(Arm64Assembler& assembler,
+ uint32_t encoded_data,
+ /*out*/ std::string* debug_name);
+
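[Sketch, not part of the patch, making the packed layout concrete: with kLast == kGcRoot == 3 the kind field takes 2 bits, followed by two 5-bit register fields.]

    // kField data for base_reg=1, holder_reg=2:
    //   kind | (base_reg << 2) | (holder_reg << 7)
    static_assert((0u | (1u << 2) | (2u << 7)) == 0x104u, "example field encoding");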
using Uint64ToLiteralMap = ArenaSafeMap<uint64_t, vixl::aarch64::Literal<uint64_t>*>;
using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, vixl::aarch64::Literal<uint32_t>*>;
using StringToLiteralMap = ArenaSafeMap<StringReference,
@@ -814,13 +886,13 @@ class CodeGeneratorARM64 : public CodeGenerator {
InstructionCodeGeneratorARM64 instruction_visitor_;
ParallelMoveResolverARM64 move_resolver_;
Arm64Assembler assembler_;
- const Arm64InstructionSetFeatures& isa_features_;
// Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
Uint32ToLiteralMap uint32_literals_;
// Deduplication map for 64-bit literals, used for non-patchable method address or method code.
Uint64ToLiteralMap uint64_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/BootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -828,10 +900,12 @@ class CodeGeneratorARM64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+ // PC-relative patch info for IntrinsicObjects.
+ ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
// Baker read barrier patch info.
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
@@ -840,6 +914,20 @@ class CodeGeneratorARM64 : public CodeGenerator {
// Patches for class literals in JIT compiled code.
TypeToLiteralMap jit_class_patches_;
+ // Baker read barrier slow paths, mapping custom data (uint32_t) to label.
+ // Wrap the label to work around vixl::aarch64::Label being non-copyable
+ // and non-moveable and as such unusable in ArenaSafeMap<>.
+ struct LabelWrapper {
+ LabelWrapper(const LabelWrapper& src)
+ : label() {
+ DCHECK(!src.label.IsLinked() && !src.label.IsBound());
+ }
+ LabelWrapper() = default;
+ vixl::aarch64::Label label;
+ };
+ ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_;
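[Sketch, not part of the patch, of the assumed lookup pattern; it relies on SafeMap::GetOrCreate as used by the literal deduplication maps.]

    // Get or lazily create the JIT slow path label for this custom data.
    vixl::aarch64::Label* slow_path_entry =
        &jit_baker_read_barrier_slow_paths_.GetOrCreate(
            custom_data, []() { return LabelWrapper(); }).label;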
+
+ friend class linker::Arm64RelativePatcherTest;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARM64);
};
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 2452139d42..6469c6964a 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -27,9 +27,10 @@
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
#include "heap_poisoning.h"
+#include "intrinsics.h"
#include "intrinsics_arm_vixl.h"
-#include "linker/arm/relative_patcher_thumb2.h"
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
@@ -47,7 +48,6 @@ using namespace vixl32; // NOLINT(build/namespaces)
using helpers::DRegisterFrom;
using helpers::DWARFReg;
-using helpers::HighDRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputOperandAt;
@@ -85,18 +85,10 @@ static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
// Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle
// offset < 4KiB. For offsets >= 4KiB, the load shall be emitted as two or more instructions.
-// For the Baker read barrier implementation using link-generated thunks we need to split
+// For the Baker read barrier implementation using link-time generated thunks we need to split
// the offset explicitly.
constexpr uint32_t kReferenceLoadMinFarOffset = 4 * KB;
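[Sketch, not part of the patch; variable names hypothetical. One way the split can be expressed:]

    // Fold the 4KiB-aligned part of a far offset into the base register and
    // keep only the low bits in the LDR immediate.
    uint32_t base_adjustment = offset & ~(kReferenceLoadMinFarOffset - 1u);
    uint32_t ldr_offset = offset & (kReferenceLoadMinFarOffset - 1u);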
-// Flags controlling the use of link-time generated thunks for Baker read barriers.
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForFields = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForArrays = true;
-constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true;
-
-// The reserved entrypoint register for link-time generated thunks.
-const vixl32::Register kBakerCcEntrypointRegister = r4;
-
// Using a base helps identify when we hit Marking Register check breakpoints.
constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
@@ -111,26 +103,6 @@ constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10;
// Marker that code is yet to be, and must, be implemented.
#define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
-static inline void ExcludeIPAndBakerCcEntrypointRegister(UseScratchRegisterScope* temps,
- HInstruction* instruction) {
- DCHECK(temps->IsAvailable(ip));
- temps->Exclude(ip);
- DCHECK(!temps->IsAvailable(kBakerCcEntrypointRegister));
- DCHECK_EQ(kBakerCcEntrypointRegister.GetCode(),
- linker::Thumb2RelativePatcher::kBakerCcEntrypointRegister);
- DCHECK_NE(instruction->GetLocations()->GetTempCount(), 0u);
- DCHECK(RegisterFrom(instruction->GetLocations()->GetTemp(
- instruction->GetLocations()->GetTempCount() - 1u)).Is(kBakerCcEntrypointRegister));
-}
-
-static inline void EmitPlaceholderBne(CodeGeneratorARMVIXL* codegen, vixl32::Label* patch_label) {
- ExactAssemblyScope eas(codegen->GetVIXLAssembler(), kMaxInstructionSizeInBytes);
- __ bind(patch_label);
- vixl32::Label placeholder_label;
- __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time.
- __ bind(&placeholder_label);
-}
-
static inline bool CanEmitNarrowLdr(vixl32::Register rt, vixl32::Register rn, uint32_t offset) {
return rt.IsLow() && rn.IsLow() && offset < 32u;
}
@@ -139,7 +111,7 @@ class EmitAdrCode {
public:
EmitAdrCode(ArmVIXLMacroAssembler* assembler, vixl32::Register rd, vixl32::Label* label)
: assembler_(assembler), rd_(rd), label_(label) {
- ExactAssemblyScope aas(assembler, kMaxInstructionSizeInBytes);
+ DCHECK(!assembler->AllowMacroInstructions()); // In ExactAssemblyScope.
adr_location_ = assembler->GetCursorOffset();
assembler->adr(EncodingSize(Wide), rd, label);
}
@@ -165,6 +137,15 @@ class EmitAdrCode {
int32_t adr_location_;
};
+static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
+ // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+  // that the kPrimNot result register is the same as the first argument register.
+ return caller_saves;
+}
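[Sketch, not part of the patch, of the assumed usage at call sites:]

    // With a SaveEverything slow path, only the caller-save register holding the
    // kPrimNot result needs to be declared explicitly.
    locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());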
+
// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
// for each live D registers they treat two corresponding S registers as live ones.
//
@@ -338,7 +319,7 @@ void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSumm
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
size_t orig_offset = stack_offset;
- const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
for (uint32_t i : LowToHighBits(core_spills)) {
// If the register holds an object, update the stack mask.
if (locations->RegisterContainsObject(i)) {
@@ -353,7 +334,7 @@ void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSumm
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);
- uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
orig_offset = stack_offset;
for (uint32_t i : LowToHighBits(fp_spills)) {
DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
@@ -376,7 +357,7 @@ void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationS
size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
size_t orig_offset = stack_offset;
- const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+ const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
for (uint32_t i : LowToHighBits(core_spills)) {
DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
@@ -387,7 +368,7 @@ void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationS
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);
- uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+ uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
while (fp_spills != 0u) {
uint32_t begin = CTZ(fp_spills);
uint32_t tmp = fp_spills + (1u << begin);
@@ -402,7 +383,7 @@ class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
@@ -416,9 +397,9 @@ class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "NullCheckSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
@@ -429,16 +410,16 @@ class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
: SlowPathCodeARMVIXL(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "DivZeroCheckSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
@@ -449,7 +430,7 @@ class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
: SlowPathCodeARMVIXL(instruction), successor_(successor) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
@@ -470,7 +451,7 @@ class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
return successor_;
}
- const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "SuspendCheckSlowPathARMVIXL"; }
private:
// If not null, the block to branch to after the suspend check.
@@ -487,7 +468,7 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
: SlowPathCodeARMVIXL(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
@@ -514,9 +495,9 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "BoundsCheckSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
@@ -524,29 +505,39 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
- LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit)
- : SlowPathCodeARMVIXL(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+ LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at)
+ : SlowPathCodeARMVIXL(at), cls_(cls) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Location out = locations->Out();
+ const uint32_t dex_pc = instruction_->GetDexPc();
+ bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
+ bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
- dex::TypeIndex type_index = cls_->GetTypeIndex();
- __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
- QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
- : kQuickInitializeType;
- arm_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
- if (do_clinit_) {
- CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ if (must_resolve_type) {
+ DCHECK(IsSameDexFile(cls_->GetDexFile(), arm_codegen->GetGraph()->GetDexFile()));
+ dex::TypeIndex type_index = cls_->GetTypeIndex();
+ __ Mov(calling_convention.GetRegisterAt(0), type_index.index_);
+ arm_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ // If we also must_do_clinit, the resolved type is now in the correct register.
} else {
- CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ DCHECK(must_do_clinit);
+ Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
+ arm_codegen->Move32(LocationFrom(calling_convention.GetRegisterAt(0)), source);
+ }
+ if (must_do_clinit) {
+ arm_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
}
// Move the class to the desired location.
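[Summary sketch, not part of the patch, of the rewritten slow path logic above:]

    // must_resolve_type:  kQuickResolveType leaves the resolved class in arg0;
    //                     kQuickInitializeStaticStorage then runs on it if needed.
    // otherwise:          the class is already available (`out` for HLoadClass,
    //                     input 0 for HClinitCheck) and is moved to arg0 before
    //                     kQuickInitializeStaticStorage.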
@@ -558,18 +549,12 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "LoadClassSlowPathARMVIXL"; }
private:
// The class this slow path will load.
HLoadClass* const cls_;
- // The dex PC of `at_`.
- const uint32_t dex_pc_;
-
- // Whether to initialize the class.
- const bool do_clinit_;
-
DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
};
@@ -578,7 +563,7 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
explicit LoadStringSlowPathARMVIXL(HLoadString* instruction)
: SlowPathCodeARMVIXL(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
DCHECK(instruction_->IsLoadString());
DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
LocationSummary* locations = instruction_->GetLocations();
@@ -600,7 +585,7 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "LoadStringSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARMVIXL);
@@ -611,7 +596,7 @@ class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
: SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(instruction_->IsCheckCast()
|| !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -655,9 +640,9 @@ class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
}
}
- const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "TypeCheckSlowPathARMVIXL"; }
- bool IsFatal() const OVERRIDE { return is_fatal_; }
+ bool IsFatal() const override { return is_fatal_; }
private:
const bool is_fatal_;
@@ -670,7 +655,7 @@ class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
: SlowPathCodeARMVIXL(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
__ Bind(GetEntryLabel());
LocationSummary* locations = instruction_->GetLocations();
@@ -683,7 +668,7 @@ class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
- const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "DeoptimizationSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
@@ -693,7 +678,7 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -724,495 +709,12 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "ArraySetSlowPathARMVIXL"; }
private:
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
};
-// Abstract base class for read barrier slow paths marking a reference
-// `ref`.
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathBaseARMVIXL : public SlowPathCodeARMVIXL {
- protected:
- ReadBarrierMarkSlowPathBaseARMVIXL(HInstruction* instruction, Location ref, Location entrypoint)
- : SlowPathCodeARMVIXL(instruction), ref_(ref), entrypoint_(entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathBaseARMVIXL"; }
-
- // Generate assembly code calling the read barrier marking runtime
- // entry point (ReadBarrierMarkRegX).
- void GenerateReadBarrierMarkRuntimeCall(CodeGenerator* codegen) {
- vixl32::Register ref_reg = RegisterFrom(ref_);
-
- // No need to save live registers; it's taken care of by the
- // entrypoint. Also, there is no need to update the stack mask,
- // as this runtime call will not trigger a garbage collection.
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
- DCHECK(!ref_reg.Is(sp));
- DCHECK(!ref_reg.Is(lr));
- DCHECK(!ref_reg.Is(pc));
- // IP is used internally by the ReadBarrierMarkRegX entry point
- // as a temporary, it cannot be the entry point's input/output.
- DCHECK(!ref_reg.Is(ip));
- DCHECK(ref_reg.IsRegister()) << ref_reg;
- // "Compact" slow path, saving two moves.
- //
- // Instead of using the standard runtime calling convention (input
- // and output in R0):
- //
- // R0 <- ref
- // R0 <- ReadBarrierMark(R0)
- // ref <- R0
- //
- // we just use rX (the register containing `ref`) as input and output
- // of a dedicated entrypoint:
- //
- // rX <- ReadBarrierMarkRegX(rX)
- //
- if (entrypoint_.IsValid()) {
- arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
- __ Blx(RegisterFrom(entrypoint_));
- } else {
- // Entrypoint is not already loaded, load from the thread.
- int32_t entry_point_offset =
- Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg.GetCode());
- // This runtime call does not require a stack map.
- arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
- }
- }
-
- // The location (register) of the marked object reference.
- const Location ref_;
-
- // The location of the entrypoint if already loaded.
- const Location entrypoint_;
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathBaseARMVIXL);
-};
-
-// Slow path marking an object reference `ref` during a read
-// barrier. The field `obj.field` in the object `obj` holding this
-// reference does not get updated by this slow path after marking.
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class ReadBarrierMarkSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
- ReadBarrierMarkSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint) {
- DCHECK(kEmitCompilerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathARMVIXL"; }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- DCHECK(locations->CanCall());
- DCHECK(ref_.IsRegister()) << ref_;
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
- DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
-
- __ Bind(GetEntryLabel());
- GenerateReadBarrierMarkRuntimeCall(codegen);
- __ B(GetExitLabel());
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARMVIXL);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). The field `obj.field` in the object `obj` holding
-// this reference does not get updated by this slow path after marking
-// (see LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
-// below for that).
-//
-// This means that after the execution of this slow path, `ref` will
-// always be up-to-date, but `obj.field` may not; i.e., after the
-// flip, `ref` will be a to-space reference, but `obj.field` will
-// probably still be a from-space reference (unless it gets updated by
-// another thread, or if another thread installed another object
-// reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class LoadReferenceWithBakerReadBarrierSlowPathARMVIXL : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
- LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check,
- vixl32::Register temp,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- temp_(temp) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierSlowPathARMVIXL";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- vixl32::Register ref_reg = RegisterFrom(ref_);
- DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
- DCHECK(instruction_->IsInstanceFieldGet() ||
- instruction_->IsStaticFieldGet() ||
- instruction_->IsArrayGet() ||
- instruction_->IsArraySet() ||
- instruction_->IsInstanceOf() ||
- instruction_->IsCheckCast() ||
- (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
- (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking slow path: "
- << instruction_->DebugName();
- // The read barrier instrumentation of object ArrayGet
- // instructions does not support the HIntermediateAddress
- // instruction.
- DCHECK(!(instruction_->IsArrayGet() &&
- instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
-
- // Temporary register `temp_`, used to store the lock word, must
- // not be IP, as we may use it to emit the reference load (in the
- // call to GenerateRawReferenceLoad below), and we need the lock
- // word to still be in `temp_` after the reference load.
- DCHECK(!temp_.Is(ip));
-
- __ Bind(GetEntryLabel());
-
- // When using MaybeGenerateReadBarrierSlow, the read barrier call is
- // inserted after the original load. However, in fast path based
- // Baker's read barriers, we need to perform the load of
- // mirror::Object::monitor_ *before* the original reference load.
- // This load-load ordering is required by the read barrier.
- // The slow path (for Baker's algorithm) should look like:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- //
- // Note: the original implementation in ReadBarrier::Barrier is
- // slightly more complex as it performs additional checks that we do
- // not do here for performance reasons.
-
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp_, obj_, monitor_offset);
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_, obj_, Operand(temp_, ShiftType::LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- arm_codegen->GenerateRawReferenceLoad(
- instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp_, temp_, LockWord::kReadBarrierStateShift + 1);
- __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- __ B(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- vixl32::Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- ScaleFactor scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // A temporary register used to hold the lock word of `obj_`.
- vixl32::Register temp_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierSlowPathARMVIXL);
-};
-
-// Slow path loading `obj`'s lock word, loading a reference from
-// object `*(obj + offset + (index << scale_factor))` into `ref`, and
-// marking `ref` if `obj` is gray according to the lock word (Baker
-// read barrier). If needed, this slow path also atomically updates
-// the field `obj.field` in the object `obj` holding this reference
-// after marking (contrary to
-// LoadReferenceWithBakerReadBarrierSlowPathARMVIXL above, which never
-// tries to update `obj.field`).
-//
-// This means that after the execution of this slow path, both `ref`
-// and `obj.field` will be up-to-date; i.e., after the flip, both will
-// hold the same to-space reference (unless another thread installed
-// another object reference (different from `ref`) in `obj.field`).
-//
-// Argument `entrypoint` must be a register location holding the read
-// barrier marking runtime entry point to be invoked or an empty
-// location; in the latter case, the read barrier marking runtime
-// entry point will be loaded by the slow path code itself.
-class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL
- : public ReadBarrierMarkSlowPathBaseARMVIXL {
- public:
- LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
- HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check,
- vixl32::Register temp1,
- vixl32::Register temp2,
- Location entrypoint = Location::NoLocation())
- : ReadBarrierMarkSlowPathBaseARMVIXL(instruction, ref, entrypoint),
- obj_(obj),
- offset_(offset),
- index_(index),
- scale_factor_(scale_factor),
- needs_null_check_(needs_null_check),
- temp1_(temp1),
- temp2_(temp2) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
- }
-
- const char* GetDescription() const OVERRIDE {
- return "LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL";
- }
-
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
- LocationSummary* locations = instruction_->GetLocations();
- vixl32::Register ref_reg = RegisterFrom(ref_);
- DCHECK(locations->CanCall());
- DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg.GetCode())) << ref_reg;
- DCHECK_NE(ref_.reg(), LocationFrom(temp1_).reg());
-
- // This slow path is only used by the UnsafeCASObject intrinsic at the moment.
- DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
- << "Unexpected instruction in read barrier marking and field updating slow path: "
- << instruction_->DebugName();
- DCHECK(instruction_->GetLocations()->Intrinsified());
- DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
- DCHECK_EQ(offset_, 0u);
- DCHECK_EQ(scale_factor_, ScaleFactor::TIMES_1);
- Location field_offset = index_;
- DCHECK(field_offset.IsRegisterPair()) << field_offset;
-
- // Temporary register `temp1_`, used to store the lock word, must
- // not be IP, as we may use it to emit the reference load (in the
- // call to GenerateRawReferenceLoad below), and we need the lock
- // word to still be in `temp1_` after the reference load.
- DCHECK(!temp1_.Is(ip));
-
- __ Bind(GetEntryLabel());
-
- // The implementation is similar to LoadReferenceWithBakerReadBarrierSlowPathARMVIXL's:
- //
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // old_ref = ref;
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // compareAndSwapObject(obj, field_offset, old_ref, ref);
- // }
-
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
-
- // /* int32_t */ monitor = obj->monitor_
- uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
- arm_codegen->GetAssembler()->LoadFromOffset(kLoadWord, temp1_, obj_, monitor_offset);
- if (needs_null_check_) {
- codegen->MaybeRecordImplicitNullCheck(instruction_);
- }
- // /* LockWord */ lock_word = LockWord(monitor)
- static_assert(sizeof(LockWord) == sizeof(int32_t),
- "art::LockWord and int32_t have different sizes.");
-
- // Introduce a dependency on the lock_word including the rb_state,
- // which shall prevent load-load reordering without using
- // a memory barrier (which would be more expensive).
- // `obj` is unchanged by this operation, but its value now depends
- // on `temp`.
- __ Add(obj_, obj_, Operand(temp1_, ShiftType::LSR, 32));
-
- // The actual reference load.
- // A possible implicit null check has already been handled above.
- arm_codegen->GenerateRawReferenceLoad(
- instruction_, ref_, obj_, offset_, index_, scale_factor_, /* needs_null_check */ false);
-
- // Mark the object `ref` when `obj` is gray.
- //
- // if (rb_state == ReadBarrier::GrayState())
- // ref = ReadBarrier::Mark(ref);
- //
- // Given the numeric representation, it's enough to check the low bit of the
- // rb_state. We do that by shifting the bit out of the lock word with LSRS
- // which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
- static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
- __ Lsrs(temp1_, temp1_, LockWord::kReadBarrierStateShift + 1);
- __ B(cc, GetExitLabel()); // Carry flag is the last bit shifted out by LSRS.
-
- // Save the old value of the reference before marking it.
- // Note that we cannot use IP to save the old reference, as IP is
- // used internally by the ReadBarrierMarkRegX entry point, and we
- // need the old reference after the call to that entry point.
- DCHECK(!temp1_.Is(ip));
- __ Mov(temp1_, ref_reg);
-
- GenerateReadBarrierMarkRuntimeCall(codegen);
-
- // If the new reference is different from the old reference,
- // update the field in the holder (`*(obj_ + field_offset)`).
- //
- // Note that this field could also hold a different object, if
- // another thread had concurrently changed it. In that case, the
- // LDREX/CMP/BNE sequence of instructions in the compare-and-set
- // (CAS) operation below would abort the CAS, leaving the field
- // as-is.
- __ Cmp(temp1_, ref_reg);
- __ B(eq, GetExitLabel());
-
- // Update the the holder's field atomically. This may fail if
- // mutator updates before us, but it's OK. This is achieved
- // using a strong compare-and-set (CAS) operation with relaxed
- // memory synchronization ordering, where the expected value is
- // the old reference and the desired value is the new reference.
-
- UseScratchRegisterScope temps(arm_codegen->GetVIXLAssembler());
- // Convenience aliases.
- vixl32::Register base = obj_;
- // The UnsafeCASObject intrinsic uses a register pair as field
- // offset ("long offset"), of which only the low part contains
- // data.
- vixl32::Register offset = LowRegisterFrom(field_offset);
- vixl32::Register expected = temp1_;
- vixl32::Register value = ref_reg;
- vixl32::Register tmp_ptr = temps.Acquire(); // Pointer to actual memory.
- vixl32::Register tmp = temp2_; // Value in memory.
-
- __ Add(tmp_ptr, base, offset);
-
- if (kPoisonHeapReferences) {
- arm_codegen->GetAssembler()->PoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not poison `value`, as it is the same register as
- // `expected`, which has just been poisoned.
- } else {
- arm_codegen->GetAssembler()->PoisonHeapReference(value);
- }
- }
-
- // do {
- // tmp = [r_ptr] - expected;
- // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
-
- vixl32::Label loop_head, comparison_failed, exit_loop;
- __ Bind(&loop_head);
- __ Ldrex(tmp, MemOperand(tmp_ptr));
- __ Cmp(tmp, expected);
- __ B(ne, &comparison_failed, /* far_target */ false);
- __ Strex(tmp, value, MemOperand(tmp_ptr));
- __ CompareAndBranchIfZero(tmp, &exit_loop, /* far_target */ false);
- __ B(&loop_head);
- __ Bind(&comparison_failed);
- __ Clrex();
- __ Bind(&exit_loop);
-
- if (kPoisonHeapReferences) {
- arm_codegen->GetAssembler()->UnpoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not unpoison `value`, as it is the same register as
- // `expected`, which has just been unpoisoned.
- } else {
- arm_codegen->GetAssembler()->UnpoisonHeapReference(value);
- }
- }
-
- __ B(GetExitLabel());
- }
-
- private:
- // The register containing the object holding the marked object reference field.
- const vixl32::Register obj_;
- // The offset, index and scale factor to access the reference in `obj_`.
- uint32_t offset_;
- Location index_;
- ScaleFactor scale_factor_;
- // Is a null check required?
- bool needs_null_check_;
- // A temporary register used to hold the lock word of `obj_`; and
- // also to hold the original reference value, when the reference is
- // marked.
- const vixl32::Register temp1_;
- // A temporary register used in the implementation of the CAS, to
- // update the object's reference field.
- const vixl32::Register temp2_;
-
- DISALLOW_COPY_AND_ASSIGN(LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL);
-};
-
// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
public:
@@ -1242,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
vixl32::Register reg_out = RegisterFrom(out_);
@@ -1366,7 +868,7 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE {
+ const char* GetDescription() const override {
return "ReadBarrierForHeapReferenceSlowPathARMVIXL";
}
@@ -1408,7 +910,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
DCHECK(kEmitCompilerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
vixl32::Register reg_out = RegisterFrom(out_);
DCHECK(locations->CanCall());
@@ -1434,7 +936,7 @@ class ReadBarrierForRootSlowPathARMVIXL : public SlowPathCodeARMVIXL {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathARMVIXL"; }
+ const char* GetDescription() const override { return "ReadBarrierForRootSlowPathARMVIXL"; }
private:
const Location out_;
@@ -1517,6 +1019,10 @@ void CodeGeneratorARMVIXL::DumpFloatingPointRegister(std::ostream& stream, int r
stream << vixl32::SRegister(reg);
}
+const ArmInstructionSetFeatures& CodeGeneratorARMVIXL::GetInstructionSetFeatures() const {
+ return *GetCompilerOptions().GetInstructionSetFeatures()->AsArmInstructionSetFeatures();
+}
+
static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
uint32_t mask = 0;
for (uint32_t i = regs.GetFirstSRegister().GetCode();
@@ -1531,26 +1037,26 @@ static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
size_t CodeGeneratorARMVIXL::SaveCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
uint32_t reg_id ATTRIBUTE_UNUSED) {
TODO_VIXL32(FATAL);
- return 0;
+ UNREACHABLE();
}
// Restores the register from the stack. Returns the size taken on stack.
size_t CodeGeneratorARMVIXL::RestoreCoreRegister(size_t stack_index ATTRIBUTE_UNUSED,
uint32_t reg_id ATTRIBUTE_UNUSED) {
TODO_VIXL32(FATAL);
- return 0;
+ UNREACHABLE();
}
size_t CodeGeneratorARMVIXL::SaveFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
uint32_t reg_id ATTRIBUTE_UNUSED) {
TODO_VIXL32(FATAL);
- return 0;
+ UNREACHABLE();
}
size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
uint32_t reg_id ATTRIBUTE_UNUSED) {
TODO_VIXL32(FATAL);
- return 0;
+ UNREACHABLE();
}
static void GenerateDataProcInstruction(HInstruction::InstructionKind kind,
@@ -2033,7 +1539,7 @@ static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* cod
vixl32::Label done_label;
vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
- __ B(condition.second, final_label, /* far_target */ false);
+ __ B(condition.second, final_label, /* is_far_target= */ false);
__ Mov(out, 1);
if (done_label.IsReferenced()) {
@@ -2334,7 +1840,6 @@ vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
}
CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
- const ArmInstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
: CodeGenerator(graph,
@@ -2351,7 +1856,6 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
instruction_visitor_(graph, this),
move_resolver_(graph->GetAllocator(), this),
assembler_(graph->GetAllocator()),
- isa_features_(isa_features),
uint32_literals_(std::less<uint32_t>(),
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -2360,11 +1864,14 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
- graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ jit_baker_read_barrier_slow_paths_(std::less<uint32_t>(),
+ graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
// Always save the LR register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(LR));
// Give D30 and D31 as scratch register to VIXL. The register allocator only works on
@@ -2420,8 +1927,100 @@ void CodeGeneratorARMVIXL::FixJumpTables() {
void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
FixJumpTables();
+
+ // Emit JIT baker read barrier slow paths.
+ DCHECK(Runtime::Current()->UseJitCompilation() || jit_baker_read_barrier_slow_paths_.empty());
+ for (auto& entry : jit_baker_read_barrier_slow_paths_) {
+ uint32_t encoded_data = entry.first;
+ vixl::aarch32::Label* slow_path_entry = &entry.second.label;
+ __ Bind(slow_path_entry);
+ CompileBakerReadBarrierThunk(*GetAssembler(), encoded_data, /* debug_name= */ nullptr);
+ }
+
GetAssembler()->FinalizeCode();
CodeGenerator::Finalize(allocator);
+
+ // Verify Baker read barrier linker patches.
+ if (kIsDebugBuild) {
+ ArrayRef<const uint8_t> code = allocator->GetMemory();
+ for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
+ DCHECK(info.label.IsBound());
+ uint32_t literal_offset = info.label.GetLocation();
+ DCHECK_ALIGNED(literal_offset, 2u);
+
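+      // Helpers to read T32 code: halfwords are stored little-endian, and a
+      // 32-bit instruction is composed with its first halfword in the high bits.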
+ auto GetInsn16 = [&code](uint32_t offset) {
+ DCHECK_ALIGNED(offset, 2u);
+ return (static_cast<uint32_t>(code[offset + 0]) << 0) +
+ (static_cast<uint32_t>(code[offset + 1]) << 8);
+ };
+ auto GetInsn32 = [=](uint32_t offset) {
+ return (GetInsn16(offset) << 16) + (GetInsn16(offset + 2u) << 0);
+ };
+
+ uint32_t encoded_data = info.custom_data;
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+      // Check that the instruction adjacent to the label matches the expected LDR (or ADD):
+      // it follows the label for fields/arrays and precedes it for GC roots and CAS.
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(code.size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn32(literal_offset + 4u);
+ // LDR (immediate), encoding T3, with correct base_reg.
+ CheckValidReg((next_insn >> 12) & 0xfu); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xffff0000u, 0xf8d00000u | (base_reg << 16));
+ } else {
+ DCHECK_GE(code.size() - literal_offset, 6u);
+ uint32_t next_insn = GetInsn16(literal_offset + 4u);
+ // LDR (immediate), encoding T1, with correct base_reg.
+ CheckValidReg(next_insn & 0x7u); // Check destination register.
+ const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(next_insn & 0xf838u, 0x6800u | (base_reg << 3));
+ }
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ DCHECK_GE(code.size() - literal_offset, 8u);
+ uint32_t next_insn = GetInsn32(literal_offset + 4u);
+          // LDR (register), encoding T2, with correct base_reg and an LSL #2 shift (imm2 == 2).
+          CheckValidReg((next_insn >> 12) & 0xfu);  // Check destination register.
+          const uint32_t base_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+          CHECK_EQ(next_insn & 0xffff0ff0u, 0xf8500020u | (base_reg << 16));
+          CheckValidReg(next_insn & 0xf);  // Check index register.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot: {
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ if (width == BakerReadBarrierWidth::kWide) {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn32(literal_offset - 4u);
+ // LDR (immediate), encoding T3, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xfff0f000u, 0xf8d00000u | (root_reg << 12));
+ } else {
+ DCHECK_GE(literal_offset, 2u);
+ uint32_t prev_insn = GetInsn16(literal_offset - 2u);
+ // LDR (immediate), encoding T1, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xf807u, 0x6800u | root_reg);
+ }
+ break;
+ }
+ case BakerReadBarrierKind::kUnsafeCas: {
+ DCHECK_GE(literal_offset, 4u);
+ uint32_t prev_insn = GetInsn32(literal_offset - 4u);
+ // ADD (register), encoding T3, with correct root_reg.
+ const uint32_t root_reg = BakerReadBarrierFirstRegField::Decode(encoded_data);
+ CHECK_EQ(prev_insn & 0xfff0fff0u, 0xeb000000u | (root_reg << 8));
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
+ }
+ }
}
void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
@@ -2494,6 +2093,8 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
}
if (HasEmptyFrame()) {
+ // Ensure that the CFI opcode list is not empty.
+ GetAssembler()->cfi().Nop();
return;
}
@@ -2560,7 +2161,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
}
- MaybeGenerateMarkingRegisterCheck(/* code */ 1);
+ MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
}
void CodeGeneratorARMVIXL::GenerateFrameExit() {
@@ -2669,7 +2270,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Typ
case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected parameter type " << type;
- break;
+ UNREACHABLE();
}
return Location::NoLocation();
}
@@ -2828,7 +2429,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock*
}
if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 2);
}
if (!codegen_->GoesToNextBlock(block, successor)) {
__ B(codegen_->GetLabelOf(successor));
@@ -3007,7 +2608,7 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
- GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
+ GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
@@ -3026,9 +2627,9 @@ void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCodeARMVIXL* slow_path =
deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
GenerateTestAndBranch(deoptimize,
- /* condition_input_index */ 0,
+ /* condition_input_index= */ 0,
slow_path->GetEntryLabel(),
- /* false_target */ nullptr);
+ /* false_target= */ nullptr);
}
void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
@@ -3194,7 +2795,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
}
}
- GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false);
+ GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target= */ false);
codegen_->MoveLocation(out, src, type);
if (output_overlaps_with_condition_inputs) {
__ B(target);
@@ -3536,7 +3137,7 @@ void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 3);
}
void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
@@ -3567,7 +3168,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD
DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 4);
return;
}
@@ -3575,7 +3176,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD
codegen_->GenerateStaticOrDirectCall(
invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 5);
}
void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
@@ -3594,14 +3195,14 @@ void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
if (TryGenerateIntrinsicCode(invoke, codegen_)) {
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 6);
return;
}
codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
DCHECK(!codegen_->IsLeafMethod());
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 7);
}
void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -3679,7 +3280,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* inv
DCHECK(!codegen_->IsLeafMethod());
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 8);
}
void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
@@ -3688,7 +3289,16 @@ void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke)
void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
codegen_->GenerateInvokePolymorphicCall(invoke);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 9);
+}
+
+void LocationsBuilderARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
+ HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeCustom(HInvokeCustom* invoke) {
+ codegen_->GenerateInvokeCustomCall(invoke);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 10);
}
void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
@@ -4405,7 +4015,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOpera
int64_t magic;
int shift;
- CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+ CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
// TODO(VIXL): Change the static cast to Operand::From() after VIXL is fixed.
__ Mov(temp1, static_cast<int32_t>(magic));
@@ -4697,6 +4307,299 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ vixl32::Register op1 = RegisterFrom(op1_loc);
+ vixl32::Register op2 = RegisterFrom(op2_loc);
+ vixl32::Register out = RegisterFrom(out_loc);
+
+ __ Cmp(op1, op2);
+
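+  // Select the result with an IT block: for min, keep op1 when op1 < op2 (lt)
+  // and take op2 otherwise (ge); for max, the conditions are gt/le.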
+ {
+ ExactAssemblyScope aas(GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+
+ __ ite(is_min ? lt : gt);
+ __ mov(is_min ? lt : gt, out, op1);
+ __ mov(is_min ? ge : le, out, op2);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxLong(LocationSummary* locations, bool is_min) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
+ vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
+ vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
+ vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
+ vixl32::Register out_lo = LowRegisterFrom(out_loc);
+ vixl32::Register out_hi = HighRegisterFrom(out_loc);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ const vixl32::Register temp = temps.Acquire();
+
+ DCHECK(op1_lo.Is(out_lo));
+ DCHECK(op1_hi.Is(out_hi));
+
+ // Compare op1 >= op2, or op1 < op2.
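+  // CMP subtracts the low halves and sets the borrow consumed by SBCS on the
+  // high halves; together they make GE/LT valid for the signed 64-bit compare.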
+ __ Cmp(out_lo, op2_lo);
+ __ Sbcs(temp, out_hi, op2_hi);
+
+ // Now GE/LT condition code is correct for the long comparison.
+ {
+ vixl32::ConditionType cond = is_min ? ge : lt;
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 3 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ itt(cond);
+ __ mov(cond, out_lo, op2_lo);
+ __ mov(cond, out_hi, op2_hi);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxFloat(HInstruction* minmax, bool is_min) {
+ LocationSummary* locations = minmax->GetLocations();
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::SRegister op1 = SRegisterFrom(op1_loc);
+ vixl32::SRegister op2 = SRegisterFrom(op2_loc);
+ vixl32::SRegister out = SRegisterFrom(out_loc);
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ const vixl32::Register temp1 = temps.Acquire();
+ vixl32::Register temp2 = RegisterFrom(locations->GetTemp(0));
+ vixl32::Label nan, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
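+  // Copy the FP comparison flags from FPSCR to the APSR so that the following
+  // conditional branches and IT block can test them.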
+ __ B(vs, &nan, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F32, out, op2);
+ }
+  // For <> (not equal), the min/max calculation is already done.
+  __ B(ne, final_label, /* is_far_target= */ false);
+
+  // Handle op1 == op2: max(+0.0,-0.0), min(+0.0,-0.0).
+ __ Vmov(temp1, op1);
+ __ Vmov(temp2, op2);
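+  // +0.0 and -0.0 compare equal, so select the result by sign bit: ORing the
+  // bit patterns yields -0.0 for min, ANDing them yields +0.0 for max.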
+ if (is_min) {
+ __ Orr(temp1, temp1, temp2);
+ } else {
+ __ And(temp1, temp1, temp2);
+ }
+ __ Vmov(out, temp1);
+ __ B(final_label);
+
+  // Handle NaN input.
+ __ Bind(&nan);
+ __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
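+  // Only the high halfword of temp1 is written; any stale low bits still form
+  // a quiet NaN, so a single MOVT suffices.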
+ __ Vmov(out, temp1);
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMaxDouble(HInstruction* minmax, bool is_min) {
+ LocationSummary* locations = minmax->GetLocations();
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+
+ // Optimization: don't generate any code if inputs are the same.
+ if (op1_loc.Equals(op2_loc)) {
+    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
+ return;
+ }
+
+ vixl32::DRegister op1 = DRegisterFrom(op1_loc);
+ vixl32::DRegister op2 = DRegisterFrom(op2_loc);
+ vixl32::DRegister out = DRegisterFrom(out_loc);
+ vixl32::Label handle_nan_eq, done;
+ vixl32::Label* final_label = codegen_->GetFinalLabel(minmax, &done);
+
+ DCHECK(op1.Is(out));
+
+ __ Vcmp(op1, op2);
+ __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+ __ B(vs, &handle_nan_eq, /* is_far_target= */ false); // if un-ordered, go to NaN handling.
+
+ // op1 <> op2
+ vixl32::ConditionType cond = is_min ? gt : lt;
+ {
+ ExactAssemblyScope it_scope(GetVIXLAssembler(),
+ 2 * kMaxInstructionSizeInBytes,
+ CodeBufferCheckScope::kMaximumSize);
+ __ it(cond);
+ __ vmov(cond, F64, out, op2);
+ }
+  // For <> (not equal), the min/max calculation is already done.
+  __ B(ne, final_label, /* is_far_target= */ false);
+
+  // Handle op1 == op2: max(+0.0,-0.0).
+ if (!is_min) {
+ __ Vand(F64, out, op1, op2);
+ __ B(final_label);
+ }
+
+  // Handle op1 == op2: min(+0.0,-0.0), and NaN inputs.
+  __ Bind(&handle_nan_eq);
+  __ Vorr(F64, out, op1, op2);  // ORing the bit patterns yields op1, -0.0 or a NaN as appropriate.
+
+ if (done.IsReferenced()) {
+ __ Bind(&done);
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kInt64:
+ GenerateMinMaxLong(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kFloat32:
+ GenerateMinMaxFloat(minmax, is_min);
+ break;
+ case DataType::Type::kFloat64:
+ GenerateMinMaxDouble(minmax, is_min);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMin(HMin* min) {
+  GenerateMinMax(min, /* is_min= */ true);
+}
+
+void LocationsBuilderARMVIXL::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMax(HMax* max) {
+  GenerateMinMax(max, /* is_min= */ false);
+}
+
+void LocationsBuilderARMVIXL::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ vixl32::Register in_reg = RegisterFrom(locations->InAt(0));
+ vixl32::Register out_reg = RegisterFrom(locations->Out());
+ vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
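+      // Branchless abs: mask = in >> 31 is 0 or all ones; (in + mask) ^ mask
+      // leaves non-negative inputs unchanged and negates negative ones.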
+ __ Asr(mask, in_reg, 31);
+ __ Add(out_reg, in_reg, mask);
+ __ Eor(out_reg, out_reg, mask);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ Location in = locations->InAt(0);
+ vixl32::Register in_reg_lo = LowRegisterFrom(in);
+ vixl32::Register in_reg_hi = HighRegisterFrom(in);
+ Location output = locations->Out();
+ vixl32::Register out_reg_lo = LowRegisterFrom(output);
+ vixl32::Register out_reg_hi = HighRegisterFrom(output);
+ DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
+ vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
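+      // The same branchless trick on a register pair: derive the sign mask
+      // from the high word, add it with carry across both halves, then XOR.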
+ __ Asr(mask, in_reg_hi, 31);
+ __ Adds(out_reg_lo, in_reg_lo, mask);
+ __ Adc(out_reg_hi, in_reg_hi, mask);
+ __ Eor(out_reg_lo, out_reg_lo, mask);
+ __ Eor(out_reg_hi, out_reg_hi, mask);
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ __ Vabs(OutputVRegister(abs), InputVRegisterAt(abs, 0));
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for abs operation " << abs->GetResultType();
+ }
+}
void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
@@ -4813,7 +4716,7 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
__ And(shift_right, RegisterFrom(rhs), 0x1F);
__ Lsrs(shift_left, RegisterFrom(rhs), 6);
__ Rsb(LeaveFlags, shift_left, shift_right, Operand::From(kArmBitsPerWord));
- __ B(cc, &shift_by_32_plus_shift_right, /* far_target */ false);
+ __ B(cc, &shift_by_32_plus_shift_right, /* is_far_target= */ false);
// out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
// out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
@@ -5069,8 +4972,11 @@ void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
__ Lsrs(o_h, high, 1);
__ Rrx(o_l, low);
}
+ } else if (shift_value == 0) {
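+    // A shift by 0 is a plain copy; the general case below relies on
+    // 1 <= shift_value <= 31 so that `32 - shift_value` is a valid shift.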
+ __ Mov(o_l, low);
+ __ Mov(o_h, high);
} else {
- DCHECK(2 <= shift_value && shift_value < 32) << shift_value;
+ DCHECK(0 < shift_value && shift_value < 32) << shift_value;
if (op->IsShl()) {
__ Lsl(o_h, high, shift_value);
__ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
@@ -5121,35 +5027,15 @@ void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(LocationFrom(kMethodRegister));
- } else {
- InvokeRuntimeCallingConventionARMVIXL calling_convention;
- locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
- }
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
locations->SetOut(LocationFrom(r0));
}
void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0));
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
- GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString));
- GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value());
- // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
- ExactAssemblyScope aas(GetVIXLAssembler(),
- vixl32::k16BitT32InstructionSizeInBytes,
- CodeBufferCheckScope::kExactSize);
- __ blx(lr);
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- }
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10);
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 11);
}
void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
@@ -5162,14 +5048,12 @@ void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- QuickEntrypointEnum entrypoint =
- CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
DCHECK(!codegen_->IsLeafMethod());
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 12);
}
void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
@@ -5291,8 +5175,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
}
case DataType::Type::kInt64: {
__ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare.
- __ B(lt, &less, /* far_target */ false);
- __ B(gt, &greater, /* far_target */ false);
+ __ B(lt, &less, /* is_far_target= */ false);
+ __ B(gt, &greater, /* is_far_target= */ false);
// Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
__ Mov(out, 0);
__ Cmp(LowRegisterFrom(left), LowRegisterFrom(right)); // Unsigned compare.
@@ -5313,8 +5197,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
UNREACHABLE();
}
- __ B(eq, final_label, /* far_target */ false);
- __ B(less_cond, &less, /* far_target */ false);
+ __ B(eq, final_label, /* is_far_target= */ false);
+ __ B(less_cond, &less, /* is_far_target= */ false);
__ Bind(&greater);
__ Mov(out, 1);
@@ -5610,18 +5494,10 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
} else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier.
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !Runtime::Current()->UseJitCompilation()) {
- // If link-time thunks for the Baker read barrier are enabled, for AOT
- // loads we need a temporary only if the offset is too big.
- if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
- locations->AddTemp(Location::RequiresRegister());
- }
- // And we always need the reserved entrypoint register.
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- } else {
+ // We need a temporary register for the read barrier load in
+ // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
+ // only if the offset is too big.
+ if (field_info.GetFieldOffset().Uint32Value() >= kReferenceLoadMinFarOffset) {
locations->AddTemp(Location::RequiresRegister());
}
}
@@ -5733,11 +5609,11 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
case DataType::Type::kReference: {
// /* HeapReference<Object> */ out = *(base + offset)
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp_loc = locations->GetTemp(0);
+ Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
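+        // The locations builder added this temp only for offsets too large for
+        // a single load (>= kReferenceLoadMinFarOffset); otherwise pass an
+        // invalid Location.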
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, temp_loc, /* needs_null_check */ true);
+ instruction, out, base, offset, maybe_temp, /* needs_null_check= */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -6036,30 +5912,20 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
}
if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier.
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !Runtime::Current()->UseJitCompilation() &&
- instruction->GetIndex()->IsConstant()) {
+ if (instruction->GetIndex()->IsConstant()) {
// Array loads with constant index are treated as field loads.
- // If link-time thunks for the Baker read barrier are enabled, for AOT
- // constant index loads we need a temporary only if the offset is too big.
+ // We need a temporary register for the read barrier load in
+ // CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier()
+ // only if the offset is too big.
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction);
uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue();
offset += index << DataType::SizeShift(DataType::Type::kReference);
if (offset >= kReferenceLoadMinFarOffset) {
locations->AddTemp(Location::RequiresRegister());
}
- // And we always need the reserved entrypoint register.
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- } else if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
- !Runtime::Current()->UseJitCompilation() &&
- !instruction->GetIndex()->IsConstant()) {
- // We need a non-scratch temporary for the array data pointer.
- locations->AddTemp(Location::RequiresRegister());
- // And we always need the reserved entrypoint register.
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
} else {
+ // We need a non-scratch temporary for the array data pointer in
+ // CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier().
locations->AddTemp(Location::RequiresRegister());
}
} else if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
@@ -6103,7 +5969,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
- __ B(cs, &uncompressed_load, /* far_target */ false);
+ __ B(cs, &uncompressed_load, /* is_far_target= */ false);
GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
RegisterFrom(out_loc),
obj,
@@ -6145,7 +6011,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
__ Lsrs(length, length, 1u); // LSRS has a 16-bit encoding, TST (immediate) does not.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
- __ B(cs, &uncompressed_load, /* far_target */ false);
+ __ B(cs, &uncompressed_load, /* is_far_target= */ false);
__ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
__ B(final_label);
__ Bind(&uncompressed_load);
@@ -6172,22 +6038,24 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
// /* HeapReference<Object> */ out =
// *(obj + data_offset + index * sizeof(HeapReference<Object>))
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Location temp = locations->GetTemp(0);
// Note that a potential implicit null check is handled in this
// CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier call.
DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0)));
if (index.IsConstant()) {
// Array load with a constant index can be treated as a field load.
+ Location maybe_temp =
+ (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location();
data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type);
codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
out_loc,
obj,
data_offset,
- locations->GetTemp(0),
- /* needs_null_check */ false);
+ maybe_temp,
+ /* needs_null_check= */ false);
} else {
+ Location temp = locations->GetTemp(0);
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, temp, /* needs_null_check */ false);
+ out_loc, obj, data_offset, index, temp, /* needs_null_check= */ false);
}
} else {
vixl32::Register out = OutputRegister(instruction);
@@ -6462,7 +6330,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
if (instruction->StaticTypeOfArrayIsObjectArray()) {
vixl32::Label do_put;
- __ B(eq, &do_put, /* far_target */ false);
+ __ B(eq, &do_put, /* is_far_target= */ false);
// If heap poisoning is enabled, the `temp1` reference has
// not been unpoisoned yet; unpoison it now.
GetAssembler()->MaybeUnpoisonHeapReference(temp1);
@@ -6706,9 +6574,25 @@ void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
if (can_be_null) {
__ CompareAndBranchIfZero(value, &is_null);
}
+ // Load the address of the card table into `card`.
GetAssembler()->LoadFromOffset(
kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
+ // Calculate the offset (in the card table) of the card corresponding to
+ // `object`.
__ Lsr(temp, object, Operand::From(gc::accounting::CardTable::kCardShift));
+ // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
+ // `object`'s card.
+ //
+ // Register `card` contains the address of the card table. Note that the card
+ // table's base is biased during its creation so that it always starts at an
+ // address whose least-significant byte is equal to `kCardDirty` (see
+ // art::gc::accounting::CardTable::Create). Therefore the STRB instruction
+ // below writes the `kCardDirty` (byte) value into the `object`'s card
+ // (located at `card + object >> kCardShift`).
+ //
+ // This dual use of the value in register `card` (1. to calculate the location
+ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
+ // (no need to explicitly load `kCardDirty` as an immediate value).
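+  // For example, for an object at address A, the byte stored is the low byte
+  // of `card` (equal to kCardDirty by construction) and it is written to
+  // address `card + (A >> kCardShift)`.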
__ Strb(card, MemOperand(card, temp));
if (can_be_null) {
__ Bind(&is_null);
@@ -6748,7 +6632,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instructi
return;
}
GenerateSuspendCheck(instruction, nullptr);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 13);
}
void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
@@ -7040,14 +6924,14 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadClass::LoadKind::kJitBootImageAddress:
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kRuntimeCall:
break;
}
@@ -7083,23 +6967,11 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
if (load_kind == HLoadClass::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConventionARMVIXL calling_convention;
- caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
- // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
- // that the the kPrimNot result register is the same as the first argument register.
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
}
- if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
- if (load_kind == HLoadClass::LoadKind::kBssEntry ||
- (load_kind == HLoadClass::LoadKind::kReferrersClass &&
- !Runtime::Current()->UseJitCompilation())) {
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- }
- }
}
// NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
@@ -7108,7 +6980,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
HLoadClass::LoadKind load_kind = cls->GetLoadKind();
if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
codegen_->GenerateLoadClassRuntimeCall(cls);
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 14);
return;
}
DCHECK(!cls->NeedsAccessCheck());
@@ -7127,11 +6999,11 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
DCHECK(!cls->MustGenerateClinitCheck());
// /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
vixl32::Register current_method = InputRegisterAt(cls, 0);
- GenerateGcRootFieldLoad(cls,
- out_loc,
- current_method,
- ArtMethod::DeclaringClassOffset().Int32Value(),
- read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls,
+ out_loc,
+ current_method,
+ ArtMethod::DeclaringClassOffset().Int32Value(),
+ read_barrier_option);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
@@ -7142,42 +7014,35 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
codegen_->EmitMovwMovtPlaceholder(labels, out);
break;
}
- case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
- DCHECK_NE(address, 0u);
- __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
- break;
- }
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
codegen_->EmitMovwMovtPlaceholder(labels, out);
- __ Ldr(out, MemOperand(out, /* offset */ 0));
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ Sub(out, out, Operand(masked_hash));
- }
+ __ Ldr(out, MemOperand(out, /* offset= */ 0));
break;
}
case HLoadClass::LoadKind::kBssEntry: {
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
codegen_->EmitMovwMovtPlaceholder(labels, out);
- GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option);
generate_null_check = true;
break;
}
+ case HLoadClass::LoadKind::kJitBootImageAddress: {
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
+ DCHECK_NE(address, 0u);
+ __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ break;
+ }
case HLoadClass::LoadKind::kJitTableAddress: {
__ Ldr(out, codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
cls->GetTypeIndex(),
cls->GetClass()));
// /* GcRoot<mirror::Class> */ out = *out
- GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
+ codegen_->GenerateGcRootFieldLoad(cls, out_loc, out, /* offset= */ 0, read_barrier_option);
break;
}
case HLoadClass::LoadKind::kRuntimeCall:
@@ -7189,8 +7054,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
if (generate_null_check || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
LoadClassSlowPathARMVIXL* slow_path =
- new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(cls, cls);
codegen_->AddSlowPath(slow_path);
if (generate_null_check) {
__ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
@@ -7200,10 +7064,30 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_
} else {
__ Bind(slow_path->GetExitLabel());
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 15);
}
}
+void LocationsBuilderARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ Location location = LocationFrom(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ codegen_->GenerateLoadMethodHandleRuntimeCall(load);
+}
+
+void LocationsBuilderARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ Location location = LocationFrom(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLoadMethodType(HLoadMethodType* load) {
+ codegen_->GenerateLoadMethodTypeRuntimeCall(load);
+}
+
void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
@@ -7211,15 +7095,14 @@ void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
if (check->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
+ // Rely on the type initialization to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
}
void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
// We assume the class is not null.
LoadClassSlowPathARMVIXL* slow_path =
- new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(),
- check,
- check->GetDexPc(),
- /* do_clinit */ true);
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), check);
codegen_->AddSlowPath(slow_path);
GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
}
@@ -7243,18 +7126,79 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorARMVIXL::GenerateBitstringTypeCheckCompare(
+ HTypeCheckInstruction* check,
+ vixl32::Register temp,
+ vixl32::FlagsUpdate flags_update) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
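+  // For example, a bitstring mask of 0xffff yields mask_bits == 16, i.e. the
+  // bitstring occupies the low 16 bits of the class status word.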
+
+ // Note that HInstanceOf shall check for zero value in `temp` but HCheckCast needs
+ // the Z flag for BNE. This is indicated by the `flags_update` parameter.
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ Ldrh(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+ // Check if the bitstring bits are equal to `path_to_root`.
+ if (flags_update == SetFlags) {
+ __ Cmp(temp, path_to_root);
+ } else {
+ __ Sub(temp, temp, path_to_root);
+ }
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ Ldr(temp, MemOperand(temp, mirror::Class::StatusOffset().Int32Value()));
+ if (GetAssembler()->ShifterOperandCanHold(SUB, path_to_root)) {
+ // Compare the bitstring bits using SUB.
+ __ Sub(temp, temp, path_to_root);
+ // Shift out bits that do not contribute to the comparison.
+ __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+ } else if (IsUint<16>(path_to_root)) {
+ if (temp.IsLow()) {
+        // Note: Optimized for size but contains one more dependent instruction than necessary.
+        // MOVW+SUB(register) would be 8 bytes unless we found a low-reg temporary, but the
+        // macro assembler uses the high reg IP for the constant by default.
+ // Compare the bitstring bits using SUB.
+ __ Sub(temp, temp, path_to_root & 0x00ffu); // 16-bit SUB (immediate) T2
+ __ Sub(temp, temp, path_to_root & 0xff00u); // 32-bit SUB (immediate) T3
+ // Shift out bits that do not contribute to the comparison.
+ __ Lsl(flags_update, temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+ } else {
+ // Extract the bitstring bits.
+ __ Ubfx(temp, temp, 0, mask_bits);
+ // Check if the bitstring bits are equal to `path_to_root`.
+ if (flags_update == SetFlags) {
+ __ Cmp(temp, path_to_root);
+ } else {
+ __ Sub(temp, temp, path_to_root);
+ }
+ }
+ } else {
+ // Shift out bits that do not contribute to the comparison.
+ __ Lsl(temp, temp, dchecked_integral_cast<uint32_t>(32u - mask_bits));
+ // Check if the shifted bitstring bits are equal to `path_to_root << (32u - mask_bits)`.
+ if (flags_update == SetFlags) {
+ __ Cmp(temp, path_to_root << (32u - mask_bits));
+ } else {
+ __ Sub(temp, temp, path_to_root << (32u - mask_bits));
+ }
+ }
+ }
+}
+
HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadString::LoadKind::kJitBootImageAddress:
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kRuntimeCall:
break;
}
@@ -7272,15 +7216,7 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
if (load_kind == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need, including temps.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConventionARMVIXL calling_convention;
- caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
- // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
- // that the the kPrimNot result register is the same as the first argument register.
- locations->SetCustomSlowPathCallerSaves(caller_saves);
- if (kUseBakerReadBarrier && kBakerReadBarrierLinkTimeThunksEnableForGcRoots) {
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- }
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -7304,33 +7240,32 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
codegen_->EmitMovwMovtPlaceholder(labels, out);
return;
}
- case HLoadString::LoadKind::kBootImageAddress: {
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(load->GetString().Get()));
- DCHECK_NE(address, 0u);
- __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
- return;
- }
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
codegen_->EmitMovwMovtPlaceholder(labels, out);
- __ Ldr(out, MemOperand(out, /* offset */ 0));
+ __ Ldr(out, MemOperand(out, /* offset= */ 0));
return;
}
case HLoadString::LoadKind::kBssEntry: {
- DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
codegen_->EmitMovwMovtPlaceholder(labels, out);
- GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+ codegen_->GenerateGcRootFieldLoad(
+ load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption);
LoadStringSlowPathARMVIXL* slow_path =
new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load);
codegen_->AddSlowPath(slow_path);
__ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 16);
+ return;
+ }
+ case HLoadString::LoadKind::kJitBootImageAddress: {
+ uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
+ DCHECK_NE(address, 0u);
+ __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
return;
}
case HLoadString::LoadKind::kJitTableAddress: {
@@ -7338,7 +7273,8 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
load->GetStringIndex(),
load->GetString()));
// /* GcRoot<mirror::String> */ out = *out
- GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption);
+ codegen_->GenerateGcRootFieldLoad(
+ load, out_loc, out, /* offset= */ 0, kCompilerReadBarrierOption);
return;
}
default:
@@ -7351,7 +7287,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE
__ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 17);
}
static int32_t GetExceptionTlsOffset() {
@@ -7434,6 +7370,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -7442,14 +7380,17 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The "out" register is used as a temporary, so it overlaps with the inputs.
// Note that TypeCheckSlowPathARM uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- codegen_->MaybeAddBakerCcEntrypointTempForFields(locations);
- }
}
void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
@@ -7457,7 +7398,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
vixl32::Register obj = InputRegisterAt(instruction, 0);
- vixl32::Register cls = InputRegisterAt(instruction, 1);
+ vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? vixl32::Register()
+ : InputRegisterAt(instruction, 1);
Location out_loc = locations->Out();
vixl32::Register out = OutputRegister(instruction);
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7476,7 +7419,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
if (instruction->MustDoNullCheck()) {
DCHECK(!out.Is(obj));
__ Mov(out, 0);
- __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
+ __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
}
switch (type_check_kind) {
@@ -7508,7 +7451,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
__ it(eq);
__ mov(eq, out, 1);
} else {
- __ B(ne, final_label, /* far_target */ false);
+ __ B(ne, final_label, /* is_far_target= */ false);
__ Mov(out, 1);
}
@@ -7536,9 +7479,9 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
maybe_temp_loc,
read_barrier_option);
// If `out` is null, we use it for the result, and jump to the final label.
- __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
+ __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
__ Cmp(out, cls);
- __ B(ne, &loop, /* far_target */ false);
+ __ B(ne, &loop, /* is_far_target= */ false);
__ Mov(out, 1);
break;
}
@@ -7557,7 +7500,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
vixl32::Label loop, success;
__ Bind(&loop);
__ Cmp(out, cls);
- __ B(eq, &success, /* far_target */ false);
+ __ B(eq, &success, /* is_far_target= */ false);
// /* HeapReference<Class> */ out = out->super_class_
GenerateReferenceLoadOneRegister(instruction,
out_loc,
@@ -7567,7 +7510,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
// This is essentially a null check, but it sets the condition flags to the
// proper value for the code that follows the loop, i.e. not `eq`.
__ Cmp(out, 1);
- __ B(hs, &loop, /* far_target */ false);
+ __ B(hs, &loop, /* is_far_target= */ false);
// Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
// we check that the output is in a low register, so that a 16-bit MOV
@@ -7612,7 +7555,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
// Do an exact check.
vixl32::Label exact_check;
__ Cmp(out, cls);
- __ B(eq, &exact_check, /* far_target */ false);
+ __ B(eq, &exact_check, /* is_far_target= */ false);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -7621,7 +7564,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
maybe_temp_loc,
read_barrier_option);
// If `out` is null, we use it for the result, and jump to the final label.
- __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
+ __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
__ Cmp(out, 0);
@@ -7643,7 +7586,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
__ it(eq);
__ mov(eq, out, 1);
} else {
- __ B(ne, final_label, /* far_target */ false);
+ __ B(ne, final_label, /* is_far_target= */ false);
__ Bind(&exact_check);
__ Mov(out, 1);
}
@@ -7663,7 +7606,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
__ Cmp(out, cls);
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ B(ne, slow_path->GetEntryLabel());
__ Mov(out, 1);
@@ -7692,11 +7635,31 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction)
// This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ B(slow_path->GetEntryLabel());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out, DontCare);
+ // If `out` is a low reg and we would have another low reg temp, we could
+ // optimize this as RSBS+ADC, see GenerateConditionWithZero().
+ //
+ // Also, in some cases when `out` is a low reg and we're loading a constant to IP
+ // it would make sense to use CMP+MOV+IT+MOV instead of SUB+CLZ+LSR as the code size
+ // would be the same and we would have fewer direct data dependencies.
+ codegen_->GenerateConditionWithZero(kCondEQ, out, out); // CLZ+LSR
+ break;
+ }
}
if (done.IsReferenced()) {
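
The GenerateConditionWithZero(kCondEQ, out, out) call in the new bitstring case relies on the CLZ+LSR idiom named in its comment: CLZ of zero is 32, CLZ of any nonzero 32-bit value is at most 31, so a logical shift right by 5 yields 1 exactly when the input was zero. A minimal C++ check of that arithmetic (the explicit zero case is needed because the GCC/Clang __builtin_clz is undefined for zero):

    #include <cassert>
    #include <cstdint>

    // Models out = (out == 0) computed branchlessly, as CLZ+LSR does.
    uint32_t IsZeroViaClzLsr(uint32_t x) {
      // CLZ(0) is 32 on ARM; __builtin_clz(0) is undefined in C++, so special-case it.
      uint32_t clz = (x == 0u) ? 32u : static_cast<uint32_t>(__builtin_clz(x));
      return clz >> 5;  // 32 >> 5 == 1; any value in [0, 31] >> 5 == 0.
    }

    int main() {
      assert(IsZeroViaClzLsr(0u) == 1u);
      assert(IsZeroViaClzLsr(1u) == 0u);
      assert(IsZeroViaClzLsr(0x80000000u) == 0u);
    }
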
@@ -7714,7 +7677,13 @@ void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
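
For readers new to kBitstringCheck: the three constant inputs reserved above are the expected path-to-root bitstring, the comparison mask, and the number of mask bits, so the whole type check reduces to one masked compare against the class status word. A sketch under assumed names (Klass and status_ are illustrative, not ART's exact layout):

    #include <cstdint>

    // Hypothetical stand-in for a class whose status word embeds the bitstring.
    struct Klass {
      uint32_t status_;  // Low bits hold the type-check bitstring.
    };

    // One load, one AND, one compare: subtyping as a bitstring prefix match.
    bool MatchesBitstring(const Klass* klass, uint32_t path_to_root, uint32_t mask) {
      return (klass->status_ & mask) == path_to_root;
    }
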
@@ -7723,7 +7692,9 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
vixl32::Register obj = InputRegisterAt(instruction, 0);
- vixl32::Register cls = InputRegisterAt(instruction, 1);
+ vixl32::Register cls = (type_check_kind == TypeCheckKind::kBitstringCheck)
+ ? vixl32::Register()
+ : InputRegisterAt(instruction, 1);
Location temp_loc = locations->GetTemp(0);
vixl32::Register temp = RegisterFrom(temp_loc);
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -7749,7 +7720,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
// Avoid null check if we know obj is not null.
if (instruction->MustDoNullCheck()) {
- __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
+ __ CompareAndBranchIfZero(obj, final_label, /* is_far_target= */ false);
}
switch (type_check_kind) {
@@ -7796,7 +7767,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
// Otherwise, compare the classes.
__ Cmp(temp, cls);
- __ B(ne, &loop, /* far_target */ false);
+ __ B(ne, &loop, /* is_far_target= */ false);
break;
}
@@ -7813,7 +7784,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
vixl32::Label loop;
__ Bind(&loop);
__ Cmp(temp, cls);
- __ B(eq, final_label, /* far_target */ false);
+ __ B(eq, final_label, /* is_far_target= */ false);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
@@ -7841,7 +7812,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
// Do an exact check.
__ Cmp(temp, cls);
- __ B(eq, final_label, /* far_target */ false);
+ __ B(eq, final_label, /* is_far_target= */ false);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
@@ -7905,7 +7876,21 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
__ Sub(RegisterFrom(maybe_temp2_loc), RegisterFrom(maybe_temp2_loc), 2);
// Compare the classes and continue the loop if they do not match.
__ Cmp(cls, RegisterFrom(maybe_temp3_loc));
- __ B(ne, &start_loop, /* far_target */ false);
+ __ B(ne, &start_loop, /* is_far_target= */ false);
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp, SetFlags);
+ __ B(ne, type_check_slow_path->GetEntryLabel());
break;
}
}
@@ -7932,7 +7917,7 @@ void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* i
} else {
CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17);
+ codegen_->MaybeGenerateMarkingRegisterCheck(/* code= */ 18);
}
void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
@@ -8287,7 +8272,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, out_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, out_reg, offset, maybe_temp, /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
@@ -8322,7 +8307,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check */ false);
+ instruction, out, obj_reg, offset, maybe_temp, /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -8337,7 +8322,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
}
}
-void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
+void CodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
HInstruction* instruction,
Location root,
vixl32::Register obj,
@@ -8349,81 +8334,52 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
if (kUseBakerReadBarrier) {
// Fast path implementation of art::ReadBarrier::BarrierForRoot when
// Baker's read barrier are used.
- if (kBakerReadBarrierLinkTimeThunksEnableForGcRoots &&
- !Runtime::Current()->UseJitCompilation()) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
- // the Marking Register) to decide whether we need to enter
- // the slow path to mark the GC root.
- //
- // We use link-time generated thunks for the slow path. That thunk
- // checks the reference and jumps to the entrypoint if needed.
- //
- // lr = &return_address;
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto gc_root_thunk<root_reg>(lr)
- // }
- // return_address:
- UseScratchRegisterScope temps(GetVIXLAssembler());
- ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
- bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
- uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierGcRootData(
- root_reg.GetCode(), narrow);
- vixl32::Label* bne_label = codegen_->NewBakerReadBarrierPatch(custom_data);
-
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), 4 * vixl32::kMaxInstructionSizeInBytes);
- vixl32::Label return_address;
- EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(mr, Operand(0));
- // Currently the offset is always within range. If that changes,
- // we shall have to split the load the same way as for fields.
- DCHECK_LT(offset, kReferenceLoadMinFarOffset);
- ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
- __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
- EmitPlaceholderBne(codegen_, bne_label);
- __ Bind(&return_address);
- DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
- narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
- : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
- } else {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
- // the Marking Register) to decide whether we need to enter
- // the slow path to mark the GC root.
- //
- // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // root = entrypoint(root); // root = ReadBarrier::Mark(root); // Entry point call.
- // }
-
- // Slow path marking the GC root `root`. The entrypoint will
- // be loaded by the slow path code.
- SlowPathCodeARMVIXL* slow_path =
- new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root);
- codegen_->AddSlowPath(slow_path);
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in
+ // the Marking Register) to decide whether we need to enter
+ // the slow path to mark the GC root.
+ //
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the reference
+ // and jumps to the entrypoint if needed.
+ //
+ // lr = &return_address;
+ // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load.
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto gc_root_thunk<root_reg>(lr)
+ // }
+ // return_address:
- // /* GcRoot<mirror::Object> */ root = *(obj + offset)
- GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
- static_assert(
- sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
- "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
- "have different sizes.");
- static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
- "art::mirror::CompressedReference<mirror::Object> and int32_t "
- "have different sizes.");
-
- __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
- __ Bind(slow_path->GetExitLabel());
- }
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ bool narrow = CanEmitNarrowLdr(root_reg, obj, offset);
+ uint32_t custom_data = EncodeBakerReadBarrierGcRootData(root_reg.GetCode(), narrow);
+
+ size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u) + /* LDR */ (narrow ? 1u : 0u);
+ size_t wide_instructions = /* ADR+CMP+LDR+BNE */ 4u - narrow_instructions;
+ size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+ narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+ ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(mr, Operand(0));
+ // Currently the offset is always within range. If that changes,
+ // we shall have to split the load the same way as for fields.
+ DCHECK_LT(offset, kReferenceLoadMinFarOffset);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), root_reg, MemOperand(obj, offset));
+ EmitBakerReadBarrierBne(custom_data);
+ __ bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET);
} else {
// GC root loaded through a slow path for read barriers other
// than Baker's.
// /* GcRoot<mirror::Object>* */ root = obj + offset
__ Add(root_reg, obj, offset);
// /* mirror::Object* */ root = root->Read()
- codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+ GenerateReadBarrierForRootSlow(instruction, root, root);
}
} else {
// Plain GC root load with no read barrier.
@@ -8432,112 +8388,129 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
// Note that GC roots are not affected by heap poisoning, thus we
// do not have to unpoison `root_reg` here.
}
- codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18);
+ MaybeGenerateMarkingRegisterCheck(/* code= */ 19);
}
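
The exact_size bookkeeping above is what lets the thunk find the LDR at a fixed distance from return_address: T32 wide encodings take 4 bytes and narrow ones 2, and ExactAssemblyScope asserts that the emitted bytes match the prediction. The same arithmetic as a standalone sketch (constant names mirror, rather than reuse, the VIXL ones):

    #include <cstddef>

    // Size of the ADR+CMP+LDR+BNE sequence: the CMP can be narrow when `mr`
    // is a low register, and the root LDR may have a narrow encoding too.
    size_t GcRootSequenceSize(bool mr_is_low, bool narrow_ldr) {
      const size_t k32BitT32InstructionSizeInBytes = 4u;
      const size_t k16BitT32InstructionSizeInBytes = 2u;
      size_t narrow_instructions = (mr_is_low ? 1u : 0u) + (narrow_ldr ? 1u : 0u);
      size_t wide_instructions = 4u - narrow_instructions;
      return wide_instructions * k32BitT32InstructionSizeInBytes +
             narrow_instructions * k16BitT32InstructionSizeInBytes;
    }
    // E.g. both narrow: 2 * 4 + 2 * 2 == 12 bytes; both wide: 4 * 4 == 16 bytes.
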
-void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) {
+void CodeGeneratorARMVIXL::GenerateUnsafeCasOldValueAddWithBakerReadBarrier(
+ vixl::aarch32::Register old_value,
+ vixl::aarch32::Register adjusted_old_value,
+ vixl::aarch32::Register expected) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- if (kBakerReadBarrierLinkTimeThunksEnableForFields) {
- if (!Runtime::Current()->UseJitCompilation()) {
- locations->AddTemp(Location::RegisterLocation(kBakerCcEntrypointRegister.GetCode()));
- }
- }
+
+ // Similar to the Baker RB path in GenerateGcRootFieldLoad(), with an ADD instead of LDR.
+ uint32_t custom_data = EncodeBakerReadBarrierUnsafeCasData(old_value.GetCode());
+
+ size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
+ size_t wide_instructions = /* ADR+CMP+ADD+BNE */ 4u - narrow_instructions;
+ size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+ narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+ ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(mr, Operand(0));
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ add(EncodingSize(Wide), old_value, adjusted_old_value, Operand(expected)); // Preserves flags.
+ EmitBakerReadBarrierBne(custom_data);
+ __ bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ADD_OFFSET);
}
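
A note on what the flags-preserving wide ADD computes: by the time this helper runs, the CAS sequence is assumed to have produced adjusted_old_value = old_value - expected (that subtraction set the flags the caller still needs), so adding expected back recovers the original reference for the thunk to mark. In scalar form, under that assumption:

    #include <cstdint>

    // Reconstruct old_value without touching flags (hence ADD, not ADDS, above).
    uint32_t RecoverOldValue(uint32_t adjusted_old_value, uint32_t expected) {
      // Equals old_value whenever adjusted_old_value == old_value - expected.
      return adjusted_old_value + expected;
    }
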
void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
- uint32_t offset,
- Location temp,
+ const vixl32::MemOperand& src,
bool needs_null_check) {
DCHECK(kEmitCompilerReadBarrier);
DCHECK(kUseBakerReadBarrier);
- if (kBakerReadBarrierLinkTimeThunksEnableForFields &&
- !Runtime::Current()->UseJitCompilation()) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to mark the reference. Then, in the slow path, check the
- // gray bit in the lock word of the reference's holder (`obj`) to
- // decide whether to mark `ref` or not.
- //
- // We use link-time generated thunks for the slow path. That thunk checks
- // the holder and jumps to the entrypoint if needed. If the holder is not
- // gray, it creates a fake dependency and returns to the LDR instruction.
- //
- // lr = &gray_return_address;
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto field_thunk<holder_reg, base_reg>(lr)
- // }
- // not_gray_return_address:
- // // Original reference load. If the offset is too large to fit
- // // into LDR, we use an adjusted base register here.
- // HeapReference<mirror::Object> reference = *(obj+offset);
- // gray_return_address:
-
- DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
- vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
- bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset);
- vixl32::Register base = obj;
- if (offset >= kReferenceLoadMinFarOffset) {
- base = RegisterFrom(temp);
- DCHECK(!base.Is(kBakerCcEntrypointRegister));
- static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
- __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
- offset &= (kReferenceLoadMinFarOffset - 1u);
- // Use narrow LDR only for small offsets. Generating narrow encoding LDR for the large
- // offsets with `(offset & (kReferenceLoadMinFarOffset - 1u)) < 32u` would most likely
- // increase the overall code size when taking the generated thunks into account.
- DCHECK(!narrow);
- }
- UseScratchRegisterScope temps(GetVIXLAssembler());
- ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
- uint32_t custom_data = linker::Thumb2RelativePatcher::EncodeBakerReadBarrierFieldData(
- base.GetCode(), obj.GetCode(), narrow);
- vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
+ // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
+ // Marking Register) to decide whether we need to enter the slow
+ // path to mark the reference. Then, in the slow path, check the
+ // gray bit in the lock word of the reference's holder (`obj`) to
+ // decide whether to mark `ref` or not.
+ //
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the holder
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it creates a fake dependency and returns to the LDR instruction.
+ //
+ // lr = &gray_return_address;
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto field_thunk<holder_reg, base_reg>(lr)
+ // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = *(obj+offset);
+ // gray_return_address:
- {
- vixl::EmissionCheckScope guard(
- GetVIXLAssembler(),
- (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
- vixl32::Label return_address;
- EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(mr, Operand(0));
- EmitPlaceholderBne(this, bne_label);
- ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
- __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset));
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- // Note: We need a specific width for the unpoisoning NEG.
- if (kPoisonHeapReferences) {
- if (narrow) {
- // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
- __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
- } else {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
- }
+ DCHECK(src.GetAddrMode() == vixl32::Offset);
+ DCHECK_ALIGNED(src.GetOffsetImmediate(), sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+ bool narrow = CanEmitNarrowLdr(ref_reg, src.GetBaseRegister(), src.GetOffsetImmediate());
+
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ uint32_t custom_data =
+ EncodeBakerReadBarrierFieldData(src.GetBaseRegister().GetCode(), obj.GetCode(), narrow);
+
+ {
+ size_t narrow_instructions =
+ /* CMP */ (mr.IsLow() ? 1u : 0u) +
+ /* LDR+unpoison? */ (narrow ? (kPoisonHeapReferences ? 2u : 1u) : 0u);
+ size_t wide_instructions =
+ /* ADR+CMP+LDR+BNE+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
+ size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+ narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+ ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(mr, Operand(0));
+ EmitBakerReadBarrierBne(custom_data);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, src);
+ if (needs_null_check) {
+ MaybeRecordImplicitNullCheck(instruction);
+ }
+ // Note: We need a specific width for the unpoisoning NEG.
+ if (kPoisonHeapReferences) {
+ if (narrow) {
+ // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB).
+ __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0));
+ } else {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
}
- __ Bind(&return_address);
- DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
- narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
- : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
}
- MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip));
- return;
+ __ bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET);
}
-
- // /* HeapReference<Object> */ ref = *(obj + offset)
- Location no_index = Location::NoLocation();
- ScaleFactor no_scale_factor = TIMES_1;
- GenerateReferenceLoadWithBakerReadBarrier(
- instruction, ref, obj, offset, no_index, no_scale_factor, temp, needs_null_check);
+ MaybeGenerateMarkingRegisterCheck(/* code= */ 20, /* temp_loc= */ LocationFrom(ip));
}
-void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
Location ref,
vixl32::Register obj,
+ uint32_t offset,
+ Location temp,
+ bool needs_null_check) {
+ DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>));
+ vixl32::Register base = obj;
+ if (offset >= kReferenceLoadMinFarOffset) {
+ base = RegisterFrom(temp);
+ static_assert(IsPowerOfTwo(kReferenceLoadMinFarOffset), "Expecting a power of 2.");
+ __ Add(base, obj, Operand(offset & ~(kReferenceLoadMinFarOffset - 1u)));
+ offset &= (kReferenceLoadMinFarOffset - 1u);
+ }
+ GenerateFieldLoadWithBakerReadBarrier(
+ instruction, ref, obj, MemOperand(base, offset), needs_null_check);
+}
+
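
The new wrapper splits far offsets so that the final LDR keeps a small immediate: the power-of-two-aligned high part is folded into one ADD and only the residue reaches the MemOperand. Worked through with an assumed 4 KiB threshold (the real kReferenceLoadMinFarOffset may differ, but must be a power of two, as the static_assert says):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t kMinFarOffset = 4096u;  // Illustrative threshold.
      uint32_t offset = 0x1234u;
      uint32_t base_adjustment = offset & ~(kMinFarOffset - 1u);  // 0x1000, folded into ADD.
      uint32_t residual = offset & (kMinFarOffset - 1u);          // 0x234, kept in the LDR.
      assert(base_adjustment + residual == 0x1234u);
    }
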
+void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(Location ref,
+ vixl32::Register obj,
uint32_t data_offset,
Location index,
Location temp,
@@ -8550,229 +8523,60 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i
"art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
ScaleFactor scale_factor = TIMES_4;
- if (kBakerReadBarrierLinkTimeThunksEnableForArrays &&
- !Runtime::Current()->UseJitCompilation()) {
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to mark the reference. Then, in the slow path, check the
- // gray bit in the lock word of the reference's holder (`obj`) to
- // decide whether to mark `ref` or not.
- //
- // We use link-time generated thunks for the slow path. That thunk checks
- // the holder and jumps to the entrypoint if needed. If the holder is not
- // gray, it creates a fake dependency and returns to the LDR instruction.
- //
- // lr = &gray_return_address;
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // goto array_thunk<base_reg>(lr)
- // }
- // not_gray_return_address:
- // // Original reference load. If the offset is too large to fit
- // // into LDR, we use an adjusted base register here.
- // HeapReference<mirror::Object> reference = data[index];
- // gray_return_address:
-
- DCHECK(index.IsValid());
- vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
- vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
- vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
- DCHECK(!data_reg.Is(kBakerCcEntrypointRegister));
-
- UseScratchRegisterScope temps(GetVIXLAssembler());
- ExcludeIPAndBakerCcEntrypointRegister(&temps, instruction);
- uint32_t custom_data =
- linker::Thumb2RelativePatcher::EncodeBakerReadBarrierArrayData(data_reg.GetCode());
- vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data);
-
- __ Add(data_reg, obj, Operand(data_offset));
- {
- vixl::EmissionCheckScope guard(
- GetVIXLAssembler(),
- (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes);
- vixl32::Label return_address;
- EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
- __ cmp(mr, Operand(0));
- EmitPlaceholderBne(this, bne_label);
- ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
- __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
- DCHECK(!needs_null_check); // The thunk cannot handle the null check.
- // Note: We need a Wide NEG for the unpoisoning.
- if (kPoisonHeapReferences) {
- __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
- }
- __ Bind(&return_address);
- DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
- BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
- }
- MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip));
- return;
- }
-
- // /* HeapReference<Object> */ ref =
- // *(obj + data_offset + index * sizeof(HeapReference<Object>))
- GenerateReferenceLoadWithBakerReadBarrier(
- instruction, ref, obj, data_offset, index, scale_factor, temp, needs_null_check);
-}
-
-void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- Location temp,
- bool needs_null_check) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
-
// Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
// Marking Register) to decide whether we need to enter the slow
// path to mark the reference. Then, in the slow path, check the
// gray bit in the lock word of the reference's holder (`obj`) to
// decide whether to mark `ref` or not.
//
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // }
- // } else {
- // HeapReference<mirror::Object> ref = *src; // Original reference load.
- // }
-
- vixl32::Register temp_reg = RegisterFrom(temp);
-
- // Slow path marking the object `ref` when the GC is marking. The
- // entrypoint will be loaded by the slow path code.
- SlowPathCodeARMVIXL* slow_path =
- new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL(
- instruction, ref, obj, offset, index, scale_factor, needs_null_check, temp_reg);
- AddSlowPath(slow_path);
-
- __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
- // Fast path: the GC is not marking: just load the reference.
- GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check);
- __ Bind(slow_path->GetExitLabel());
- MaybeGenerateMarkingRegisterCheck(/* code */ 21);
-}
-
-void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl32::Register obj,
- Location field_offset,
- Location temp,
- bool needs_null_check,
- vixl32::Register temp2) {
- DCHECK(kEmitCompilerReadBarrier);
- DCHECK(kUseBakerReadBarrier);
-
- // Query `art::Thread::Current()->GetIsGcMarking()` (stored in the
- // Marking Register) to decide whether we need to enter the slow
- // path to update the reference field within `obj`. Then, in the
- // slow path, check the gray bit in the lock word of the reference's
- // holder (`obj`) to decide whether to mark `ref` and update the
- // field or not.
+ // We use shared thunks for the slow path; shared within the method
+ // for JIT, across methods for AOT. That thunk checks the holder
+ // and jumps to the entrypoint if needed. If the holder is not gray,
+ // it creates a fake dependency and returns to the LDR instruction.
//
- // if (mr) { // Thread::Current()->GetIsGcMarking()
- // // Slow path.
- // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
- // lfence; // Load fence or artificial data dependency to prevent load-load reordering
- // HeapReference<mirror::Object> ref = *(obj + field_offset); // Reference load.
- // bool is_gray = (rb_state == ReadBarrier::GrayState());
- // if (is_gray) {
- // old_ref = ref;
- // entrypoint = Thread::Current()->pReadBarrierMarkReg ## root.reg()
- // ref = entrypoint(ref); // ref = ReadBarrier::Mark(ref); // Runtime entry point call.
- // compareAndSwapObject(obj, field_offset, old_ref, ref);
+ // lr = &gray_return_address;
+ // if (mr) { // Thread::Current()->GetIsGcMarking()
+ // goto array_thunk<base_reg>(lr)
// }
- // }
+ // not_gray_return_address:
+ // // Original reference load. If the offset is too large to fit
+ // // into LDR, we use an adjusted base register here.
+ // HeapReference<mirror::Object> reference = data[index];
+ // gray_return_address:
- vixl32::Register temp_reg = RegisterFrom(temp);
+ DCHECK(index.IsValid());
+ vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32);
+ vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference);
+ vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer.
- // Slow path updating the object reference at address `obj + field_offset`
- // when the GC is marking. The entrypoint will be loaded by the slow path code.
- SlowPathCodeARMVIXL* slow_path =
- new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL(
- instruction,
- ref,
- obj,
- /* offset */ 0u,
- /* index */ field_offset,
- /* scale_factor */ ScaleFactor::TIMES_1,
- needs_null_check,
- temp_reg,
- temp2);
- AddSlowPath(slow_path);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ uint32_t custom_data = EncodeBakerReadBarrierArrayData(data_reg.GetCode());
- __ CompareAndBranchIfNonZero(mr, slow_path->GetEntryLabel());
- // Fast path: the GC is not marking: nothing to do (the field is
- // up-to-date, and we don't need to load the reference).
- __ Bind(slow_path->GetExitLabel());
- MaybeGenerateMarkingRegisterCheck(/* code */ 22);
-}
-
-void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction,
- Location ref,
- vixl::aarch32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check) {
- DataType::Type type = DataType::Type::kReference;
- vixl32::Register ref_reg = RegisterFrom(ref, type);
-
- // If needed, vixl::EmissionCheckScope guards are used to ensure
- // that no pools are emitted between the load (macro) instruction
- // and MaybeRecordImplicitNullCheck.
-
- if (index.IsValid()) {
- // Load types involving an "index": ArrayGet,
- // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
- // intrinsics.
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset + (index << scale_factor))
- if (index.IsConstant()) {
- size_t computed_offset =
- (Int32ConstantFrom(index) << scale_factor) + offset;
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- } else {
- // Handle the special case of the
- // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
- // intrinsics, which use a register pair as index ("long
- // offset"), of which only the low part contains data.
- vixl32::Register index_reg = index.IsRegisterPair()
- ? LowRegisterFrom(index)
- : RegisterFrom(index);
- UseScratchRegisterScope temps(GetVIXLAssembler());
- vixl32::Register temp = temps.Acquire();
- __ Add(temp, obj, Operand(index_reg, ShiftType::LSL, scale_factor));
- {
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, temp, offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
- }
- }
- }
- } else {
- // /* HeapReference<mirror::Object> */ ref = *(obj + offset)
- vixl::EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
- GetAssembler()->LoadFromOffset(kLoadWord, ref_reg, obj, offset);
- if (needs_null_check) {
- MaybeRecordImplicitNullCheck(instruction);
+ __ Add(data_reg, obj, Operand(data_offset));
+ {
+ size_t narrow_instructions = /* CMP */ (mr.IsLow() ? 1u : 0u);
+ size_t wide_instructions =
+ /* ADR+CMP+BNE+LDR+unpoison? */ (kPoisonHeapReferences ? 5u : 4u) - narrow_instructions;
+ size_t exact_size = wide_instructions * vixl32::k32BitT32InstructionSizeInBytes +
+ narrow_instructions * vixl32::k16BitT32InstructionSizeInBytes;
+ ExactAssemblyScope guard(GetVIXLAssembler(), exact_size);
+ vixl32::Label return_address;
+ EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address);
+ __ cmp(mr, Operand(0));
+ EmitBakerReadBarrierBne(custom_data);
+ ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset();
+ __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor));
+ DCHECK(!needs_null_check); // The thunk cannot handle the null check.
+ // Note: We need a Wide NEG for the unpoisoning.
+ if (kPoisonHeapReferences) {
+ __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0));
}
+ __ bind(&return_address);
+ DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(),
+ BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET);
}
-
- // Object* ref = ref_addr->AsMirrorPtr()
- GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
+ MaybeGenerateMarkingRegisterCheck(/* code= */ 21, /* temp_loc= */ LocationFrom(ip));
}
void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) {
@@ -8855,7 +8659,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruct
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+ ArtMethod* method ATTRIBUTE_UNUSED) {
return desired_dispatch_info;
}
@@ -8905,9 +8709,14 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
EmitMovwMovtPlaceholder(labels, temp_reg);
break;
}
- case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* labels = NewBootImageRelRoPatch(boot_image_offset);
+ vixl32::Register temp_reg = RegisterFrom(temp);
+ EmitMovwMovtPlaceholder(labels, temp_reg);
+      GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
PcRelativePatchInfo* labels = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
@@ -8916,6 +8725,9 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
      GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset= */ 0);
break;
}
+ case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
+ __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
+ break;
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
@@ -9005,6 +8817,18 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall(
}
}
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageIntrinsicPatch(
+ uint32_t intrinsic_data) {
+  return NewPcRelativePatch(
+      /* dex_file= */ nullptr, intrinsic_data, &boot_image_intrinsic_patches_);
+}
+
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset) {
+ return NewPcRelativePatch(/* dex_file= */ nullptr,
+ boot_image_offset,
+ &boot_image_method_patches_);
+}
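
The kBootImageRelRo load emitted in GenerateStaticOrDirectCall above is a two-step indirection: the MOVW/MOVT(+ADD pc) placeholder is patched at link time to the address of a .data.bimg.rel.ro slot, and the trailing LDR with /* offset= */ 0 reads the relocated boot-image pointer out of that slot. A reduced sketch of the runtime effect, with the patched address computation stubbed out as a parameter:

    #include <cstdint>

    // After linking, the placeholders yield `patched_slot`; the load kind then
    // costs exactly one extra dereference per call site.
    uintptr_t ReadRelRoSlot(const uintptr_t* patched_slot) {
      return *patched_slot;  // The LDR with zero offset.
    }
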
+
CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch(
MethodReference target_method) {
return NewPcRelativePatch(
@@ -9043,13 +8867,24 @@ CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePa
return &patches->back();
}
-vixl::aarch32::Label* CodeGeneratorARMVIXL::NewBakerReadBarrierPatch(uint32_t custom_data) {
- baker_read_barrier_patches_.emplace_back(custom_data);
- return &baker_read_barrier_patches_.back().label;
+void CodeGeneratorARMVIXL::EmitBakerReadBarrierBne(uint32_t custom_data) {
+ DCHECK(!__ AllowMacroInstructions()); // In ExactAssemblyScope.
+ if (Runtime::Current()->UseJitCompilation()) {
+ auto it = jit_baker_read_barrier_slow_paths_.FindOrAdd(custom_data);
+ vixl::aarch32::Label* slow_path_entry = &it->second.label;
+ __ b(ne, EncodingSize(Wide), slow_path_entry);
+ } else {
+ baker_read_barrier_patches_.emplace_back(custom_data);
+ vixl::aarch32::Label* patch_label = &baker_read_barrier_patches_.back().label;
+ __ bind(patch_label);
+ vixl32::Label placeholder_label;
+ __ b(ne, EncodingSize(Wide), &placeholder_label); // Placeholder, patched at link-time.
+ __ bind(&placeholder_label);
+ }
}
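
EmitBakerReadBarrierBne is where the "shared within the method for JIT" promise from the comments is kept: JIT slow-path stubs are keyed by the encoded custom_data, so every load with the same register assignment branches to a single stub emitted once. A stand-in for that deduplication using std::map (ART's FindOrAdd is a helper on its own container types):

    #include <cstdint>
    #include <map>

    struct SlowPathStub { int label_id; };  // Stand-in for the VIXL label.

    SlowPathStub* GetOrCreateStub(std::map<uint32_t, SlowPathStub>* stubs,
                                  uint32_t custom_data,
                                  int next_label_id) {
      auto it = stubs->find(custom_data);
      if (it == stubs->end()) {
        it = stubs->emplace(custom_data, SlowPathStub{next_label_id}).first;
      }
      return &it->second;  // Same custom_data, same stub.
    }
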
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
- return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
+ return DeduplicateUint32Literal(address, &uint32_literals_);
}
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
@@ -9060,7 +8895,7 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral(
return jit_string_patches_.GetOrCreate(
StringReference(&dex_file, string_index),
[this]() {
- return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
+ return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
});
}
@@ -9071,10 +8906,50 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFil
return jit_class_patches_.GetOrCreate(
TypeReference(&dex_file, type_index),
[this]() {
- return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u);
+ return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ 0u);
});
}
+void CodeGeneratorARMVIXL::LoadBootImageAddress(vixl32::Register reg,
+ uint32_t boot_image_reference) {
+ if (GetCompilerOptions().IsBootImage()) {
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+ NewBootImageIntrinsicPatch(boot_image_reference);
+ EmitMovwMovtPlaceholder(labels, reg);
+ } else if (GetCompilerOptions().GetCompilePic()) {
+ CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
+ NewBootImageRelRoPatch(boot_image_reference);
+ EmitMovwMovtPlaceholder(labels, reg);
+ __ Ldr(reg, MemOperand(reg, /* offset= */ 0));
+ } else {
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ DCHECK(!heap->GetBootImageSpaces().empty());
+ uintptr_t address =
+ reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
+ __ Ldr(reg, DeduplicateBootImageAddressLiteral(dchecked_integral_cast<uint32_t>(address)));
+ }
+}
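
The JIT branch of LoadBootImageAddress can fold everything into a constant because the boot image is already mapped when the method is compiled: the reference is an offset from the first image space's begin address, and the sum is embedded as a 32-bit literal. The arithmetic in isolation:

    #include <cstdint>

    // boot_image_begin corresponds to heap->GetBootImageSpaces()[0]->Begin()
    // in the code above; the result becomes a literal in the compiled method.
    uintptr_t JitBootImageAddress(uintptr_t boot_image_begin, uint32_t boot_image_reference) {
      return boot_image_begin + boot_image_reference;
    }
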
+
+void CodeGeneratorARMVIXL::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+ uint32_t boot_image_offset) {
+ DCHECK(invoke->IsStatic());
+ InvokeRuntimeCallingConventionARMVIXL calling_convention;
+ vixl32::Register argument = calling_convention.GetRegisterAt(0);
+ if (GetCompilerOptions().IsBootImage()) {
+ DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+ // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ MethodReference target_method = invoke->GetTargetMethod();
+ dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+ PcRelativePatchInfo* labels = NewBootImageTypePatch(*target_method.dex_file, type_idx);
+ EmitMovwMovtPlaceholder(labels, argument);
+ } else {
+ LoadBootImageAddress(argument, boot_image_offset);
+ }
+ InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+}
+
template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
const ArenaDeque<PcRelativePatchInfo>& infos,
@@ -9095,6 +8970,15 @@ inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches(
}
}
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset);
+}
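
NoDexFileAdapter exists purely to bridge signatures: EmitPcRelativeLinkerPatches hands every factory a dex file argument, but the RelRo and intrinsic patch factories take none, so the adapter asserts the dex file is null and forwards the rest. The same trick in miniature, with toy factory types:

    #include <cassert>
    #include <cstddef>

    int MakePatch(size_t literal_offset, int pc_insn_offset, int data) {
      return static_cast<int>(literal_offset) + pc_insn_offset + data;
    }

    // Adapt a three-argument factory to a caller that always passes a null
    // placeholder for the unused dex file.
    template <int (*Factory)(size_t, int, int)>
    int AdaptNoDexFile(size_t literal_offset, const void* dex_file, int pc_insn_offset, int data) {
      assert(dex_file == nullptr);  // Unused for these patch kinds.
      return Factory(literal_offset, pc_insn_offset, data);
    }

    int main() {
      assert(AdaptNoDexFile<MakePatch>(1, nullptr, 2, 3) == 6);
    }
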
+
void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -9104,6 +8988,7 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l
/* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() +
/* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() +
+ /* MOVW+MOVT for each entry */ 2u * boot_image_intrinsic_patches_.size() +
baker_read_barrier_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
@@ -9113,12 +8998,14 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l
boot_image_type_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+ boot_image_intrinsic_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
+ DCHECK(boot_image_intrinsic_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -9133,13 +9020,52 @@ void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* l
DCHECK_EQ(size, linker_patches->size());
}
+bool CodeGeneratorARMVIXL::NeedsThunkCode(const linker::LinkerPatch& patch) const {
+ return patch.GetType() == linker::LinkerPatch::Type::kBakerReadBarrierBranch ||
+ patch.GetType() == linker::LinkerPatch::Type::kCallRelative;
+}
+
+void CodeGeneratorARMVIXL::EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name) {
+ arm::ArmVIXLAssembler assembler(GetGraph()->GetAllocator());
+ switch (patch.GetType()) {
+ case linker::LinkerPatch::Type::kCallRelative:
+ // The thunk just uses the entry point in the ArtMethod. This works even for calls
+ // to the generic JNI and interpreter trampolines.
+ assembler.LoadFromOffset(
+ arm::kLoadWord,
+ vixl32::pc,
+ vixl32::r0,
+ ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+ assembler.GetVIXLAssembler()->Bkpt(0);
+ if (GetCompilerOptions().GenerateAnyDebugInfo()) {
+ *debug_name = "MethodCallThunk";
+ }
+ break;
+ case linker::LinkerPatch::Type::kBakerReadBarrierBranch:
+ DCHECK_EQ(patch.GetBakerCustomValue2(), 0u);
+ CompileBakerReadBarrierThunk(assembler, patch.GetBakerCustomValue1(), debug_name);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected patch type " << patch.GetType();
+ UNREACHABLE();
+ }
+
+ // Ensure we emit the literal pool if any.
+ assembler.FinalizeCode();
+ code->resize(assembler.CodeSize());
+ MemoryRegion code_region(code->data(), code->size());
+ assembler.FinalizeInstructions(code_region);
+}
+
VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal(
uint32_t value,
Uint32ToLiteralMap* map) {
return map->GetOrCreate(
value,
[this, value]() {
- return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ value);
+ return GetAssembler()->CreateLiteralDestroyedWithPool<uint32_t>(/* value= */ value);
});
}
@@ -9366,9 +9292,9 @@ void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
CodeBufferCheckScope::kMaximumSize);
// TODO(VIXL): Think about using mov instead of movw.
__ bind(&labels->movw_label);
- __ movw(out, /* placeholder */ 0u);
+ __ movw(out, /* operand= */ 0u);
__ bind(&labels->movt_label);
- __ movt(out, /* placeholder */ 0u);
+ __ movt(out, /* operand= */ 0u);
__ bind(&labels->add_pc_label);
__ add(out, out, pc);
}
@@ -9377,5 +9303,224 @@ void CodeGeneratorARMVIXL::EmitMovwMovtPlaceholder(
#undef QUICK_ENTRY_POINT
#undef TODO_VIXL32
+#define __ assembler.GetVIXLAssembler()->
+
+static void EmitGrayCheckAndFastPath(ArmVIXLAssembler& assembler,
+ vixl32::Register base_reg,
+ vixl32::MemOperand& lock_word,
+ vixl32::Label* slow_path,
+ int32_t raw_ldr_offset,
+ vixl32::Label* throw_npe = nullptr) {
+ // Load the lock word containing the rb_state.
+ __ Ldr(ip, lock_word);
+ // Given the numeric representation, it's enough to check the low bit of the rb_state.
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
+ static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+ __ Tst(ip, Operand(LockWord::kReadBarrierStateMaskShifted));
+ __ B(ne, slow_path, /* is_far_target= */ false);
+ // To throw NPE, we return to the fast path; the artificial dependence below does not matter.
+ if (throw_npe != nullptr) {
+ __ Bind(throw_npe);
+ }
+ __ Add(lr, lr, raw_ldr_offset);
+ // Introduce a dependency on the lock_word including rb_state,
+ // to prevent load-load reordering, and without using
+ // a memory barrier (which would be more expensive).
+ __ Add(base_reg, base_reg, Operand(ip, LSR, 32));
+ __ Bx(lr); // And return back to the function.
+ // Note: The fake dependency is unnecessary for the slow path.
+}
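
The Operand(ip, LSR, 32) above is the load-load ordering trick spelled out in the comment: shifting a 32-bit value right by 32 yields zero (AArch32 encodes LSR #32 explicitly), so the ADD leaves base_reg unchanged while making the upcoming reference load address-depend on the lock-word load, avoiding a DMB. In C++ the always-zero term needs a 64-bit detour, because shifting a 32-bit value by 32 is undefined behavior there:

    #include <cstdint>

    // base + (lock_word >> 32) == base, but the address now "uses" lock_word.
    uint32_t AddFakeDependency(uint32_t base, uint32_t lock_word) {
      uint32_t zero = static_cast<uint32_t>(static_cast<uint64_t>(lock_word) >> 32);
      return base + zero;
    }
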
+
+// Load the read barrier introspection entrypoint in register `entrypoint`.
+static vixl32::Register LoadReadBarrierMarkIntrospectionEntrypoint(ArmVIXLAssembler& assembler) {
+ // The register where the read barrier introspection entrypoint is loaded
+ // is the marking register. We clobber it here and the entrypoint restores it to 1.
+ vixl32::Register entrypoint = mr;
+ // entrypoint = Thread::Current()->pReadBarrierMarkReg12, i.e. pReadBarrierMarkIntrospection.
+ DCHECK_EQ(ip.GetCode(), 12u);
+ const int32_t entry_point_offset =
+ Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ip.GetCode());
+ __ Ldr(entrypoint, MemOperand(tr, entry_point_offset));
+ return entrypoint;
+}
+
+void CodeGeneratorARMVIXL::CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
+ uint32_t encoded_data,
+ /*out*/ std::string* debug_name) {
+ BakerReadBarrierKind kind = BakerReadBarrierKindField::Decode(encoded_data);
+ switch (kind) {
+ case BakerReadBarrierKind::kField: {
+ vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ vixl32::Register holder_reg(BakerReadBarrierSecondRegField::Decode(encoded_data));
+ CheckValidReg(holder_reg.GetCode());
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip);
+ // If base_reg differs from holder_reg, the offset was too large and we must have emitted
+ // an explicit null check before the load. Otherwise, for implicit null checks, we need to
+ // null-check the holder as we do not necessarily do that check before going to the thunk.
+ vixl32::Label throw_npe_label;
+ vixl32::Label* throw_npe = nullptr;
+ if (GetCompilerOptions().GetImplicitNullChecks() && holder_reg.Is(base_reg)) {
+ throw_npe = &throw_npe_label;
+ __ CompareAndBranchIfZero(holder_reg, throw_npe, /* is_far_target= */ false);
+ }
+ // Check if the holder is gray and, if not, add fake dependency to the base register
+ // and return to the LDR instruction to load the reference. Otherwise, use introspection
+ // to load the reference and call the entrypoint that performs further checks on the
+ // reference and marks it if needed.
+ vixl32::Label slow_path;
+ MemOperand lock_word(holder_reg, mirror::Object::MonitorOffset().Int32Value());
+ const int32_t raw_ldr_offset = (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
+ : BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET;
+ EmitGrayCheckAndFastPath(
+ assembler, base_reg, lock_word, &slow_path, raw_ldr_offset, throw_npe);
+ __ Bind(&slow_path);
+ const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+ raw_ldr_offset;
+ vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
+ if (width == BakerReadBarrierWidth::kWide) {
+ MemOperand ldr_half_address(lr, ldr_offset + 2);
+ __ Ldrh(ip, ldr_half_address); // Load the LDR immediate half-word with "Rt | imm12".
+ __ Ubfx(ip, ip, 0, 12); // Extract the offset imm12.
+ __ Ldr(ip, MemOperand(base_reg, ip)); // Load the reference.
+ } else {
+ MemOperand ldr_address(lr, ldr_offset);
+ __ Ldrh(ip, ldr_address); // Load the LDR immediate, encoding T1.
+ __ Add(ep_reg, // Adjust the entrypoint address to the entrypoint
+ ep_reg, // for narrow LDR.
+ Operand(BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_ENTRYPOINT_OFFSET));
+ __ Ubfx(ip, ip, 6, 5); // Extract the imm5, i.e. offset / 4.
+ __ Ldr(ip, MemOperand(base_reg, ip, LSL, 2)); // Load the reference.
+ }
+ // Do not unpoison. With heap poisoning enabled, the entrypoint expects a poisoned reference.
+ __ Bx(ep_reg); // Jump to the entrypoint.
+ break;
+ }
+ case BakerReadBarrierKind::kArray: {
+ vixl32::Register base_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(base_reg.GetCode());
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip);
+ vixl32::Label slow_path;
+ int32_t data_offset =
+ mirror::Array::DataOffset(Primitive::ComponentSize(Primitive::kPrimNot)).Int32Value();
+ MemOperand lock_word(base_reg, mirror::Object::MonitorOffset().Int32Value() - data_offset);
+ DCHECK_LT(lock_word.GetOffsetImmediate(), 0);
+ const int32_t raw_ldr_offset = BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET;
+ EmitGrayCheckAndFastPath(assembler, base_reg, lock_word, &slow_path, raw_ldr_offset);
+ __ Bind(&slow_path);
+ const int32_t ldr_offset = /* Thumb state adjustment (LR contains Thumb state). */ -1 +
+ raw_ldr_offset;
+ MemOperand ldr_address(lr, ldr_offset + 2);
+ __ Ldrb(ip, ldr_address); // Load the LDR (register) byte with "00 | imm2 | Rm",
+ // i.e. Rm+32 because the scale in imm2 is 2.
+ vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
+      __ Bfi(ep_reg, ip, 3, 6);           // Insert ip into the entrypoint address to create
+ // a switch case target based on the index register.
+      __ Mov(ip, base_reg);               // Move the base register to ip.
+ __ Bx(ep_reg); // Jump to the entrypoint's array switch case.
+ break;
+ }
+ case BakerReadBarrierKind::kGcRoot:
+ case BakerReadBarrierKind::kUnsafeCas: {
+ // Check if the reference needs to be marked and if so (i.e. not null, not marked yet
+ // and it does not have a forwarding address), call the correct introspection entrypoint;
+ // otherwise return the reference (or the extracted forwarding address).
+ // There is no gray bit check for GC roots.
+ vixl32::Register root_reg(BakerReadBarrierFirstRegField::Decode(encoded_data));
+ CheckValidReg(root_reg.GetCode());
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ BakerReadBarrierWidth width = BakerReadBarrierWidthField::Decode(encoded_data);
+ UseScratchRegisterScope temps(assembler.GetVIXLAssembler());
+ temps.Exclude(ip);
+ vixl32::Label return_label, not_marked, forwarding_address;
+ __ CompareAndBranchIfZero(root_reg, &return_label, /* is_far_target= */ false);
+ MemOperand lock_word(root_reg, mirror::Object::MonitorOffset().Int32Value());
+ __ Ldr(ip, lock_word);
+ __ Tst(ip, LockWord::kMarkBitStateMaskShifted);
+ __ B(eq, &not_marked);
+ __ Bind(&return_label);
+ __ Bx(lr);
+ __ Bind(&not_marked);
+ static_assert(LockWord::kStateShift == 30 && LockWord::kStateForwardingAddress == 3,
+ "To use 'CMP ip, #modified-immediate; BHS', we need the lock word state in "
+                    "the highest bits and the 'forwarding address' state to have all bits set");
+ __ Cmp(ip, Operand(0xc0000000));
+ __ B(hs, &forwarding_address);
+ vixl32::Register ep_reg = LoadReadBarrierMarkIntrospectionEntrypoint(assembler);
+      // Adjust the art_quick_read_barrier_mark_introspection address in `ep_reg` (the marking
+      // register, loaded just above) to one of
+      //   art_quick_read_barrier_mark_introspection_{gc_roots_{wide,narrow},unsafe_cas}.
+ DCHECK(kind != BakerReadBarrierKind::kUnsafeCas || width == BakerReadBarrierWidth::kWide);
+ int32_t entrypoint_offset =
+ (kind == BakerReadBarrierKind::kGcRoot)
+ ? (width == BakerReadBarrierWidth::kWide)
+ ? BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_ENTRYPOINT_OFFSET
+ : BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_ENTRYPOINT_OFFSET
+ : BAKER_MARK_INTROSPECTION_UNSAFE_CAS_ENTRYPOINT_OFFSET;
+ __ Add(ep_reg, ep_reg, Operand(entrypoint_offset));
+ __ Mov(ip, root_reg);
+ __ Bx(ep_reg);
+ __ Bind(&forwarding_address);
+ __ Lsl(root_reg, ip, LockWord::kForwardingAddressShift);
+ __ Bx(lr);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected kind: " << static_cast<uint32_t>(kind);
+ UNREACHABLE();
+ }
+
+ // For JIT, the slow path is considered part of the compiled method,
+ // so JIT should pass null as `debug_name`. Tests may not have a runtime.
+ DCHECK(Runtime::Current() == nullptr ||
+ !Runtime::Current()->UseJitCompilation() ||
+ debug_name == nullptr);
+ if (debug_name != nullptr && GetCompilerOptions().GenerateAnyDebugInfo()) {
+ std::ostringstream oss;
+ oss << "BakerReadBarrierThunk";
+ switch (kind) {
+ case BakerReadBarrierKind::kField:
+ oss << "Field";
+ if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
+ oss << "Wide";
+ }
+ oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data)
+ << "_r" << BakerReadBarrierSecondRegField::Decode(encoded_data);
+ break;
+ case BakerReadBarrierKind::kArray:
+ oss << "Array_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
+ break;
+ case BakerReadBarrierKind::kGcRoot:
+ oss << "GcRoot";
+ if (BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide) {
+ oss << "Wide";
+ }
+ oss << "_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ break;
+ case BakerReadBarrierKind::kUnsafeCas:
+ oss << "UnsafeCas_r" << BakerReadBarrierFirstRegField::Decode(encoded_data);
+ DCHECK_EQ(kBakerReadBarrierInvalidEncodedReg,
+ BakerReadBarrierSecondRegField::Decode(encoded_data));
+ DCHECK(BakerReadBarrierWidthField::Decode(encoded_data) == BakerReadBarrierWidth::kWide);
+ break;
+ }
+ *debug_name = oss.str();
+ }
+}
+
+#undef __
+
} // namespace arm
} // namespace art
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 536da41d07..5edca87147 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -36,6 +36,11 @@
#pragma GCC diagnostic pop
namespace art {
+
+namespace linker {
+class Thumb2RelativePatcherTest;
+} // namespace linker
+
namespace arm {
// This constant is used as an approximate margin when emission of veneer and literal pools
@@ -173,9 +178,9 @@ class InvokeDexCallingConventionVisitorARMVIXL : public InvokeDexCallingConventi
InvokeDexCallingConventionVisitorARMVIXL() {}
virtual ~InvokeDexCallingConventionVisitorARMVIXL() {}
- Location GetNextLocation(DataType::Type type) OVERRIDE;
- Location GetReturnLocation(DataType::Type type) const OVERRIDE;
- Location GetMethodLocation() const OVERRIDE;
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
private:
InvokeDexCallingConventionARMVIXL calling_convention;
@@ -188,25 +193,25 @@ class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention
public:
FieldAccessCallingConventionARMVIXL() {}
- Location GetObjectLocation() const OVERRIDE {
+ Location GetObjectLocation() const override {
return helpers::LocationFrom(vixl::aarch32::r1);
}
- Location GetFieldIndexLocation() const OVERRIDE {
+ Location GetFieldIndexLocation() const override {
return helpers::LocationFrom(vixl::aarch32::r0);
}
- Location GetReturnLocation(DataType::Type type) const OVERRIDE {
+ Location GetReturnLocation(DataType::Type type) const override {
return DataType::Is64BitType(type)
? helpers::LocationFrom(vixl::aarch32::r0, vixl::aarch32::r1)
: helpers::LocationFrom(vixl::aarch32::r0);
}
- Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE {
+ Location GetSetValueLocation(DataType::Type type, bool is_instance) const override {
return DataType::Is64BitType(type)
? helpers::LocationFrom(vixl::aarch32::r2, vixl::aarch32::r3)
: (is_instance
? helpers::LocationFrom(vixl::aarch32::r2)
: helpers::LocationFrom(vixl::aarch32::r1));
}
- Location GetFpuLocation(DataType::Type type) const OVERRIDE {
+ Location GetFpuLocation(DataType::Type type) const override {
return DataType::Is64BitType(type)
? helpers::LocationFrom(vixl::aarch32::s0, vixl::aarch32::s1)
: helpers::LocationFrom(vixl::aarch32::s0);
@@ -224,8 +229,8 @@ class SlowPathCodeARMVIXL : public SlowPathCode {
vixl::aarch32::Label* GetEntryLabel() { return &entry_label_; }
vixl::aarch32::Label* GetExitLabel() { return &exit_label_; }
- void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
- void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) OVERRIDE;
+ void SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;
+ void RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) override;
private:
vixl::aarch32::Label entry_label_;
@@ -239,10 +244,10 @@ class ParallelMoveResolverARMVIXL : public ParallelMoveResolverWithSwap {
ParallelMoveResolverARMVIXL(ArenaAllocator* allocator, CodeGeneratorARMVIXL* codegen)
: ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
- void EmitMove(size_t index) OVERRIDE;
- void EmitSwap(size_t index) OVERRIDE;
- void SpillScratch(int reg) OVERRIDE;
- void RestoreScratch(int reg) OVERRIDE;
+ void EmitMove(size_t index) override;
+ void EmitSwap(size_t index) override;
+ void SpillScratch(int reg) override;
+ void RestoreScratch(int reg) override;
ArmVIXLAssembler* GetAssembler() const;
@@ -261,7 +266,7 @@ class LocationsBuilderARMVIXL : public HGraphVisitor {
: HGraphVisitor(graph), codegen_(codegen) {}
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
@@ -269,7 +274,7 @@ class LocationsBuilderARMVIXL : public HGraphVisitor {
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -299,7 +304,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
InstructionCodeGeneratorARMVIXL(HGraph* graph, CodeGeneratorARMVIXL* codegen);
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)
@@ -307,7 +312,7 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -322,6 +327,9 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path,
vixl32::Register class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ vixl::aarch32::Register temp,
+ vixl::aarch32::FlagsUpdate flags_update);
void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
@@ -349,6 +357,12 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxLong(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxFloat(HInstruction* minmax, bool is_min);
+ void GenerateMinMaxDouble(HInstruction* minmax, bool is_min);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -379,16 +393,6 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
uint32_t offset,
Location maybe_temp,
ReadBarrierOption read_barrier_option);
- // Generate a GC root reference load:
- //
- // root <- *(obj + offset)
- //
- // while honoring read barriers based on read_barrier_option.
- void GenerateGcRootFieldLoad(HInstruction* instruction,
- Location root,
- vixl::aarch32::Register obj,
- uint32_t offset,
- ReadBarrierOption read_barrier_option);
void GenerateTestAndBranch(HInstruction* instruction,
size_t condition_input_index,
vixl::aarch32::Label* true_target,
@@ -424,53 +428,55 @@ class InstructionCodeGeneratorARMVIXL : public InstructionCodeGenerator {
class CodeGeneratorARMVIXL : public CodeGenerator {
public:
CodeGeneratorARMVIXL(HGraph* graph,
- const ArmInstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
virtual ~CodeGeneratorARMVIXL() {}
- void GenerateFrameEntry() OVERRIDE;
- void GenerateFrameExit() OVERRIDE;
- void Bind(HBasicBlock* block) OVERRIDE;
- void MoveConstant(Location destination, int32_t value) OVERRIDE;
- void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE;
- void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+ void GenerateFrameEntry() override;
+ void GenerateFrameExit() override;
+ void Bind(HBasicBlock* block) override;
+ void MoveConstant(Location destination, int32_t value) override;
+ void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
+ void AddLocationAsTemp(Location location, LocationSummary* locations) override;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
- size_t GetWordSize() const OVERRIDE {
+ size_t GetWordSize() const override {
return static_cast<size_t>(kArmPointerSize);
}
- size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; }
+ size_t GetFloatingPointSpillSlotSize() const override { return vixl::aarch32::kRegSizeInBytes; }
- HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
+ HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
- HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
+ HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; }
- ArmVIXLAssembler* GetAssembler() OVERRIDE { return &assembler_; }
+ ArmVIXLAssembler* GetAssembler() override { return &assembler_; }
- const ArmVIXLAssembler& GetAssembler() const OVERRIDE { return assembler_; }
+ const ArmVIXLAssembler& GetAssembler() const override { return assembler_; }
ArmVIXLMacroAssembler* GetVIXLAssembler() { return GetAssembler()->GetVIXLAssembler(); }
- uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+ uintptr_t GetAddressOf(HBasicBlock* block) override {
vixl::aarch32::Label* block_entry_label = GetLabelOf(block);
DCHECK(block_entry_label->IsBound());
return block_entry_label->GetLocation();
}
void FixJumpTables();
- void SetupBlockedRegisters() const OVERRIDE;
+ void SetupBlockedRegisters() const override;
+
+ void DumpCoreRegister(std::ostream& stream, int reg) const override;
+ void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
- void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
- void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; }
+ InstructionSet GetInstructionSet() const override { return InstructionSet::kThumb2; }
+
+ const ArmInstructionSetFeatures& GetInstructionSetFeatures() const;
- ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
- InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; }
// Helper method to move a 32-bit value between two locations.
void Move32(Location destination, Location source);
@@ -489,7 +495,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path = nullptr) OVERRIDE;
+ SlowPathCode* slow_path = nullptr) override;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -513,44 +519,42 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl32::Label* GetFinalLabel(HInstruction* instruction, vixl32::Label* final_label);
- void Initialize() OVERRIDE {
+ void Initialize() override {
block_labels_.resize(GetGraph()->GetBlocks().size());
}
- void Finalize(CodeAllocator* allocator) OVERRIDE;
-
- const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; }
+ void Finalize(CodeAllocator* allocator) override;
- bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE {
+ bool NeedsTwoRegisters(DataType::Type type) const override {
return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64;
}
- void ComputeSpillMask() OVERRIDE;
+ void ComputeSpillMask() override;
vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; }
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadString::LoadKind GetSupportedLoadStringKind(
- HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+ HLoadString::LoadKind desired_string_load_kind) override;
// Check if the desired_class_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadClass::LoadKind GetSupportedLoadClassKind(
- HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+ HLoadClass::LoadKind desired_class_load_kind) override;
// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke) OVERRIDE;
+ ArtMethod* method) override;
void GenerateStaticOrDirectCall(
- HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
void GenerateVirtualCall(
- HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
- void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE;
+ void MoveFromReturnRegister(Location trg, DataType::Type type) override;
// The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
// whether through .data.bimg.rel.ro, .bss, or directly in the boot image.
@@ -574,6 +578,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl::aarch32::Label add_pc_label;
};
+ PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data);
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset);
PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method);
PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
@@ -583,9 +589,9 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file,
dex::StringIndex string_index);
- // Add a new baker read barrier patch and return the label to be bound
- // before the BNE instruction.
- vixl::aarch32::Label* NewBakerReadBarrierPatch(uint32_t custom_data);
+ // Emit the BNE instruction for baker read barrier and record
+ // the associated patch for AOT or slow path for JIT.
+ void EmitBakerReadBarrierBne(uint32_t custom_data);
VIXLUInt32Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
VIXLUInt32Literal* DeduplicateJitStringLiteral(const DexFile& dex_file,
@@ -595,14 +601,40 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
dex::TypeIndex type_index,
Handle<mirror::Class> handle);
- void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
+ void LoadBootImageAddress(vixl::aarch32::Register reg, uint32_t boot_image_reference);
+ void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
- void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
+ bool NeedsThunkCode(const linker::LinkerPatch& patch) const override;
+ void EmitThunkCode(const linker::LinkerPatch& patch,
+ /*out*/ ArenaVector<uint8_t>* code,
+ /*out*/ std::string* debug_name) override;
- // Maybe add the reserved entrypoint register as a temporary for field load. This temp
- // is added only for AOT compilation if link-time generated thunks for fields are enabled.
- void MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations);
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
+ // Generate a GC root reference load:
+ //
+ // root <- *(obj + offset)
+ //
+ // while honoring read barriers based on read_barrier_option.
+ void GenerateGcRootFieldLoad(HInstruction* instruction,
+ Location root,
+ vixl::aarch32::Register obj,
+ uint32_t offset,
+ ReadBarrierOption read_barrier_option);
+ // Generate ADD for UnsafeCASObject to reconstruct the old value from
+ // `old_value - expected` and mark it with Baker read barrier.
+ void GenerateUnsafeCasOldValueAddWithBakerReadBarrier(vixl::aarch32::Register old_value,
+ vixl::aarch32::Register adjusted_old_value,
+ vixl::aarch32::Register expected);
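Concretely (an illustration, not part of the patch): the helper receives
adjusted_old_value == old_value - expected and emits an ADD so that

    old_value = adjusted_old_value + expected;  // reconstructed, then marked

before handing old_value to the Baker mark routine.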
+ // Fast path implementation of ReadBarrier::Barrier for a heap
+ // reference field load when Baker's read barriers are used.
+ // Overload suitable for Unsafe.getObject/-Volatile() intrinsic.
+ void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+ Location ref,
+ vixl::aarch32::Register obj,
+ const vixl::aarch32::MemOperand& src,
+ bool needs_null_check);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
@@ -613,56 +645,12 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
bool needs_null_check);
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference array load when Baker's read barriers are used.
- void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
+ void GenerateArrayLoadWithBakerReadBarrier(Location ref,
vixl::aarch32::Register obj,
uint32_t data_offset,
Location index,
Location temp,
bool needs_null_check);
- // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
- // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
- //
- // Load the object reference located at the address
- // `obj + offset + (index << scale_factor)`, held by object `obj`, into
- // `ref`, and mark it if needed.
- void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl::aarch32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- Location temp,
- bool needs_null_check);
-
- // Generate code checking whether the reference field at the
- // address `obj + field_offset`, held by object `obj`, needs to be
- // marked, and if so, marking it and updating the field within `obj`
- // with the marked value.
- //
- // This routine is used for the implementation of the
- // UnsafeCASObject intrinsic with Baker read barriers.
- //
- // This method has a structure similar to
- // GenerateReferenceLoadWithBakerReadBarrier, but note that argument
- // `ref` is only as a temporary here, and thus its value should not
- // be used afterwards.
- void UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction,
- Location ref,
- vixl::aarch32::Register obj,
- Location field_offset,
- Location temp,
- bool needs_null_check,
- vixl::aarch32::Register temp2);
-
- // Generate a heap reference load (with no read barrier).
- void GenerateRawReferenceLoad(HInstruction* instruction,
- Location ref,
- vixl::aarch32::Register obj,
- uint32_t offset,
- Location index,
- ScaleFactor scale_factor,
- bool needs_null_check);
// Emit code checking the status of the Marking Register, and
// aborting the program if MR does not match the value stored in the
@@ -734,10 +722,10 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
// artReadBarrierForRootSlow.
void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
- void GenerateNop() OVERRIDE;
+ void GenerateNop() override;
- void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
- void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) override;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) override;
JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) {
jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARMVIXL(switch_instr));
@@ -757,6 +745,92 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
vixl::aarch32::Register temp = vixl32::Register());
private:
+ // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
+
+ enum class BakerReadBarrierKind : uint8_t {
+ kField, // Field get or array get with constant offset (i.e. constant index).
+ kArray, // Array get with index in register.
+ kGcRoot, // GC root load.
+ kUnsafeCas, // UnsafeCASObject intrinsic.
+ kLast = kUnsafeCas
+ };
+
+ enum class BakerReadBarrierWidth : uint8_t {
+ kWide, // 32-bit LDR (and 32-bit NEG if heap poisoning is enabled).
+ kNarrow, // 16-bit LDR (and 16-bit NEG if heap poisoning is enabled).
+ kLast = kNarrow
+ };
+
+ static constexpr uint32_t kBakerReadBarrierInvalidEncodedReg = /* pc is invalid */ 15u;
+
+ static constexpr size_t kBitsForBakerReadBarrierKind =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierKind::kLast));
+ static constexpr size_t kBakerReadBarrierBitsForRegister =
+ MinimumBitsToStore(kBakerReadBarrierInvalidEncodedReg);
+ using BakerReadBarrierKindField =
+ BitField<BakerReadBarrierKind, 0, kBitsForBakerReadBarrierKind>;
+ using BakerReadBarrierFirstRegField =
+ BitField<uint32_t, kBitsForBakerReadBarrierKind, kBakerReadBarrierBitsForRegister>;
+ using BakerReadBarrierSecondRegField =
+ BitField<uint32_t,
+ kBitsForBakerReadBarrierKind + kBakerReadBarrierBitsForRegister,
+ kBakerReadBarrierBitsForRegister>;
+ static constexpr size_t kBitsForBakerReadBarrierWidth =
+ MinimumBitsToStore(static_cast<size_t>(BakerReadBarrierWidth::kLast));
+ using BakerReadBarrierWidthField =
+ BitField<BakerReadBarrierWidth,
+ kBitsForBakerReadBarrierKind + 2 * kBakerReadBarrierBitsForRegister,
+ kBitsForBakerReadBarrierWidth>;
+
+ static void CheckValidReg(uint32_t reg) {
+ DCHECK(reg < vixl::aarch32::ip.GetCode() && reg != mr.GetCode()) << reg;
+ }
+
+ static uint32_t EncodeBakerReadBarrierFieldData(uint32_t base_reg,
+ uint32_t holder_reg,
+ bool narrow) {
+ CheckValidReg(base_reg);
+ CheckValidReg(holder_reg);
+ DCHECK(!narrow || base_reg < 8u) << base_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kField) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(holder_reg) |
+ BakerReadBarrierWidthField::Encode(width);
+ }
+
+ static uint32_t EncodeBakerReadBarrierArrayData(uint32_t base_reg) {
+ CheckValidReg(base_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kArray) |
+ BakerReadBarrierFirstRegField::Encode(base_reg) |
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
+ }
+
+ static uint32_t EncodeBakerReadBarrierGcRootData(uint32_t root_reg, bool narrow) {
+ CheckValidReg(root_reg);
+ DCHECK(!narrow || root_reg < 8u) << root_reg;
+ BakerReadBarrierWidth width =
+ narrow ? BakerReadBarrierWidth::kNarrow : BakerReadBarrierWidth::kWide;
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kGcRoot) |
+ BakerReadBarrierFirstRegField::Encode(root_reg) |
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(width);
+ }
+
+ static uint32_t EncodeBakerReadBarrierUnsafeCasData(uint32_t root_reg) {
+ CheckValidReg(root_reg);
+ return BakerReadBarrierKindField::Encode(BakerReadBarrierKind::kUnsafeCas) |
+ BakerReadBarrierFirstRegField::Encode(root_reg) |
+ BakerReadBarrierSecondRegField::Encode(kBakerReadBarrierInvalidEncodedReg) |
+ BakerReadBarrierWidthField::Encode(BakerReadBarrierWidth::kWide);
+ }
+
+ void CompileBakerReadBarrierThunk(ArmVIXLAssembler& assembler,
+ uint32_t encoded_data,
+ /*out*/ std::string* debug_name);
+
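A worked example of the packed layout above (an illustration, not part of the
patch; the widths follow from the declarations: 2 kind bits, then 4 bits per
register field, then 1 width bit, with kField == 0 and kWide == 0):

    // EncodeBakerReadBarrierFieldData(/* base_reg= */ 5, /* holder_reg= */ 6,
    //                                 /* narrow= */ false)
    //   == (0u << 0) | (5u << 2) | (6u << 6) | (0u << 10)
    static_assert(((5u << 2) | (6u << 6)) == 0x194u, "kField/r5/r6/wide packs to 0x194");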
vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
vixl::aarch32::Register temp);
@@ -794,11 +868,11 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
ParallelMoveResolverARMVIXL move_resolver_;
ArmVIXLAssembler assembler_;
- const ArmInstructionSetFeatures& isa_features_;
// Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
Uint32ToLiteralMap uint32_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -806,10 +880,12 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+ // PC-relative patch info for IntrinsicObjects.
+ ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
// Baker read barrier patch info.
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
@@ -818,6 +894,20 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
// Patches for class literals in JIT compiled code.
TypeToLiteralMap jit_class_patches_;
+ // Baker read barrier slow paths, mapping custom data (uint32_t) to label.
+ // Wrap the label to work around vixl::aarch32::Label being non-copyable
+ // and non-moveable and as such unusable in ArenaSafeMap<>.
+ struct LabelWrapper {
+ LabelWrapper(const LabelWrapper& src)
+ : label() {
+ DCHECK(!src.label.IsReferenced() && !src.label.IsBound());
+ }
+ LabelWrapper() = default;
+ vixl::aarch32::Label label;
+ };
+ ArenaSafeMap<uint32_t, LabelWrapper> jit_baker_read_barrier_slow_paths_;
+
+ friend class linker::Thumb2RelativePatcherTest;
DISALLOW_COPY_AND_ASSIGN(CodeGeneratorARMVIXL);
};
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 87e6d6834b..72334afa40 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -26,6 +26,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_mips.h"
@@ -146,7 +147,7 @@ Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(DataType::Type t
case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected parameter type " << type;
- break;
+ UNREACHABLE();
}
// Space on the stack is reserved for all arguments.
@@ -159,6 +160,14 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type)
return MipsReturnLocation(type);
}
+static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ // The reference is returned in the same register. This differs from the standard return location.
+ return caller_saves;
+}
+
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorMIPS*>(codegen)->GetAssembler()-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMipsPointerSize, x).Int32Value()
@@ -167,7 +176,7 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
public:
explicit BoundsCheckSlowPathMIPS(HBoundsCheck* instruction) : SlowPathCodeMIPS(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
@@ -192,9 +201,9 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS {
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS"; }
+ const char* GetDescription() const override { return "BoundsCheckSlowPathMIPS"; }
private:
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS);
@@ -204,16 +213,16 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS {
public:
explicit DivZeroCheckSlowPathMIPS(HDivZeroCheck* instruction) : SlowPathCodeMIPS(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
mips_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS"; }
+ const char* GetDescription() const override { return "DivZeroCheckSlowPathMIPS"; }
private:
DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS);
@@ -221,35 +230,41 @@ class DivZeroCheckSlowPathMIPS : public SlowPathCodeMIPS {
class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
public:
- LoadClassSlowPathMIPS(HLoadClass* cls,
- HInstruction* at,
- uint32_t dex_pc,
- bool do_clinit)
- : SlowPathCodeMIPS(at),
- cls_(cls),
- dex_pc_(dex_pc),
- do_clinit_(do_clinit) {
+ LoadClassSlowPathMIPS(HLoadClass* cls, HInstruction* at)
+ : SlowPathCodeMIPS(at), cls_(cls) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Location out = locations->Out();
+ const uint32_t dex_pc = instruction_->GetDexPc();
+ bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
+ bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
+
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
- InvokeRuntimeCallingConvention calling_convention;
- DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- dex::TypeIndex type_index = cls_->GetTypeIndex();
- __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
- QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
- : kQuickInitializeType;
- mips_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
- if (do_clinit_) {
- CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ InvokeRuntimeCallingConvention calling_convention;
+ if (must_resolve_type) {
+ DCHECK(IsSameDexFile(cls_->GetDexFile(), mips_codegen->GetGraph()->GetDexFile()));
+ dex::TypeIndex type_index = cls_->GetTypeIndex();
+ __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
+ mips_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ // If we also must_do_clinit, the resolved type is now in the correct register.
} else {
- CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ DCHECK(must_do_clinit);
+ Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
+ mips_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ source,
+ cls_->GetType());
+ }
+ if (must_do_clinit) {
+ mips_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
}
// Move the class to the desired location.
@@ -265,18 +280,12 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS"; }
+ const char* GetDescription() const override { return "LoadClassSlowPathMIPS"; }
private:
// The class this slow path will load.
HLoadClass* const cls_;
- // The dex PC of `at_`.
- const uint32_t dex_pc_;
-
- // Whether to initialize the class.
- const bool do_clinit_;
-
DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS);
};
@@ -285,7 +294,7 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
explicit LoadStringSlowPathMIPS(HLoadString* instruction)
: SlowPathCodeMIPS(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
DCHECK(instruction_->IsLoadString());
DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
LocationSummary* locations = instruction_->GetLocations();
@@ -309,7 +318,7 @@ class LoadStringSlowPathMIPS : public SlowPathCodeMIPS {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS"; }
+ const char* GetDescription() const override { return "LoadStringSlowPathMIPS"; }
private:
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS);
@@ -319,7 +328,7 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS {
public:
explicit NullCheckSlowPathMIPS(HNullCheck* instr) : SlowPathCodeMIPS(instr) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
@@ -333,9 +342,9 @@ class NullCheckSlowPathMIPS : public SlowPathCodeMIPS {
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS"; }
+ const char* GetDescription() const override { return "NullCheckSlowPathMIPS"; }
private:
DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS);
@@ -346,7 +355,7 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS {
SuspendCheckSlowPathMIPS(HSuspendCheck* instruction, HBasicBlock* successor)
: SlowPathCodeMIPS(instruction), successor_(successor) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
@@ -366,7 +375,7 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS {
return &return_label_;
}
- const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS"; }
+ const char* GetDescription() const override { return "SuspendCheckSlowPathMIPS"; }
HBasicBlock* GetSuccessor() const {
return successor_;
@@ -387,7 +396,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
explicit TypeCheckSlowPathMIPS(HInstruction* instruction, bool is_fatal)
: SlowPathCodeMIPS(instruction), is_fatal_(is_fatal) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
uint32_t dex_pc = instruction_->GetDexPc();
DCHECK(instruction_->IsCheckCast()
@@ -426,9 +435,9 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
}
}
- const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; }
+ const char* GetDescription() const override { return "TypeCheckSlowPathMIPS"; }
- bool IsFatal() const OVERRIDE { return is_fatal_; }
+ bool IsFatal() const override { return is_fatal_; }
private:
const bool is_fatal_;
@@ -441,7 +450,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
explicit DeoptimizationSlowPathMIPS(HDeoptimize* instruction)
: SlowPathCodeMIPS(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
__ Bind(GetEntryLabel());
LocationSummary* locations = instruction_->GetLocations();
@@ -453,7 +462,7 @@ class DeoptimizationSlowPathMIPS : public SlowPathCodeMIPS {
CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
- const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS"; }
+ const char* GetDescription() const override { return "DeoptimizationSlowPathMIPS"; }
private:
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
@@ -463,7 +472,7 @@ class ArraySetSlowPathMIPS : public SlowPathCodeMIPS {
public:
explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -494,7 +503,7 @@ class ArraySetSlowPathMIPS : public SlowPathCodeMIPS {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS"; }
+ const char* GetDescription() const override { return "ArraySetSlowPathMIPS"; }
private:
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS);
@@ -524,9 +533,9 @@ class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; }
+ const char* GetDescription() const override { return "ReadBarrierMarkSlowPathMIPS"; }
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
@@ -578,7 +587,7 @@ class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS {
mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
instruction_,
this,
- /* direct */ false);
+ /* direct= */ false);
}
__ B(GetExitLabel());
}
@@ -618,11 +627,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE {
+ const char* GetDescription() const override {
return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS";
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
@@ -672,7 +681,7 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS {
mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
instruction_,
this,
- /* direct */ false);
+ /* direct= */ false);
// If the new reference is different from the old reference,
// update the field in the holder (`*(obj_ + field_offset_)`).
@@ -789,7 +798,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS {
DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
Register reg_out = out_.AsRegister<Register>();
@@ -913,7 +922,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathMIPS"; }
+ const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathMIPS"; }
private:
Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
@@ -956,7 +965,7 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS {
DCHECK(kEmitCompilerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Register reg_out = out_.AsRegister<Register>();
DCHECK(locations->CanCall());
@@ -986,7 +995,7 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS"; }
+ const char* GetDescription() const override { return "ReadBarrierForRootSlowPathMIPS"; }
private:
const Location out_;
@@ -996,7 +1005,6 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS {
};
CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
- const MipsInstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
: CodeGenerator(graph,
@@ -1013,8 +1021,8 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetAllocator(), this),
- assembler_(graph->GetAllocator(), &isa_features),
- isa_features_(isa_features),
+ assembler_(graph->GetAllocator(),
+ compiler_options.GetInstructionSetFeatures()->AsMipsInstructionSetFeatures()),
uint32_literals_(std::less<uint32_t>(),
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -1023,6 +1031,7 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
clobbered_ra_(false) {
@@ -1042,8 +1051,7 @@ void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) {
// Adjust native pc offsets in stack maps.
StackMapStream* stack_map_stream = GetStackMapStream();
for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) {
- uint32_t old_position =
- stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips);
+ uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i);
uint32_t new_position = __ GetAdjustedPosition(old_position);
DCHECK_GE(new_position, old_position);
stack_map_stream->SetStackMapNativePcOffset(i, new_position);
@@ -1159,9 +1167,9 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) {
__ Move(r2_l, TMP);
__ Move(r2_h, AT);
} else if (loc1.IsStackSlot() && loc2.IsStackSlot()) {
- Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false);
+ Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot= */ false);
} else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) {
- Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true);
+ Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot= */ true);
} else if (loc1.IsSIMDStackSlot() && loc2.IsSIMDStackSlot()) {
ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex());
} else if ((loc1.IsRegister() && loc2.IsStackSlot()) ||
@@ -1597,6 +1605,15 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches(
}
}
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1605,7 +1622,8 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
boot_image_type_patches_.size() +
type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
- string_bss_entry_patches_.size();
+ string_bss_entry_patches_.size() +
+ boot_image_intrinsic_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
@@ -1614,12 +1632,14 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
boot_image_type_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+ boot_image_intrinsic_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
+ DCHECK(boot_image_intrinsic_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -1630,6 +1650,20 @@ void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* link
DCHECK_EQ(size, linker_patches->size());
}
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageIntrinsicPatch(
+ uint32_t intrinsic_data,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_);
+}
+
CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch(
MethodReference target_method,
const PcRelativePatchInfo* info_high) {
@@ -1703,7 +1737,7 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo
__ Bind(&info_high->label);
__ Bind(&info_high->pc_rel_label);
// Add the high half of a 32-bit offset to PC.
- __ Auipc(out, /* placeholder */ 0x1234);
+ __ Auipc(out, /* imm16= */ 0x1234);
__ SetReorder(reordering);
} else {
// If base is ZERO, emit NAL to obtain the actual base.
@@ -1712,7 +1746,7 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo
__ Nal();
}
__ Bind(&info_high->label);
- __ Lui(out, /* placeholder */ 0x1234);
+ __ Lui(out, /* imm16= */ 0x1234);
// If we emitted the NAL, bind the pc_rel_label, otherwise base is a register holding
// the HMipsComputeBaseMethodAddress which has its own label stored in MipsAssembler.
if (base == ZERO) {
@@ -1726,6 +1760,48 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo
// offset to `out` (e.g. lw, jialc, addiu).
}
+void CodeGeneratorMIPS::LoadBootImageAddress(Register reg, uint32_t boot_image_reference) {
+ if (GetCompilerOptions().IsBootImage()) {
+ PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference);
+ PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, /* base= */ ZERO);
+ __ Addiu(reg, TMP, /* imm16= */ 0x5678, &info_low->label);
+ } else if (GetCompilerOptions().GetCompilePic()) {
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference);
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, reg, /* base= */ ZERO);
+ __ Lw(reg, reg, /* imm16= */ 0x5678, &info_low->label);
+ } else {
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ DCHECK(!heap->GetBootImageSpaces().empty());
+ const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
+ __ LoadConst32(reg, dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address)));
+ }
+}
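The 0x1234/0x5678 immediates above are placeholder halves bound to the high/low
patch labels and rewritten at link time; schematically, the boot-image branch
ends up as (addresses illustrative, %hi/%lo notation for exposition only):

    // auipc TMP, %hi(boot_image_ref)       <- info_high (R6; pre-R6 uses NAL + lui)
    // addiu reg, TMP, %lo(boot_image_ref)  <- info_low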
+
+void CodeGeneratorMIPS::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+ uint32_t boot_image_offset) {
+ DCHECK(invoke->IsStatic());
+ InvokeRuntimeCallingConvention calling_convention;
+ Register argument = calling_convention.GetRegisterAt(0);
+ if (GetCompilerOptions().IsBootImage()) {
+ DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+ // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ MethodReference target_method = invoke->GetTargetMethod();
+ dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+ PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx);
+ PcRelativePatchInfo* info_low =
+ NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, argument, /* base= */ ZERO);
+ __ Addiu(argument, argument, /* imm16= */ 0x5678, &info_low->label);
+ } else {
+ LoadBootImageAddress(argument, boot_image_offset);
+ }
+ InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+}
+
CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch(
const DexFile& dex_file,
dex::StringIndex string_index,
@@ -1792,12 +1868,27 @@ void CodeGeneratorMIPS::MarkGCCard(Register object,
if (value_can_be_null) {
__ Beqz(value, &done);
}
+ // Load the address of the card table into `card`.
__ LoadFromOffset(kLoadWord,
card,
TR,
Thread::CardTableOffset<kMipsPointerSize>().Int32Value());
+ // Calculate the address of the card corresponding to `object`.
__ Srl(temp, object, gc::accounting::CardTable::kCardShift);
__ Addu(temp, card, temp);
+ // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
+ // `object`'s card.
+ //
+ // Register `card` contains the address of the card table. Note that the card
+ // table's base is biased during its creation so that it always starts at an
+ // address whose least-significant byte is equal to `kCardDirty` (see
+ // art::gc::accounting::CardTable::Create). Therefore the SB instruction
+ // below writes the `kCardDirty` (byte) value into the `object`'s card
+ // (located at `card + object >> kCardShift`).
+ //
+ // This dual use of the value in register `card` (1. to calculate the location
+ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
+ // (no need to explicitly load `kCardDirty` as an immediate value).
__ Sb(card, temp, 0);
if (value_can_be_null) {
__ Bind(&done);
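A minimal C++ sketch of the dual use described above (not part of the patch;
the shift value is illustrative, see art::gc::accounting::CardTable for the
real constants):

    #include <cstdint>

    constexpr unsigned kCardShift = 10;  // illustrative
    uint8_t* card_table_biased;          // biased so its low byte equals kCardDirty

    void MarkCard(uintptr_t object) {
      uint8_t* card = card_table_biased + (object >> kCardShift);
      // Storing the low byte of the biased base writes the kCardDirty value
      // itself, saving an explicit immediate load -- this is the SB above.
      *card = static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_table_biased));
    }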
@@ -1882,6 +1973,10 @@ void CodeGeneratorMIPS::DumpFloatingPointRegister(std::ostream& stream, int reg)
stream << FRegister(reg);
}
+const MipsInstructionSetFeatures& CodeGeneratorMIPS::GetInstructionSetFeatures() const {
+ return *GetCompilerOptions().GetInstructionSetFeatures()->AsMipsInstructionSetFeatures();
+}
+
constexpr size_t kMipsDirectEntrypointRuntimeOffset = 16;
void CodeGeneratorMIPS::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -1936,6 +2031,34 @@ void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCode
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorMIPS::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ Register temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ LoadFromOffset(
+ kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ if (IsUint<16>(path_to_root)) {
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ __ LoadConst32(TMP, path_to_root);
+ __ Xor(temp, temp, TMP);
+ }
+ // Shift out bits that do not contribute to the comparison.
+ __ Sll(temp, temp, 32 - mask_bits);
+ }
+}
+
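What the emitted sequence computes, as a C++ sketch (illustration only;
path_to_root occupies the low mask_bits of the class status word and mask + 1
is a power of two, per the DCHECKs above):

    #include <cstdint>

    bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
      uint32_t x = status ^ path_to_root;    // zero iff the low mask_bits agree
      return (x << (32u - mask_bits)) == 0;  // discard bits above the bitstring
    }

The caller then branches to the slow path when the result register is non-zero.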
void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
__ Sync(0); // Only stype 0 is supported.
}
@@ -2456,7 +2579,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Or(dst_high, dst_high, TMP);
__ Andi(TMP, rhs_reg, kMipsBitsPerWord);
if (isR6) {
- __ Beqzc(TMP, &done, /* is_bare */ true);
+ __ Beqzc(TMP, &done, /* is_bare= */ true);
__ Move(dst_high, dst_low);
__ Move(dst_low, ZERO);
} else {
@@ -2472,7 +2595,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Or(dst_low, dst_low, TMP);
__ Andi(TMP, rhs_reg, kMipsBitsPerWord);
if (isR6) {
- __ Beqzc(TMP, &done, /* is_bare */ true);
+ __ Beqzc(TMP, &done, /* is_bare= */ true);
__ Move(dst_low, dst_high);
__ Sra(dst_high, dst_high, 31);
} else {
@@ -2489,7 +2612,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Or(dst_low, dst_low, TMP);
__ Andi(TMP, rhs_reg, kMipsBitsPerWord);
if (isR6) {
- __ Beqzc(TMP, &done, /* is_bare */ true);
+ __ Beqzc(TMP, &done, /* is_bare= */ true);
__ Move(dst_low, dst_high);
__ Move(dst_high, ZERO);
} else {
@@ -2508,7 +2631,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) {
__ Or(dst_high, dst_high, TMP);
__ Andi(TMP, rhs_reg, kMipsBitsPerWord);
if (isR6) {
- __ Beqzc(TMP, &done, /* is_bare */ true);
+ __ Beqzc(TMP, &done, /* is_bare= */ true);
__ Move(TMP, dst_high);
__ Move(dst_high, dst_low);
__ Move(dst_low, TMP);
@@ -2739,7 +2862,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
obj,
offset,
temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
} else {
codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
out_loc,
@@ -2747,7 +2870,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
data_offset,
index,
temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
}
} else {
Register out = out_loc.AsRegister<Register>();
@@ -3287,7 +3410,13 @@ void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -3296,7 +3425,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
- Register cls = locations->InAt(1).AsRegister<Register>();
+ Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
Register temp = temp_loc.AsRegister<Register>();
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -3335,7 +3464,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ Bne(temp, cls, slow_path->GetEntryLabel());
+ __ Bne(temp, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
break;
}
@@ -3361,7 +3490,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// exception.
__ Beqz(temp, slow_path->GetEntryLabel());
// Otherwise, compare the classes.
- __ Bne(temp, cls, &loop);
+ __ Bne(temp, cls.AsRegister<Register>(), &loop);
break;
}
@@ -3376,7 +3505,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// Walk over the class hierarchy to find a match.
MipsLabel loop;
__ Bind(&loop);
- __ Beq(temp, cls, &done);
+ __ Beq(temp, cls.AsRegister<Register>(), &done);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
temp_loc,
@@ -3399,7 +3528,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
maybe_temp2_loc,
kWithoutReadBarrier);
// Do an exact check.
- __ Beq(temp, cls, &done);
+ __ Beq(temp, cls.AsRegister<Register>(), &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -3458,7 +3587,21 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
// Go to next interface.
__ Addiu(TMP, TMP, -2);
// Compare the classes and continue the loop if they do not match.
- __ Bne(AT, cls, &loop);
+ __ Bne(AT, cls.AsRegister<Register>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ Bnez(temp, slow_path->GetEntryLabel());
break;
}
}
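
The new kBitstringCheck path avoids loading a second class altogether: it compares bits of the object's class against a (path-to-root, mask) pair that the compiler bakes in as constants (inputs 1-3 in the locations hunk above). A minimal C++ sketch of the predicate, assuming the bitstring lives in a 32-bit status word of the class (the helper name and field layout here are illustrative, not the real implementation):

    #include <cstdint>
    // Sketch: zero iff `status` encodes a subtype of the target class.
    static uint32_t BitstringCompare(uint32_t status, uint32_t path_to_root, uint32_t mask) {
      return (status & mask) ^ path_to_root;  // 0 on match
    }

GenerateBitstringTypeCheckCompare leaves exactly this kind of residue in `temp`/`out`, which is why CheckCast branches to the slow path with Bnez and InstanceOf converts the residue to a boolean with Sltiu.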
@@ -3474,15 +3617,14 @@ void LocationsBuilderMIPS::VisitClinitCheck(HClinitCheck* check) {
if (check->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
+ // Rely on the type initialization to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
}
void InstructionCodeGeneratorMIPS::VisitClinitCheck(HClinitCheck* check) {
// We assume the class is not null.
- SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(
- check->GetLoadClass(),
- check,
- check->GetDexPc(),
- true);
+ SlowPathCodeMIPS* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(check->GetLoadClass(), check);
codegen_->AddSlowPath(slow_path);
GenerateClassInitializationCheck(slow_path,
check->GetLocations()->InAt(0).AsRegister<Register>());
@@ -3962,7 +4104,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperatio
int64_t magic;
int shift;
- CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+  CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
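
CalculateMagicAndShiftForDivRem implements the standard Hacker's Delight (section 10-4) transformation: dividing by a compile-time constant becomes a high multiply by a precomputed magic reciprocal, a correction term, and shifts. A sketch of how the resulting (magic, shift) pair is consumed for a 32-bit signed divide (an assumed helper for illustration, not the emitted sequence; arithmetic right shift on signed values is assumed):

    #include <cstdint>
    static int32_t DivByConstant(int32_t x, int32_t magic, int shift, int32_t imm) {
      int32_t hi = static_cast<int32_t>((int64_t{x} * magic) >> 32);  // high word (MULSH)
      if (imm > 0 && magic < 0) hi += x;  // correction when the magic wrapped negative
      if (imm < 0 && magic > 0) hi -= x;
      hi >>= shift;
      return hi + (static_cast<uint32_t>(hi) >> 31);  // +1 if negative: round toward zero
    }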
@@ -5806,7 +5948,7 @@ void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
MipsLabel* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
- GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
+ GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
@@ -5825,9 +5967,9 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCodeMIPS* slow_path =
deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS>(deoptimize);
GenerateTestAndBranch(deoptimize,
- /* condition_input_index */ 0,
+ /* condition_input_index= */ 0,
slow_path->GetEntryLabel(),
- /* false_target */ nullptr);
+ /* false_target= */ nullptr);
}
// This function returns true if a conditional move can be generated for HSelect.
@@ -5841,7 +5983,7 @@ void InstructionCodeGeneratorMIPS::VisitDeoptimize(HDeoptimize* deoptimize) {
// of common logic.
static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* locations_to_set) {
bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition());
- HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ HInstruction* cond = select->InputAt(/* i= */ 2);
HCondition* condition = cond->AsCondition();
DataType::Type cond_type =
@@ -6074,7 +6216,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) {
Location src = locations->InAt(1);
Register src_reg = ZERO;
Register src_reg_high = ZERO;
- HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ HInstruction* cond = select->InputAt(/* i= */ 2);
Register cond_reg = TMP;
int cond_cc = 0;
DataType::Type cond_type = DataType::Type::kInt32;
@@ -6082,7 +6224,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) {
DataType::Type dst_type = select->GetType();
if (IsBooleanValueOrMaterializedCondition(cond)) {
- cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>();
+ cond_reg = locations->InAt(/* at= */ 2).AsRegister<Register>();
} else {
HCondition* condition = cond->AsCondition();
LocationSummary* cond_locations = cond->GetLocations();
@@ -6195,7 +6337,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) {
Location dst = locations->Out();
Location false_src = locations->InAt(0);
Location true_src = locations->InAt(1);
- HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ HInstruction* cond = select->InputAt(/* i= */ 2);
Register cond_reg = TMP;
FRegister fcond_reg = FTMP;
DataType::Type cond_type = DataType::Type::kInt32;
@@ -6203,7 +6345,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) {
DataType::Type dst_type = select->GetType();
if (IsBooleanValueOrMaterializedCondition(cond)) {
- cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>();
+ cond_reg = locations->InAt(/* at= */ 2).AsRegister<Register>();
} else {
HCondition* condition = cond->AsCondition();
LocationSummary* cond_locations = cond->GetLocations();
@@ -6384,7 +6526,7 @@ void LocationsBuilderMIPS::VisitSelect(HSelect* select) {
void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) {
bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
- if (CanMoveConditionally(select, is_r6, /* locations_to_set */ nullptr)) {
+ if (CanMoveConditionally(select, is_r6, /* locations_to_set= */ nullptr)) {
if (is_r6) {
GenConditionalMoveR6(select);
} else {
@@ -6394,8 +6536,8 @@ void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) {
LocationSummary* locations = select->GetLocations();
MipsLabel false_target;
GenerateTestAndBranch(select,
- /* condition_input_index */ 2,
- /* true_target */ nullptr,
+ /* condition_input_index= */ 2,
+ /* true_target= */ nullptr,
&false_target);
codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
__ Bind(&false_target);
@@ -6554,7 +6696,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction,
obj,
offset,
temp_loc,
- /* needs_null_check */ true);
+ /* needs_null_check= */ true);
if (is_volatile) {
GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -6787,7 +6929,7 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister(
out_reg,
offset,
maybe_temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
@@ -6828,7 +6970,7 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters(
obj_reg,
offset,
maybe_temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6919,7 +7061,7 @@ void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruc
__ AddUpper(base, obj, offset_high);
}
MipsLabel skip_call;
- __ Beqz(T9, &skip_call, /* is_bare */ true);
+ __ Beqz(T9, &skip_call, /* is_bare= */ true);
if (label_low != nullptr) {
DCHECK(short_offset);
__ Bind(label_low);
@@ -7074,11 +7216,11 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst
MipsLabel skip_call;
if (short_offset) {
if (isR6) {
- __ Beqzc(T9, &skip_call, /* is_bare */ true);
+ __ Beqzc(T9, &skip_call, /* is_bare= */ true);
__ Nop(); // In forbidden slot.
__ Jialc(T9, thunk_disp);
} else {
- __ Beqz(T9, &skip_call, /* is_bare */ true);
+ __ Beqz(T9, &skip_call, /* is_bare= */ true);
__ Addiu(T9, T9, thunk_disp); // In delay slot.
__ Jalr(T9);
__ Nop(); // In delay slot.
@@ -7086,13 +7228,13 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst
__ Bind(&skip_call);
} else {
if (isR6) {
- __ Beqz(T9, &skip_call, /* is_bare */ true);
+ __ Beqz(T9, &skip_call, /* is_bare= */ true);
__ Aui(base, obj, offset_high); // In delay slot.
__ Jialc(T9, thunk_disp);
__ Bind(&skip_call);
} else {
__ Lui(base, offset_high);
- __ Beqz(T9, &skip_call, /* is_bare */ true);
+ __ Beqz(T9, &skip_call, /* is_bare= */ true);
__ Addiu(T9, T9, thunk_disp); // In delay slot.
__ Jalr(T9);
__ Bind(&skip_call);
@@ -7169,7 +7311,7 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst
// We will not do the explicit null check in the thunk as some form of a null check
// must've been done earlier.
DCHECK(!needs_null_check);
- const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false);
+ const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset= */ false);
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
@@ -7179,13 +7321,13 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst
: index.AsRegister<Register>();
MipsLabel skip_call;
if (GetInstructionSetFeatures().IsR6()) {
- __ Beqz(T9, &skip_call, /* is_bare */ true);
+ __ Beqz(T9, &skip_call, /* is_bare= */ true);
__ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot.
__ Jialc(T9, thunk_disp);
__ Bind(&skip_call);
} else {
__ Sll(TMP, index_reg, scale_factor);
- __ Beqz(T9, &skip_call, /* is_bare */ true);
+ __ Beqz(T9, &skip_call, /* is_bare= */ true);
__ Addiu(T9, T9, thunk_disp); // In delay slot.
__ Jalr(T9);
__ Bind(&skip_call);
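
In all of these sequences T9 caches the per-register mark entrypoint; ART installs it only while the GC is marking, so the fast path is a single untaken Beqz with useful work (the address computation) hoisted into the delay or forbidden slot. A sketch of just the control-flow shape (the real introspection thunks additionally locate the guarded load by examining the code after the call site; `MarkFn` and the helper are illustrative):

    using MarkFn = void* (*)(void*);
    // Sketch: `mark_entrypoint` models pReadBarrierMarkReg; null outside marking.
    static void* LoadWithBakerBarrier(void** field, MarkFn mark_entrypoint) {
      void* ref = *field;                 // the guarded heap load
      if (mark_entrypoint != nullptr) {   // Beqz T9, &skip_call
        ref = mark_entrypoint(ref);       // Jialc/Jalr T9 into the thunk
      }
      return ref;
    }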
@@ -7300,7 +7442,7 @@ void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction,
ref,
obj,
- /* field_offset */ index,
+ /* field_offset= */ index,
temp_reg);
} else {
slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS(instruction, ref);
@@ -7312,7 +7454,7 @@ void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction*
// Given the numeric representation, it's enough to check the low bit of the
// rb_state. We do that by shifting the bit into the sign bit (31) and
// performing a branch on less than zero.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
__ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
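
Shifting the one-bit rb_state into bit 31 lets the following branch test it with a plain Bltz, with no mask materialized; the renamed state also makes the intent clearer, since 0 now simply means "not gray". As a sketch (the shift amount is a parameter here; the real value is LockWord::kReadBarrierStateShift):

    #include <cstdint>
    static bool IsGray(uint32_t lock_word, int rb_state_shift) {
      int32_t shifted = static_cast<int32_t>(lock_word << (31 - rb_state_shift));
      return shifted < 0;  // Bltz: the gray bit landed in the sign bit
    }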
@@ -7401,6 +7543,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -7409,7 +7553,13 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The output does overlap inputs.
// Note that TypeCheckSlowPathMIPS uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -7421,7 +7571,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
Register obj = obj_loc.AsRegister<Register>();
- Register cls = locations->InAt(1).AsRegister<Register>();
+ Location cls = locations->InAt(1);
Location out_loc = locations->Out();
Register out = out_loc.AsRegister<Register>();
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -7453,7 +7603,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
read_barrier_option);
// Classes must be equal for the instanceof to succeed.
- __ Xor(out, out, cls);
+ __ Xor(out, out, cls.AsRegister<Register>());
__ Sltiu(out, out, 1);
break;
}
@@ -7480,7 +7630,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
read_barrier_option);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqz(out, &done);
- __ Bne(out, cls, &loop);
+ __ Bne(out, cls.AsRegister<Register>(), &loop);
__ LoadConst32(out, 1);
break;
}
@@ -7498,7 +7648,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
// Walk over the class hierarchy to find a match.
MipsLabel loop, success;
__ Bind(&loop);
- __ Beq(out, cls, &success);
+ __ Beq(out, cls.AsRegister<Register>(), &success);
// /* HeapReference<Class> */ out = out->super_class_
GenerateReferenceLoadOneRegister(instruction,
out_loc,
@@ -7525,7 +7675,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
read_barrier_option);
// Do an exact check.
MipsLabel success;
- __ Beq(out, cls, &success);
+ __ Beq(out, cls.AsRegister<Register>(), &success);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -7555,9 +7705,9 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
kWithoutReadBarrier);
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
- __ Bne(out, cls, slow_path->GetEntryLabel());
+ __ Bne(out, cls.AsRegister<Register>(), slow_path->GetEntryLabel());
__ LoadConst32(out, 1);
break;
}
@@ -7584,11 +7734,25 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
// This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ B(slow_path->GetEntryLabel());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Sltiu(out, out, 1);
+ break;
+ }
}
__ Bind(&done);
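
Several of the paths above materialize the boolean with `Sltiu out, out, 1`: set-on-less-than-immediate-unsigned against 1 is true only when the register is zero, i.e. a one-instruction logical not:

    #include <cstdint>
    // Sketch: the Sltiu idiom for turning "zero means match" into 0/1.
    static uint32_t LogicalNot(uint32_t x) {
      return x < 1u;  // 1 iff x == 0
    }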
@@ -7712,6 +7876,14 @@ void InstructionCodeGeneratorMIPS::VisitInvokePolymorphic(HInvokePolymorphic* in
codegen_->GenerateInvokePolymorphicCall(invoke);
}
+void LocationsBuilderMIPS::VisitInvokeCustom(HInvokeCustom* invoke) {
+ HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorMIPS::VisitInvokeCustom(HInvokeCustom* invoke) {
+ codegen_->GenerateInvokeCustomCall(invoke);
+}
+
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen) {
if (invoke->GetLocations()->Intrinsified()) {
IntrinsicCodeGeneratorMIPS intrinsic(codegen);
@@ -7725,14 +7897,14 @@ HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadString::LoadKind::kJitBootImageAddress:
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kRuntimeCall:
break;
}
@@ -7748,14 +7920,14 @@ HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadClass::LoadKind::kJitBootImageAddress:
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kRuntimeCall:
break;
}
@@ -7792,7 +7964,7 @@ Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticO
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+ ArtMethod* method ATTRIBUTE_UNUSED) {
return desired_dispatch_info;
}
@@ -7829,12 +8001,18 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(
NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high);
Register temp_reg = temp.AsRegister<Register>();
EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
- __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
+ __ Addiu(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label);
break;
}
- case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset);
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high);
+ Register temp_reg = temp.AsRegister<Register>();
+ EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
+ __ Lw(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label);
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
@@ -7842,9 +8020,12 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high);
Register temp_reg = temp.AsRegister<Register>();
EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg);
- __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label);
+ __ Lw(temp_reg, TMP, /* imm16= */ 0x5678, &info_low->label);
break;
}
+ case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
+ __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
+ break;
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
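
Every 0x1234/0x5678 immediate in these sequences is a placeholder that the linker (or JIT) rewrites through the recorded labels; the pair follows the usual MIPS %hi/%lo split, where addiu/lw sign-extend their 16-bit immediate, so the high half must be rounded up to compensate. A sketch of the split (an illustrative helper, not the patching code):

    #include <cstdint>
    static void SplitHiLo(uint32_t addr, uint16_t* hi, int16_t* lo) {
      *lo = static_cast<int16_t>(addr & 0xFFFFu);
      // Subtract the sign-extended low part so (hi << 16) + lo reconstructs addr.
      *hi = static_cast<uint16_t>((addr - static_cast<uint32_t>(static_cast<int32_t>(*lo))) >> 16);
    }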
@@ -7955,14 +8136,14 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
+ case HLoadClass::LoadKind::kJitBootImageAddress:
if (isR6) {
break;
}
if (has_irreducible_loops) {
- if (load_kind != HLoadClass::LoadKind::kBootImageAddress) {
+ if (load_kind != HLoadClass::LoadKind::kJitBootImageAddress) {
codegen_->ClobberRA();
}
break;
@@ -7978,10 +8159,7 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
if (load_kind == HLoadClass::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConvention calling_convention;
- caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barriers we have a temp-clobbering call.
}
@@ -8007,9 +8185,9 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
+ case HLoadClass::LoadKind::kJitBootImageAddress:
base_or_current_method_reg =
(isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>();
break;
@@ -8048,39 +8226,20 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
base_or_current_method_reg);
- __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label);
+ __ Addiu(out, out, /* imm16= */ 0x5678, &info_low->label);
break;
}
- case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
- DCHECK_NE(address, 0u);
- if (isR6 || !has_irreducible_loops) {
- __ LoadLiteral(out,
- base_or_current_method_reg,
- codegen_->DeduplicateBootImageAddressLiteral(address));
- } else {
- __ LoadConst32(out, address);
- }
- break;
- }
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
base_or_current_method_reg);
- __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ Addiu(out, out, -masked_hash);
- }
+ __ Lw(out, out, /* imm16= */ 0x5678, &info_low->label);
break;
}
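
kBootImageRelRo unifies the old kBootImageAddress and kBootImageClassTable kinds: instead of materializing an object address or probing the class table (and stripping its hash bits, as the deleted masked_hash code had to), the patched pair loads a 32-bit slot that the runtime has already relocated in the boot image's .data.bimg.rel.ro section. Conceptually (base and names assumed for illustration):

    #include <cstdint>
    // Sketch: one aligned 32-bit load; no masking, no relocation at the use site.
    static void* LoadBootImageRelRo(uintptr_t rel_ro_base, uint32_t boot_image_offset) {
      return *reinterpret_cast<void* const*>(rel_ro_base + boot_image_offset);
    }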
case HLoadClass::LoadKind::kBssEntry: {
@@ -8094,24 +8253,37 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
GenerateGcRootFieldLoad(cls,
out_loc,
out,
- /* placeholder */ 0x5678,
+ /* offset= */ 0x5678,
read_barrier_option,
&info_low->label);
generate_null_check = true;
break;
}
+ case HLoadClass::LoadKind::kJitBootImageAddress: {
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
+ DCHECK_NE(address, 0u);
+ if (isR6 || !has_irreducible_loops) {
+ __ LoadLiteral(out,
+ base_or_current_method_reg,
+ codegen_->DeduplicateBootImageAddressLiteral(address));
+ } else {
+ __ LoadConst32(out, address);
+ }
+ break;
+ }
case HLoadClass::LoadKind::kJitTableAddress: {
CodeGeneratorMIPS::JitPatchInfo* info = codegen_->NewJitRootClassPatch(cls->GetDexFile(),
cls->GetTypeIndex(),
cls->GetClass());
bool reordering = __ SetReorder(false);
__ Bind(&info->high_label);
- __ Lui(out, /* placeholder */ 0x1234);
+ __ Lui(out, /* imm16= */ 0x1234);
__ SetReorder(reordering);
GenerateGcRootFieldLoad(cls,
out_loc,
out,
- /* placeholder */ 0x5678,
+ /* offset= */ 0x5678,
read_barrier_option,
&info->low_label);
break;
@@ -8124,8 +8296,8 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
if (generate_null_check || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
- SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ SlowPathCodeMIPS* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS(cls, cls);
codegen_->AddSlowPath(slow_path);
if (generate_null_check) {
__ Beqz(out, slow_path->GetEntryLabel());
@@ -8138,6 +8310,26 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF
}
}
+void LocationsBuilderMIPS::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, loc, loc);
+}
+
+void InstructionCodeGeneratorMIPS::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ codegen_->GenerateLoadMethodHandleRuntimeCall(load);
+}
+
+void LocationsBuilderMIPS::VisitLoadMethodType(HLoadMethodType* load) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, loc, loc);
+}
+
+void InstructionCodeGeneratorMIPS::VisitLoadMethodType(HLoadMethodType* load) {
+ codegen_->GenerateLoadMethodTypeRuntimeCall(load);
+}
+
static int32_t GetExceptionTlsOffset() {
return Thread::ExceptionOffset<kMipsPointerSize>().Int32Value();
}
@@ -8169,15 +8361,15 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
const bool has_irreducible_loops = codegen_->GetGraph()->HasIrreducibleLoops();
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
- case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
+ case HLoadString::LoadKind::kJitBootImageAddress:
if (isR6) {
break;
}
if (has_irreducible_loops) {
- if (load_kind != HLoadString::LoadKind::kBootImageAddress) {
+ if (load_kind != HLoadString::LoadKind::kJitBootImageAddress) {
codegen_->ClobberRA();
}
break;
@@ -8198,10 +8390,7 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) {
if (load_kind == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConvention calling_convention;
- caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barriers we have a temp-clobbering call.
}
@@ -8221,10 +8410,10 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
switch (load_kind) {
// We need an extra register for PC-relative literals on R2.
- case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
+ case HLoadString::LoadKind::kJitBootImageAddress:
base_or_current_method_reg =
(isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>();
break;
@@ -8243,36 +8432,23 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
base_or_current_method_reg);
- __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label);
+ __ Addiu(out, out, /* imm16= */ 0x5678, &info_low->label);
return;
}
- case HLoadString::LoadKind::kBootImageAddress: {
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(load->GetString().Get()));
- DCHECK_NE(address, 0u);
- if (isR6 || !has_irreducible_loops) {
- __ LoadLiteral(out,
- base_or_current_method_reg,
- codegen_->DeduplicateBootImageAddressLiteral(address));
- } else {
- __ LoadConst32(out, address);
- }
- return;
- }
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high,
out,
base_or_current_method_reg);
- __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label);
+ __ Lw(out, out, /* imm16= */ 0x5678, &info_low->label);
return;
}
case HLoadString::LoadKind::kBssEntry: {
- DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS::PcRelativePatchInfo* info_high =
codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
CodeGeneratorMIPS::PcRelativePatchInfo* info_low =
@@ -8283,7 +8459,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
GenerateGcRootFieldLoad(load,
out_loc,
out,
- /* placeholder */ 0x5678,
+ /* offset= */ 0x5678,
kCompilerReadBarrierOption,
&info_low->label);
SlowPathCodeMIPS* slow_path =
@@ -8293,6 +8469,18 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
__ Bind(slow_path->GetExitLabel());
return;
}
+ case HLoadString::LoadKind::kJitBootImageAddress: {
+ uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
+ DCHECK_NE(address, 0u);
+ if (isR6 || !has_irreducible_loops) {
+ __ LoadLiteral(out,
+ base_or_current_method_reg,
+ codegen_->DeduplicateBootImageAddressLiteral(address));
+ } else {
+ __ LoadConst32(out, address);
+ }
+ return;
+ }
case HLoadString::LoadKind::kJitTableAddress: {
CodeGeneratorMIPS::JitPatchInfo* info =
codegen_->NewJitRootStringPatch(load->GetDexFile(),
@@ -8300,12 +8488,12 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_
load->GetString());
bool reordering = __ SetReorder(false);
__ Bind(&info->high_label);
- __ Lui(out, /* placeholder */ 0x1234);
+ __ Lui(out, /* imm16= */ 0x1234);
__ SetReorder(reordering);
GenerateGcRootFieldLoad(load,
out_loc,
out,
- /* placeholder */ 0x5678,
+ /* offset= */ 0x5678,
kCompilerReadBarrierOption,
&info->low_label);
return;
@@ -8513,10 +8701,8 @@ void LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes care
- // of poisoning the reference.
- QuickEntrypointEnum entrypoint =
- CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
DCHECK(!codegen_->IsLeafMethod());
@@ -8526,30 +8712,13 @@ void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
}
void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes care
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize);
- __ LoadFromOffset(kLoadWord, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
- __ LoadFromOffset(kLoadWord, T9, temp, code_offset.Int32Value());
- __ Jalr(T9);
- __ NopIfNoReordering();
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}
void LocationsBuilderMIPS::VisitNot(HNot* instruction) {
@@ -8779,6 +8948,501 @@ void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ bool isR6,
+ DataType::Type type) {
+ if (isR6) {
+ // Some architectures, such as ARM and MIPS (prior to r6), have a
+ // conditional move instruction which only changes the target
+ // (output) register if the condition is true (MIPS prior to r6 had
+ // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions
+ // always change the target (output) register. If the condition is
+ // true the output register gets the contents of the "rs" register;
+ // otherwise, the output register is set to zero. One consequence
+ // of this is that to implement something like "rd = c==0 ? rs : rt"
+  // MIPSR6 needs to use a pair of SELEQZ/SELNEZ instructions.
+ // After executing this pair of instructions one of the output
+ // registers from the pair will necessarily contain zero. Then the
+ // code ORs the output registers from the SELEQZ/SELNEZ instructions
+ // to get the final result.
+ //
+  // The initial test to see if the output register is the same as the
+  // first input register is needed to make sure that the value in the
+ // first input register isn't clobbered before we've finished
+ // computing the output value. The logic in the corresponding else
+ // clause performs the same task but makes sure the second input
+ // register isn't clobbered in the event that it's the same register
+ // as the output register; the else clause also handles the case
+  // where the output register is distinct from both the first and the
+ // second input registers.
+ if (type == DataType::Type::kInt64) {
+ Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+ Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ MipsLabel compare_done;
+
+ if (a_lo == b_lo) {
+ if (out_lo != a_lo) {
+ __ Move(out_lo, a_lo);
+ __ Move(out_hi, a_hi);
+ }
+ } else {
+ __ Slt(TMP, b_hi, a_hi);
+ __ Bne(b_hi, a_hi, &compare_done);
+
+ __ Sltu(TMP, b_lo, a_lo);
+
+ __ Bind(&compare_done);
+
+ if (is_min) {
+ __ Seleqz(AT, a_lo, TMP);
+ __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo
+ // because at this point we're
+ // done using a_lo/b_lo.
+ } else {
+ __ Selnez(AT, a_lo, TMP);
+ __ Seleqz(out_lo, b_lo, TMP); // ditto
+ }
+ __ Or(out_lo, out_lo, AT);
+ if (is_min) {
+ __ Seleqz(AT, a_hi, TMP);
+ __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
+ } else {
+ __ Selnez(AT, a_hi, TMP);
+ __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
+ }
+ __ Or(out_hi, out_hi, AT);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register a = locations->InAt(0).AsRegister<Register>();
+ Register b = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (a == b) {
+ if (out != a) {
+ __ Move(out, a);
+ }
+ } else {
+ __ Slt(AT, b, a);
+ if (is_min) {
+ __ Seleqz(TMP, a, AT);
+ __ Selnez(AT, b, AT);
+ } else {
+ __ Selnez(TMP, a, AT);
+ __ Seleqz(AT, b, AT);
+ }
+ __ Or(out, TMP, AT);
+ }
+ }
+ } else { // !isR6
+ if (type == DataType::Type::kInt64) {
+ Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
+ Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+
+ MipsLabel compare_done;
+
+ if (a_lo == b_lo) {
+ if (out_lo != a_lo) {
+ __ Move(out_lo, a_lo);
+ __ Move(out_hi, a_hi);
+ }
+ } else {
+ __ Slt(TMP, a_hi, b_hi);
+ __ Bne(a_hi, b_hi, &compare_done);
+
+ __ Sltu(TMP, a_lo, b_lo);
+
+ __ Bind(&compare_done);
+
+ if (is_min) {
+ if (out_lo != a_lo) {
+ __ Movn(out_hi, a_hi, TMP);
+ __ Movn(out_lo, a_lo, TMP);
+ }
+ if (out_lo != b_lo) {
+ __ Movz(out_hi, b_hi, TMP);
+ __ Movz(out_lo, b_lo, TMP);
+ }
+ } else {
+ if (out_lo != a_lo) {
+ __ Movz(out_hi, a_hi, TMP);
+ __ Movz(out_lo, a_lo, TMP);
+ }
+ if (out_lo != b_lo) {
+ __ Movn(out_hi, b_hi, TMP);
+ __ Movn(out_lo, b_lo, TMP);
+ }
+ }
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register a = locations->InAt(0).AsRegister<Register>();
+ Register b = locations->InAt(1).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+
+ if (a == b) {
+ if (out != a) {
+ __ Move(out, a);
+ }
+ } else {
+ __ Slt(AT, a, b);
+ if (is_min) {
+ if (out != a) {
+ __ Movn(out, a, AT);
+ }
+ if (out != b) {
+ __ Movz(out, b, AT);
+ }
+ } else {
+ if (out != a) {
+ __ Movz(out, a, AT);
+ }
+ if (out != b) {
+ __ Movn(out, b, AT);
+ }
+ }
+ }
+ }
+ }
+}
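
The comment block above describes the R6 scheme in prose; in C terms each SELEQZ/SELNEZ pair plus OR is a branch-free select. A sketch of the 32-bit min path as emitted (helper name illustrative):

    #include <cstdint>
    // seleqz(rs, cond) == (cond == 0 ? rs : 0); selnez(rs, cond) == (cond != 0 ? rs : 0).
    static int32_t SelectMin(int32_t a, int32_t b) {
      uint32_t cond = static_cast<uint32_t>(b < a);  // Slt    AT, b, a
      int32_t keep_a = (cond == 0) ? a : 0;          // Seleqz TMP, a, AT
      int32_t keep_b = (cond != 0) ? b : 0;          // Selnez AT, b, AT
      return keep_a | keep_b;                        // Or     out, TMP, AT
    }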
+
+void InstructionCodeGeneratorMIPS::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ bool isR6,
+ DataType::Type type) {
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+ FRegister a = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister b = locations->InAt(1).AsFpuRegister<FRegister>();
+
+ if (isR6) {
+ MipsLabel noNaNs;
+ MipsLabel done;
+ FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+ // When Java computes min/max it prefers a NaN to a number; the
+ // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
+ // the inputs is a NaN and the other is a valid number, the MIPS
+ // instruction will return the number; Java wants the NaN value
+ // returned. This is why there is extra logic preceding the use of
+ // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
+ // NaN, return the NaN, otherwise return the min/max.
+ if (type == DataType::Type::kFloat64) {
+ __ CmpUnD(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqD(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelD(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovD(out, ftmp);
+ }
+
+ __ B(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinD(out, a, b);
+ } else {
+ __ MaxD(out, a, b);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CmpUnS(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+ // One of the inputs is a NaN
+ __ CmpEqS(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelS(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovS(out, ftmp);
+ }
+
+ __ B(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinS(out, a, b);
+ } else {
+ __ MaxS(out, a, b);
+ }
+ }
+
+ __ Bind(&done);
+
+ } else { // !isR6
+ MipsLabel ordered;
+ MipsLabel compare;
+ MipsLabel select;
+ MipsLabel done;
+
+ if (type == DataType::Type::kFloat64) {
+ __ CunD(a, b);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CunS(a, b);
+ }
+ __ Bc1f(&ordered);
+
+ // a or b (or both) is a NaN. Return one, which is a NaN.
+ if (type == DataType::Type::kFloat64) {
+ __ CeqD(b, b);
+ } else {
+ __ CeqS(b, b);
+ }
+ __ B(&select);
+
+ __ Bind(&ordered);
+
+ // Neither is a NaN.
+ // a == b? (-0.0 compares equal with +0.0)
+ // If equal, handle zeroes, else compare further.
+ if (type == DataType::Type::kFloat64) {
+ __ CeqD(a, b);
+ } else {
+ __ CeqS(a, b);
+ }
+ __ Bc1f(&compare);
+
+ // a == b either bit for bit or one is -0.0 and the other is +0.0.
+ if (type == DataType::Type::kFloat64) {
+ __ MoveFromFpuHigh(TMP, a);
+ __ MoveFromFpuHigh(AT, b);
+ } else {
+ __ Mfc1(TMP, a);
+ __ Mfc1(AT, b);
+ }
+
+ if (is_min) {
+ // -0.0 prevails over +0.0.
+ __ Or(TMP, TMP, AT);
+ } else {
+ // +0.0 prevails over -0.0.
+ __ And(TMP, TMP, AT);
+ }
+
+ if (type == DataType::Type::kFloat64) {
+ __ Mfc1(AT, a);
+ __ Mtc1(AT, out);
+ __ MoveToFpuHigh(TMP, out);
+ } else {
+ __ Mtc1(TMP, out);
+ }
+ __ B(&done);
+
+ __ Bind(&compare);
+
+ if (type == DataType::Type::kFloat64) {
+ if (is_min) {
+ // return (a <= b) ? a : b;
+ __ ColeD(a, b);
+ } else {
+ // return (a >= b) ? a : b;
+ __ ColeD(b, a); // b <= a
+ }
+ } else {
+ if (is_min) {
+ // return (a <= b) ? a : b;
+ __ ColeS(a, b);
+ } else {
+ // return (a >= b) ? a : b;
+ __ ColeS(b, a); // b <= a
+ }
+ }
+
+ __ Bind(&select);
+
+ if (type == DataType::Type::kFloat64) {
+ __ MovtD(out, a);
+ __ MovfD(out, b);
+ } else {
+ __ MovtS(out, a);
+ __ MovfS(out, b);
+ }
+
+ __ Bind(&done);
+ }
+}
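
Both the R6 and pre-R6 paths reproduce Java's Math.min/max semantics, which differ from the bare hardware compare in exactly the two ways the code special-cases: any NaN operand wins, and -0.0 orders below +0.0. As a reference predicate for min (assuming <cmath>; the emitted code computes the same result without calling anything):

    #include <cmath>
    static double JavaMin(double a, double b) {
      if (std::isnan(a) || std::isnan(b)) return (a != a) ? a : b;  // NaN prevails
      if (a == b) return std::signbit(a) ? a : b;  // -0.0 == +0.0, but -0.0 wins for min
      return (a < b) ? a : b;
    }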
+
+void InstructionCodeGeneratorMIPS::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, isR6, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, isR6, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderMIPS::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorMIPS::VisitMin(HMin* min) {
+  GenerateMinMax(min, /* is_min= */ true);
+}
+
+void LocationsBuilderMIPS::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorMIPS::VisitMax(HMax* max) {
+  GenerateMinMax(max, /* is_min= */ false);
+}
+
+void LocationsBuilderMIPS::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateAbsFP(LocationSummary* locations,
+ DataType::Type type,
+ bool isR2OrNewer,
+ bool isR6) {
+ FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
+ FRegister out = locations->Out().AsFpuRegister<FRegister>();
+
+ // Note, as a "quality of implementation", rather than pure "spec compliance", we require that
+ // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN
+ // (signaling NaN may become quiet though).
+ //
+ // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case,
+ // both regular floating point numbers and NAN values are treated alike, only the sign bit is
+ // affected by this instruction.
+ // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any
+ // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be
+ // changed when doing abs(NaN). Because of that, we clear sign bit in a different way.
+ if (isR6) {
+ if (type == DataType::Type::kFloat64) {
+ __ AbsD(out, in);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ AbsS(out, in);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ if (in != out) {
+ __ MovD(out, in);
+ }
+ __ MoveFromFpuHigh(TMP, in);
+ // ins instruction is not available for R1.
+ if (isR2OrNewer) {
+ __ Ins(TMP, ZERO, 31, 1);
+ } else {
+ __ Sll(TMP, TMP, 1);
+ __ Srl(TMP, TMP, 1);
+ }
+ __ MoveToFpuHigh(TMP, out);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ Mfc1(TMP, in);
+ // ins instruction is not available for R1.
+ if (isR2OrNewer) {
+ __ Ins(TMP, ZERO, 31, 1);
+ } else {
+ __ Sll(TMP, TMP, 1);
+ __ Srl(TMP, TMP, 1);
+ }
+ __ Mtc1(TMP, out);
+ }
+ }
+}
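
The pre-R6 fallback exists because with NAN2008=0, ABS.fmt may signal on NaN inputs and disturb payload bits, so the code clears bit 31 by hand and touches nothing else: `Ins TMP, ZERO, 31, 1` writes a zero into just the sign bit, with the Sll/Srl pair as the R1 substitute. The same operation in portable C++ (helper name assumed):

    #include <cstdint>
    #include <cstring>
    static float BitwiseAbs(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      bits &= 0x7FFFFFFFu;  // clear only the sign bit; NaN payloads survive
      std::memcpy(&f, &bits, sizeof(f));
      return f;
    }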
+
+void InstructionCodeGeneratorMIPS::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ bool isR2OrNewer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2();
+ bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ Register out = locations->Out().AsRegister<Register>();
+ __ Sra(AT, in, 31);
+ __ Xor(out, in, AT);
+ __ Subu(out, out, AT);
+ break;
+ }
+ case DataType::Type::kInt64: {
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register out_lo = locations->Out().AsRegisterPairLow<Register>();
+ Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
+    // The comments in this section show the analogous operations that would
+    // be performed if we had 64-bit registers "in" and "out".
+ // __ Dsra32(AT, in, 31);
+ __ Sra(AT, in_hi, 31);
+ // __ Xor(out, in, AT);
+ __ Xor(TMP, in_lo, AT);
+ __ Xor(out_hi, in_hi, AT);
+ // __ Dsubu(out, out, AT);
+ __ Subu(out_lo, TMP, AT);
+ __ Sltu(TMP, out_lo, TMP);
+ __ Addu(out_hi, out_hi, TMP);
+ break;
+ }
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateAbsFP(locations, abs->GetResultType(), isR2OrNewer, isR6);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
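The integer cases rely on the classic sign-mask identity |x| = (x ^ m) - m with m = x >> 31 (arithmetic); the 64-bit variant applies it to both halves and propagates the carry with the Sltu/Addu pair. The 32-bit identity in isolation (with the usual caveat that INT_MIN maps to itself):

    #include <cstdint>
    static int32_t AbsViaSignMask(int32_t x) {
      int32_t m = x >> 31;   // Sra: 0 if x >= 0, -1 if x < 0 (arithmetic shift assumed)
      return (x ^ m) - m;    // Xor + Subu
    }
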
void LocationsBuilderMIPS::VisitConstructorFence(HConstructorFence* constructor_fence) {
constructor_fence->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index c91cb62eda..50807310b6 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -81,9 +81,9 @@ class InvokeDexCallingConventionVisitorMIPS : public InvokeDexCallingConventionV
InvokeDexCallingConventionVisitorMIPS() {}
virtual ~InvokeDexCallingConventionVisitorMIPS() {}
- Location GetNextLocation(DataType::Type type) OVERRIDE;
- Location GetReturnLocation(DataType::Type type) const OVERRIDE;
- Location GetMethodLocation() const OVERRIDE;
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
private:
InvokeDexCallingConvention calling_convention;
@@ -110,23 +110,23 @@ class FieldAccessCallingConventionMIPS : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionMIPS() {}
- Location GetObjectLocation() const OVERRIDE {
+ Location GetObjectLocation() const override {
return Location::RegisterLocation(A1);
}
- Location GetFieldIndexLocation() const OVERRIDE {
+ Location GetFieldIndexLocation() const override {
return Location::RegisterLocation(A0);
}
- Location GetReturnLocation(DataType::Type type) const OVERRIDE {
+ Location GetReturnLocation(DataType::Type type) const override {
return DataType::Is64BitType(type)
? Location::RegisterPairLocation(V0, V1)
: Location::RegisterLocation(V0);
}
- Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE {
+ Location GetSetValueLocation(DataType::Type type, bool is_instance) const override {
return DataType::Is64BitType(type)
? Location::RegisterPairLocation(A2, A3)
: (is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1));
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
return Location::FpuRegisterLocation(F0);
}
@@ -139,10 +139,10 @@ class ParallelMoveResolverMIPS : public ParallelMoveResolverWithSwap {
ParallelMoveResolverMIPS(ArenaAllocator* allocator, CodeGeneratorMIPS* codegen)
: ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
- void EmitMove(size_t index) OVERRIDE;
- void EmitSwap(size_t index) OVERRIDE;
- void SpillScratch(int reg) OVERRIDE;
- void RestoreScratch(int reg) OVERRIDE;
+ void EmitMove(size_t index) override;
+ void EmitSwap(size_t index) override;
+ void SpillScratch(int reg) override;
+ void RestoreScratch(int reg) override;
void Exchange(int index1, int index2, bool double_slot);
void ExchangeQuadSlots(int index1, int index2);
@@ -176,14 +176,14 @@ class LocationsBuilderMIPS : public HGraphVisitor {
: HGraphVisitor(graph), codegen_(codegen) {}
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -210,14 +210,14 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
InstructionCodeGeneratorMIPS(HGraph* graph, CodeGeneratorMIPS* codegen);
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_MIPS(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -237,6 +237,7 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg);
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
void HandleBinaryOp(HBinaryOperation* operation);
void HandleCondition(HCondition* instruction);
void HandleShift(HBinaryOperation* operation);
@@ -246,6 +247,11 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, bool isR6, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation*, bool is_min);
+ void GenerateAbsFP(LocationSummary* locations, DataType::Type type, bool isR2OrNewer, bool isR6);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -364,40 +370,39 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator {
class CodeGeneratorMIPS : public CodeGenerator {
public:
CodeGeneratorMIPS(HGraph* graph,
- const MipsInstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
virtual ~CodeGeneratorMIPS() {}
- void ComputeSpillMask() OVERRIDE;
- bool HasAllocatedCalleeSaveRegisters() const OVERRIDE;
- void GenerateFrameEntry() OVERRIDE;
- void GenerateFrameExit() OVERRIDE;
+ void ComputeSpillMask() override;
+ bool HasAllocatedCalleeSaveRegisters() const override;
+ void GenerateFrameEntry() override;
+ void GenerateFrameExit() override;
- void Bind(HBasicBlock* block) OVERRIDE;
+ void Bind(HBasicBlock* block) override;
void MoveConstant(Location location, HConstant* c);
- size_t GetWordSize() const OVERRIDE { return kMipsWordSize; }
+ size_t GetWordSize() const override { return kMipsWordSize; }
- size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ size_t GetFloatingPointSpillSlotSize() const override {
return GetGraph()->HasSIMD()
? 2 * kMipsDoublewordSize // 16 bytes for each spill.
: 1 * kMipsDoublewordSize; // 8 bytes for each spill.
}
- uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+ uintptr_t GetAddressOf(HBasicBlock* block) override {
return assembler_.GetLabelLocation(GetLabelOf(block));
}
- HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
- HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
- MipsAssembler* GetAssembler() OVERRIDE { return &assembler_; }
- const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; }
+ HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
+ HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; }
+ MipsAssembler* GetAssembler() override { return &assembler_; }
+ const MipsAssembler& GetAssembler() const override { return assembler_; }
// Emit linker patches.
- void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
- void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
@@ -488,48 +493,46 @@ class CodeGeneratorMIPS : public CodeGenerator {
// Register allocation.
- void SetupBlockedRegisters() const OVERRIDE;
+ void SetupBlockedRegisters() const override;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
void ClobberRA() {
clobbered_ra_ = true;
}
- void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
- void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void DumpCoreRegister(std::ostream& stream, int reg) const override;
+ void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
- InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips; }
+ InstructionSet GetInstructionSet() const override { return InstructionSet::kMips; }
- const MipsInstructionSetFeatures& GetInstructionSetFeatures() const {
- return isa_features_;
- }
+ const MipsInstructionSetFeatures& GetInstructionSetFeatures() const;
MipsLabel* GetLabelOf(HBasicBlock* block) const {
return CommonGetLabelOf<MipsLabel>(block_labels_, block);
}
- void Initialize() OVERRIDE {
+ void Initialize() override {
block_labels_ = CommonInitializeLabels<MipsLabel>();
}
- void Finalize(CodeAllocator* allocator) OVERRIDE;
+ void Finalize(CodeAllocator* allocator) override;
// Code generation helpers.
- void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE;
+ void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
- void MoveConstant(Location destination, int32_t value) OVERRIDE;
+ void MoveConstant(Location destination, int32_t value) override;
- void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+ void AddLocationAsTemp(Location location, LocationSummary* locations) override;
// Generate code to invoke a runtime entry point.
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path = nullptr) OVERRIDE;
+ SlowPathCode* slow_path = nullptr) override;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -540,41 +543,41 @@ class CodeGeneratorMIPS : public CodeGenerator {
void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct);
- ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
+ ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; }
- bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE {
+ bool NeedsTwoRegisters(DataType::Type type) const override {
return type == DataType::Type::kInt64;
}
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadString::LoadKind GetSupportedLoadStringKind(
- HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+ HLoadString::LoadKind desired_string_load_kind) override;
// Check if the desired_class_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadClass::LoadKind GetSupportedLoadClassKind(
- HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+ HLoadClass::LoadKind desired_class_load_kind) override;
// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke) OVERRIDE;
+ ArtMethod* method) override;
void GenerateStaticOrDirectCall(
- HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
void GenerateVirtualCall(
- HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
- DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE {
+ DataType::Type type ATTRIBUTE_UNUSED) override {
UNIMPLEMENTED(FATAL) << "Not implemented on MIPS";
}
- void GenerateNop() OVERRIDE;
- void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
- void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateNop() override;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) override;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) override;
// The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
// whether through .data.bimg.rel.ro, .bss, or directly in the boot image.
@@ -615,6 +618,10 @@ class CodeGeneratorMIPS : public CodeGenerator {
DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
};
+ PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method,
@@ -637,6 +644,9 @@ class CodeGeneratorMIPS : public CodeGenerator {
Register out,
Register base);
+ void LoadBootImageAddress(Register reg, uint32_t boot_image_reference);
+ void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
+
// The JitPatchInfo is used for JIT string and class loads.
struct JitPatchInfo {
JitPatchInfo(const DexFile& dex_file, uint64_t idx)
@@ -685,11 +695,11 @@ class CodeGeneratorMIPS : public CodeGenerator {
InstructionCodeGeneratorMIPS instruction_visitor_;
ParallelMoveResolverMIPS move_resolver_;
MipsAssembler assembler_;
- const MipsInstructionSetFeatures& isa_features_;
// Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
Uint32ToLiteralMap uint32_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -697,10 +707,12 @@ class CodeGeneratorMIPS : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
// PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+ // PC-relative patch info for IntrinsicObjects.
+ ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
// Patches for string root accesses in JIT compiled code.
ArenaDeque<JitPatchInfo> jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 985ac2ca55..0d3cb3b8ca 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -24,6 +24,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_mips64.h"
@@ -111,6 +112,14 @@ Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type)
return Mips64ReturnLocation(type);
}
+static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ // The reference is returned in the same register. This differs from the standard return location.
+ return caller_saves;
+}
+
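This helper pairs with SetCustomSlowPathCallerSaves() in the HClinitCheck and HLoadClass code further down: with a SaveEverything slow path the runtime saves registers itself, so the compiler only marks the one register whose value must be tracked across the call, namely the first argument register, which carries the class and receives the result. A sketch with a plain bitmask standing in for RegisterSet (illustrative, not ART's types):

    #include <cstdint>

    using RegisterMask = std::uint32_t;

    // Only the first calling-convention register needs caller-save treatment;
    // it holds the class argument and the returned reference.
    constexpr RegisterMask OneRegCallerSaves(unsigned first_arg_reg) {
      return RegisterMask{1} << first_arg_reg;
    }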
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<CodeGeneratorMIPS64*>(codegen)->GetAssembler()-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kMips64PointerSize, x).Int32Value()
@@ -119,7 +128,7 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
explicit BoundsCheckSlowPathMIPS64(HBoundsCheck* instruction) : SlowPathCodeMIPS64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
@@ -144,9 +153,9 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "BoundsCheckSlowPathMIPS64"; }
private:
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathMIPS64);
@@ -157,16 +166,16 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
explicit DivZeroCheckSlowPathMIPS64(HDivZeroCheck* instruction)
: SlowPathCodeMIPS64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
mips64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "DivZeroCheckSlowPathMIPS64"; }
private:
DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathMIPS64);
@@ -174,35 +183,41 @@ class DivZeroCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
- LoadClassSlowPathMIPS64(HLoadClass* cls,
- HInstruction* at,
- uint32_t dex_pc,
- bool do_clinit)
- : SlowPathCodeMIPS64(at),
- cls_(cls),
- dex_pc_(dex_pc),
- do_clinit_(do_clinit) {
+ LoadClassSlowPathMIPS64(HLoadClass* cls, HInstruction* at)
+ : SlowPathCodeMIPS64(at), cls_(cls) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Location out = locations->Out();
+ const uint32_t dex_pc = instruction_->GetDexPc();
+ bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
+ bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
+
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
- InvokeRuntimeCallingConvention calling_convention;
- DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
- dex::TypeIndex type_index = cls_->GetTypeIndex();
- __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
- QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
- : kQuickInitializeType;
- mips64_codegen->InvokeRuntime(entrypoint, instruction_, dex_pc_, this);
- if (do_clinit_) {
- CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ InvokeRuntimeCallingConvention calling_convention;
+ if (must_resolve_type) {
+ DCHECK(IsSameDexFile(cls_->GetDexFile(), mips64_codegen->GetGraph()->GetDexFile()));
+ dex::TypeIndex type_index = cls_->GetTypeIndex();
+ __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_);
+ mips64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ // If we also must_do_clinit, the resolved type is now in the correct register.
} else {
- CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ DCHECK(must_do_clinit);
+ Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
+ mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+ source,
+ cls_->GetType());
+ }
+ if (must_do_clinit) {
+ mips64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
}
// Move the class to the desired location.
@@ -218,18 +233,12 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 {
__ Bc(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "LoadClassSlowPathMIPS64"; }
private:
// The class this slow path will load.
HLoadClass* const cls_;
- // The dex PC of `at_`.
- const uint32_t dex_pc_;
-
- // Whether to initialize the class.
- const bool do_clinit_;
-
DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS64);
};
@@ -238,7 +247,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
explicit LoadStringSlowPathMIPS64(HLoadString* instruction)
: SlowPathCodeMIPS64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
DCHECK(instruction_->IsLoadString());
DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry);
LocationSummary* locations = instruction_->GetLocations();
@@ -265,7 +274,7 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 {
__ Bc(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "LoadStringSlowPathMIPS64"; }
private:
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64);
@@ -275,7 +284,7 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
explicit NullCheckSlowPathMIPS64(HNullCheck* instr) : SlowPathCodeMIPS64(instr) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
@@ -289,9 +298,9 @@ class NullCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "NullCheckSlowPathMIPS64"; }
private:
DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathMIPS64);
@@ -302,7 +311,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
SuspendCheckSlowPathMIPS64(HSuspendCheck* instruction, HBasicBlock* successor)
: SlowPathCodeMIPS64(instruction), successor_(successor) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
@@ -322,7 +331,7 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
return &return_label_;
}
- const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "SuspendCheckSlowPathMIPS64"; }
HBasicBlock* GetSuccessor() const {
return successor_;
@@ -343,7 +352,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
explicit TypeCheckSlowPathMIPS64(HInstruction* instruction, bool is_fatal)
: SlowPathCodeMIPS64(instruction), is_fatal_(is_fatal) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
uint32_t dex_pc = instruction_->GetDexPc();
@@ -383,9 +392,9 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
}
}
- const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "TypeCheckSlowPathMIPS64"; }
- bool IsFatal() const OVERRIDE { return is_fatal_; }
+ bool IsFatal() const override { return is_fatal_; }
private:
const bool is_fatal_;
@@ -398,7 +407,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
explicit DeoptimizationSlowPathMIPS64(HDeoptimize* instruction)
: SlowPathCodeMIPS64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
__ Bind(GetEntryLabel());
LocationSummary* locations = instruction_->GetLocations();
@@ -410,7 +419,7 @@ class DeoptimizationSlowPathMIPS64 : public SlowPathCodeMIPS64 {
CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
- const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "DeoptimizationSlowPathMIPS64"; }
private:
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
@@ -420,7 +429,7 @@ class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 {
public:
explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -451,7 +460,7 @@ class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 {
__ Bc(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "ArraySetSlowPathMIPS64"; }
private:
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64);
@@ -481,9 +490,9 @@ class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; }
+ const char* GetDescription() const override { return "ReadBarrierMarkSlowPathMIPS"; }
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
DCHECK(locations->CanCall());
@@ -574,11 +583,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE {
+ const char* GetDescription() const override {
return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64";
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
DCHECK(locations->CanCall());
@@ -735,7 +744,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 {
DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
DataType::Type type = DataType::Type::kReference;
@@ -855,7 +864,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 {
__ Bc(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE {
+ const char* GetDescription() const override {
return "ReadBarrierForHeapReferenceSlowPathMIPS64";
}
@@ -900,7 +909,7 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 {
DCHECK(kEmitCompilerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DataType::Type type = DataType::Type::kReference;
GpuRegister reg_out = out_.AsRegister<GpuRegister>();
@@ -929,7 +938,7 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 {
__ Bc(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "ReadBarrierForRootSlowPathMIPS64"; }
private:
const Location out_;
@@ -939,13 +948,12 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 {
};
CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
- const Mips64InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
: CodeGenerator(graph,
kNumberOfGpuRegisters,
kNumberOfFpuRegisters,
- /* number_of_register_pairs */ 0,
+ /* number_of_register_pairs= */ 0,
ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
arraysize(kCoreCalleeSaves)),
ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
@@ -956,8 +964,8 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
location_builder_(graph, this),
instruction_visitor_(graph, this),
move_resolver_(graph->GetAllocator(), this),
- assembler_(graph->GetAllocator(), &isa_features),
- isa_features_(isa_features),
+ assembler_(graph->GetAllocator(),
+ compiler_options.GetInstructionSetFeatures()->AsMips64InstructionSetFeatures()),
uint32_literals_(std::less<uint32_t>(),
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
uint64_literals_(std::less<uint64_t>(),
@@ -968,6 +976,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(StringReferenceValueComparator(),
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(TypeReferenceValueComparator(),
@@ -988,8 +997,7 @@ void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) {
// Adjust native pc offsets in stack maps.
StackMapStream* stack_map_stream = GetStackMapStream();
for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) {
- uint32_t old_position =
- stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips64);
+ uint32_t old_position = stack_map_stream->GetStackMapNativePcOffset(i);
uint32_t new_position = __ GetAdjustedPosition(old_position);
DCHECK_GE(new_position, old_position);
stack_map_stream->SetStackMapNativePcOffset(i, new_position);
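Branch fix-ups can only lengthen code, so every recorded native PC moves forward or stays put; the DCHECK_GE above encodes that invariant. A sketch of the adjustment pass with simplified types (illustrative, not the StackMapStream API):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // `adjust` maps a pre-fixup code offset to its post-fixup offset.
    void AdjustNativePcs(std::vector<std::uint32_t>& pcs,
                         std::uint32_t (*adjust)(std::uint32_t)) {
      for (std::uint32_t& pc : pcs) {
        const std::uint32_t new_pc = adjust(pc);
        assert(new_pc >= pc);  // code never shrinks during fix-up
        pc = new_pc;
      }
    }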
@@ -1482,12 +1490,27 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object,
if (value_can_be_null) {
__ Beqzc(value, &done);
}
+ // Load the address of the card table into `card`.
__ LoadFromOffset(kLoadDoubleword,
card,
TR,
Thread::CardTableOffset<kMips64PointerSize>().Int32Value());
+ // Calculate the address of the card corresponding to `object`.
__ Dsrl(temp, object, gc::accounting::CardTable::kCardShift);
__ Daddu(temp, card, temp);
+ // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
+ // `object`'s card.
+ //
+ // Register `card` contains the address of the card table. Note that the card
+ // table's base is biased during its creation so that it always starts at an
+ // address whose least-significant byte is equal to `kCardDirty` (see
+ // art::gc::accounting::CardTable::Create). Therefore the SB instruction
+ // below writes the `kCardDirty` (byte) value into the `object`'s card
+ // (located at `card + object >> kCardShift`).
+ //
+ // This dual use of the value in register `card` (1. to calculate the location
+ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
+ // (no need to explicitly load `kCardDirty` as an immediate value).
__ Sb(card, temp, 0);
if (value_can_be_null) {
__ Bind(&done);
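A host-side C++ sketch of the card-marking arithmetic described in the comments above; kCardShift is an assumption here, and the biased base is constructed elsewhere so that its low byte equals kCardDirty (mirroring art::gc::accounting::CardTable::Create):

    #include <cstdint>

    constexpr unsigned kCardShift = 10;  // assumed: one card byte per 1 KiB of heap

    // Storing the low byte of `biased_base` itself writes kCardDirty; this is
    // the dual use of the register that the comments above explain.
    void MarkCard(std::uint8_t* biased_base, std::uintptr_t object_address) {
      std::uint8_t* card = biased_base + (object_address >> kCardShift);
      *card = static_cast<std::uint8_t>(reinterpret_cast<std::uintptr_t>(biased_base));
    }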
@@ -1509,6 +1532,15 @@ inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches(
}
}
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
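NoDexFileAdapter lets patch kinds that carry no dex file reuse the generic emission loop, which is written against a four-argument factory. A self-contained sketch of the same adapter pattern (simplified stand-ins, not the real linker::LinkerPatch signatures):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct Patch { std::size_t literal_offset; std::uint32_t pc_insn_offset; std::uint32_t data; };

    Patch MakeDataPatch(std::size_t literal_offset, std::uint32_t pc_insn_offset,
                        std::uint32_t data) {
      return Patch{literal_offset, pc_insn_offset, data};
    }

    // Adapts a three-argument factory to the four-argument shape a generic
    // caller expects, asserting the dex-file slot really is unused.
    template <Patch (*Factory)(std::size_t, std::uint32_t, std::uint32_t)>
    Patch NoDexFile(std::size_t literal_offset, const void* target_dex_file,
                    std::uint32_t pc_insn_offset, std::uint32_t data) {
      assert(target_dex_file == nullptr);
      return Factory(literal_offset, pc_insn_offset, data);
    }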
void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1517,7 +1549,8 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
boot_image_type_patches_.size() +
type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
- string_bss_entry_patches_.size();
+ string_bss_entry_patches_.size() +
+ boot_image_intrinsic_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
@@ -1526,12 +1559,14 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
boot_image_type_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+ boot_image_intrinsic_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
+ DCHECK(boot_image_intrinsic_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -1542,6 +1577,20 @@ void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
DCHECK_EQ(size, linker_patches->size());
}
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageIntrinsicPatch(
+ uint32_t intrinsic_data,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, intrinsic_data, info_high, &boot_image_intrinsic_patches_);
+}
+
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageRelRoPatch(
+ uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high) {
+ return NewPcRelativePatch(
+ /* dex_file= */ nullptr, boot_image_offset, info_high, &boot_image_method_patches_);
+}
+
CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch(
MethodReference target_method,
const PcRelativePatchInfo* info_high) {
@@ -1616,7 +1665,7 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn
DCHECK(!info_high->patch_info_high);
__ Bind(&info_high->label);
// Add the high half of a 32-bit offset to PC.
- __ Auipc(out, /* placeholder */ 0x1234);
+ __ Auipc(out, /* imm16= */ 0x1234);
// A following instruction will add the sign-extended low half of the 32-bit
// offset to `out` (e.g. ld, jialc, daddiu).
 if (info_low != nullptr) {
 DCHECK_EQ(info_low->patch_info_high, info_high);
 __ Bind(&info_low->label);
@@ -1625,13 +1674,57 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn
}
}
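The 0x1234/0x5678 immediates are placeholders the linker overwrites with the real high/low halves of the offset. Because the consumer sign-extends the low half, the high half must be pre-biased when bit 15 of the low half is set; a sketch of that split (matching the Low16Bits/High16Bits arithmetic used for field offsets later in this file):

    #include <cstdint>

    // Splits `offset` so that (high << 16) + int16_t(low) == offset (mod 2^32).
    void SplitOffset(std::int32_t offset, std::uint16_t& high, std::uint16_t& low) {
      low = static_cast<std::uint16_t>(offset);
      // Sign extension of a low half with bit 15 set subtracts 0x10000 at the
      // use site; bump the high half by one to compensate.
      high = static_cast<std::uint16_t>((static_cast<std::uint32_t>(offset) >> 16) +
                                        ((low & 0x8000u) ? 1u : 0u));
    }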
+void CodeGeneratorMIPS64::LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference) {
+ if (GetCompilerOptions().IsBootImage()) {
+ PcRelativePatchInfo* info_high = NewBootImageIntrinsicPatch(boot_image_reference);
+ PcRelativePatchInfo* info_low = NewBootImageIntrinsicPatch(boot_image_reference, info_high);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+ __ Daddiu(reg, AT, /* imm16= */ 0x5678);
+ } else if (GetCompilerOptions().GetCompilePic()) {
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_reference);
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_reference, info_high);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ __ Lwu(reg, AT, /* imm16= */ 0x5678);
+ } else {
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ DCHECK(!heap->GetBootImageSpaces().empty());
+ uintptr_t address =
+ reinterpret_cast<uintptr_t>(heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference);
+ __ LoadLiteral(reg, kLoadDoubleword, DeduplicateBootImageAddressLiteral(address));
+ }
+}
+
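LoadBootImageAddress picks one of three lowerings for the same "address of a boot-image object" request. A hedged sketch of the decision (enumerators are illustrative):

    enum class CompileMode { kBootImage, kAotPic, kJit };

    enum class Lowering {
      kLinkTimePcRelativePatch,  // auipc/daddiu pair patched at link time
      kRelRoEntryLoad,           // 32-bit lwu from the .data.bimg.rel.ro entry
      kEmbeddedLiteral,          // JIT: absolute address known now, kept as a literal
    };

    constexpr Lowering ChooseLowering(CompileMode mode) {
      return mode == CompileMode::kBootImage ? Lowering::kLinkTimePcRelativePatch
           : mode == CompileMode::kAotPic    ? Lowering::kRelRoEntryLoad
                                             : Lowering::kEmbeddedLiteral;
    }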
+void CodeGeneratorMIPS64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+ uint32_t boot_image_offset) {
+ DCHECK(invoke->IsStatic());
+ InvokeRuntimeCallingConvention calling_convention;
+ GpuRegister argument = calling_convention.GetRegisterAt(0);
+ if (GetCompilerOptions().IsBootImage()) {
+ DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+ // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ MethodReference target_method = invoke->GetTargetMethod();
+ dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+ PcRelativePatchInfo* info_high = NewBootImageTypePatch(*target_method.dex_file, type_idx);
+ PcRelativePatchInfo* info_low =
+ NewBootImageTypePatch(*target_method.dex_file, type_idx, info_high);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+ __ Daddiu(argument, AT, /* imm16= */ 0x5678);
+ } else {
+ LoadBootImageAddress(argument, boot_image_offset);
+ }
+ InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+}
+
Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file,
dex::StringIndex string_index,
Handle<mirror::String> handle) {
ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
return jit_string_patches_.GetOrCreate(
StringReference(&dex_file, string_index),
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+ [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); });
}
Literal* CodeGeneratorMIPS64::DeduplicateJitClassLiteral(const DexFile& dex_file,
@@ -1640,7 +1733,7 @@ Literal* CodeGeneratorMIPS64::DeduplicateJitClassLiteral(const DexFile& dex_file
ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
return jit_class_patches_.GetOrCreate(
TypeReference(&dex_file, type_index),
- [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); });
+ [this]() { return __ NewLiteral<uint32_t>(/* value= */ 0u); });
}
void CodeGeneratorMIPS64::PatchJitRootUse(uint8_t* code,
@@ -1740,6 +1833,10 @@ void CodeGeneratorMIPS64::DumpFloatingPointRegister(std::ostream& stream, int re
stream << FpuRegister(reg);
}
+const Mips64InstructionSetFeatures& CodeGeneratorMIPS64::GetInstructionSetFeatures() const {
+ return *GetCompilerOptions().GetInstructionSetFeatures()->AsMips64InstructionSetFeatures();
+}
+
void CodeGeneratorMIPS64::InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
@@ -1780,6 +1877,34 @@ void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCo
__ Bind(slow_path->GetExitLabel());
}
+void InstructionCodeGeneratorMIPS64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ GpuRegister temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Load only the bitstring part of the status word.
+ __ LoadFromOffset(
+ kLoadUnsignedHalfword, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ LoadFromOffset(kLoadWord, temp, temp, mirror::Class::StatusOffset().Int32Value());
+ // Compare the bitstring bits using XOR.
+ if (IsUint<16>(path_to_root)) {
+ __ Xori(temp, temp, dchecked_integral_cast<uint16_t>(path_to_root));
+ } else {
+ __ LoadConst32(TMP, path_to_root);
+ __ Xor(temp, temp, TMP);
+ }
+ // Shift out bits that do not contribute to the comparison.
+ __ Sll(temp, temp, 32 - mask_bits);
+ }
+}
+
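The sequence above realizes the bitstring subtype test: a class is a subtype iff the masked low bits of its status word equal the superclass's path-to-root. A C++ model of the comparison exactly as the generated code performs it (XOR, then shift the irrelevant bits away; helper names are illustrative):

    #include <cassert>
    #include <cstdint>

    // Equivalent to (status & mask) == path_to_root, given that path_to_root
    // has no bits outside `mask` and mask == (1u << mask_bits) - 1.
    bool BitstringMatches(std::uint32_t status, std::uint32_t path_to_root,
                          std::uint32_t mask) {
      assert(mask != 0u && mask + 1 != 0u && ((mask + 1) & mask) == 0u);
      const unsigned mask_bits = __builtin_ctz(mask + 1);  // 1..31 here
      const std::uint32_t diff = status ^ path_to_root;
      return static_cast<std::uint32_t>(diff << (32 - mask_bits)) == 0u;
    }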
void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind ATTRIBUTE_UNUSED) {
__ Sync(0); // only stype 0 is supported
}
@@ -2333,7 +2458,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
obj,
offset,
temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
} else {
codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
out_loc,
@@ -2341,7 +2466,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
data_offset,
index,
temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
}
} else {
GpuRegister out = out_loc.AsRegister<GpuRegister>();
@@ -2840,7 +2965,13 @@ void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -2849,7 +2980,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
- GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+ Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
@@ -2888,7 +3019,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
kWithoutReadBarrier);
// Jump to slow path for throwing the exception or doing a
// more involved array check.
- __ Bnec(temp, cls, slow_path->GetEntryLabel());
+ __ Bnec(temp, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
break;
}
@@ -2914,7 +3045,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
// exception.
__ Beqzc(temp, slow_path->GetEntryLabel());
// Otherwise, compare the classes.
- __ Bnec(temp, cls, &loop);
+ __ Bnec(temp, cls.AsRegister<GpuRegister>(), &loop);
break;
}
@@ -2929,7 +3060,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
// Walk over the class hierarchy to find a match.
Mips64Label loop;
__ Bind(&loop);
- __ Beqc(temp, cls, &done);
+ __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
// /* HeapReference<Class> */ temp = temp->super_class_
GenerateReferenceLoadOneRegister(instruction,
temp_loc,
@@ -2952,7 +3083,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
maybe_temp2_loc,
kWithoutReadBarrier);
// Do an exact check.
- __ Beqc(temp, cls, &done);
+ __ Beqc(temp, cls.AsRegister<GpuRegister>(), &done);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ temp = temp->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -3011,7 +3142,21 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
__ Daddiu(temp, temp, 2 * kHeapReferenceSize);
__ Addiu(TMP, TMP, -2);
// Compare the classes and continue the loop if they do not match.
- __ Bnec(AT, cls, &loop);
+ __ Bnec(AT, cls.AsRegister<GpuRegister>(), &loop);
+ break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp2_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ Bnezc(temp, slow_path->GetEntryLabel());
break;
}
}
@@ -3027,15 +3172,14 @@ void LocationsBuilderMIPS64::VisitClinitCheck(HClinitCheck* check) {
if (check->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
+ // Rely on the type initialization to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
}
void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) {
// We assume the class is not null.
- SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(
- check->GetLoadClass(),
- check,
- check->GetDexPc(),
- true);
+ SlowPathCodeMIPS64* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(check->GetLoadClass(), check);
codegen_->AddSlowPath(slow_path);
GenerateClassInitializationCheck(slow_path,
check->GetLocations()->InAt(0).AsRegister<GpuRegister>());
@@ -3193,10 +3337,10 @@ void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) {
switch (type) {
default:
// Integer case.
- GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations);
+ GenerateIntLongCompare(instruction->GetCondition(), /* is64bit= */ false, locations);
return;
case DataType::Type::kInt64:
- GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations);
+ GenerateIntLongCompare(instruction->GetCondition(), /* is64bit= */ true, locations);
return;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
@@ -3498,7 +3642,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio
if (!DataType::IsIntegralType(type)) {
LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck.";
- return;
+ UNREACHABLE();
}
if (value.IsConstant()) {
@@ -4305,10 +4449,10 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc
switch (type) {
default:
- GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target);
+ GenerateIntLongCompareAndBranch(if_cond, /* is64bit= */ false, locations, branch_target);
break;
case DataType::Type::kInt64:
- GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target);
+ GenerateIntLongCompareAndBranch(if_cond, /* is64bit= */ true, locations, branch_target);
break;
case DataType::Type::kFloat32:
case DataType::Type::kFloat64:
@@ -4338,7 +4482,7 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
Mips64Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
- GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
+ GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
@@ -4357,9 +4501,9 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCodeMIPS64* slow_path =
deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathMIPS64>(deoptimize);
GenerateTestAndBranch(deoptimize,
- /* condition_input_index */ 0,
+ /* condition_input_index= */ 0,
slow_path->GetEntryLabel(),
- /* false_target */ nullptr);
+ /* false_target= */ nullptr);
}
// This function returns true if a conditional move can be generated for HSelect.
@@ -4373,7 +4517,7 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) {
// of common logic.
static bool CanMoveConditionally(HSelect* select, LocationSummary* locations_to_set) {
bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition());
- HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ HInstruction* cond = select->InputAt(/* i= */ 2);
HCondition* condition = cond->AsCondition();
DataType::Type cond_type =
@@ -4516,7 +4660,7 @@ void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) {
Location dst = locations->Out();
Location false_src = locations->InAt(0);
Location true_src = locations->InAt(1);
- HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+ HInstruction* cond = select->InputAt(/* i= */ 2);
GpuRegister cond_reg = TMP;
FpuRegister fcond_reg = FTMP;
DataType::Type cond_type = DataType::Type::kInt32;
@@ -4524,7 +4668,7 @@ void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) {
DataType::Type dst_type = select->GetType();
if (IsBooleanValueOrMaterializedCondition(cond)) {
- cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<GpuRegister>();
+ cond_reg = locations->InAt(/* at= */ 2).AsRegister<GpuRegister>();
} else {
HCondition* condition = cond->AsCondition();
LocationSummary* cond_locations = cond->GetLocations();
@@ -4533,13 +4677,13 @@ void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) {
switch (cond_type) {
default:
cond_inverted = MaterializeIntLongCompare(if_cond,
- /* is64bit */ false,
+ /* is64bit= */ false,
cond_locations,
cond_reg);
break;
case DataType::Type::kInt64:
cond_inverted = MaterializeIntLongCompare(if_cond,
- /* is64bit */ true,
+ /* is64bit= */ true,
cond_locations,
cond_reg);
break;
@@ -4682,14 +4826,14 @@ void LocationsBuilderMIPS64::VisitSelect(HSelect* select) {
}
void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) {
- if (CanMoveConditionally(select, /* locations_to_set */ nullptr)) {
+ if (CanMoveConditionally(select, /* locations_to_set= */ nullptr)) {
GenConditionalMove(select);
} else {
LocationSummary* locations = select->GetLocations();
Mips64Label false_target;
GenerateTestAndBranch(select,
- /* condition_input_index */ 2,
- /* true_target */ nullptr,
+ /* condition_input_index= */ 2,
+ /* true_target= */ nullptr,
&false_target);
codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
__ Bind(&false_target);
@@ -4801,7 +4945,7 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction,
obj,
offset,
temp_loc,
- /* needs_null_check */ true);
+ /* needs_null_check= */ true);
if (is_volatile) {
GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -4957,7 +5101,7 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister(
out_reg,
offset,
maybe_temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
@@ -4998,7 +5142,7 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters(
obj_reg,
offset,
maybe_temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -5086,7 +5230,7 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instr
__ Daui(base, obj, offset_high);
}
Mips64Label skip_call;
- __ Beqz(T9, &skip_call, /* is_bare */ true);
+ __ Beqz(T9, &skip_call, /* is_bare= */ true);
if (label_low != nullptr) {
DCHECK(short_offset);
__ Bind(label_low);
@@ -5216,7 +5360,7 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
Mips64Label skip_call;
if (short_offset) {
- __ Beqzc(T9, &skip_call, /* is_bare */ true);
+ __ Beqzc(T9, &skip_call, /* is_bare= */ true);
__ Nop(); // In forbidden slot.
__ Jialc(T9, thunk_disp);
__ Bind(&skip_call);
@@ -5225,7 +5369,7 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in
} else {
int16_t offset_low = Low16Bits(offset);
int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu.
- __ Beqz(T9, &skip_call, /* is_bare */ true);
+ __ Beqz(T9, &skip_call, /* is_bare= */ true);
__ Daui(TMP, obj, offset_high); // In delay slot.
__ Jialc(T9, thunk_disp);
__ Bind(&skip_call);
@@ -5298,12 +5442,12 @@ void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in
// We will not do the explicit null check in the thunk as some form of a null check
// must've been done earlier.
DCHECK(!needs_null_check);
- const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false);
+ const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset= */ false);
// Loading the entrypoint does not require a load acquire since it is only changed when
// threads are suspended or running a checkpoint.
__ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
Mips64Label skip_call;
- __ Beqz(T9, &skip_call, /* is_bare */ true);
+ __ Beqz(T9, &skip_call, /* is_bare= */ true);
GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
GpuRegister index_reg = index.AsRegister<GpuRegister>();
__ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot.
@@ -5414,7 +5558,7 @@ void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction,
ref,
obj,
- /* field_offset */ index,
+ /* field_offset= */ index,
temp_reg);
} else {
slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS64(instruction, ref);
@@ -5426,7 +5570,7 @@ void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
// Given the numeric representation, it's enough to check the low bit of the
// rb_state. We do that by shifting the bit into the sign bit (31) and
// performing a branch on less than zero.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
__ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
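A compact model of that trick (the shift constant is an assumption; the real one comes from art::LockWord): move the rb_state bit into bit 31, then let a signed less-than-zero branch test it.

    #include <cstdint>

    constexpr unsigned kReadBarrierStateShift = 28;  // assumed, not ART's value

    bool IsGray(std::uint32_t lock_word) {
      // After the shift, the rb_state bit is the sign bit.
      const auto shifted =
          static_cast<std::int32_t>(lock_word << (31u - kReadBarrierStateShift));
      return shifted < 0;
    }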
@@ -5515,6 +5659,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -5523,7 +5669,13 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::RequiresRegister());
+ }
// The output does overlap inputs.
// Note that TypeCheckSlowPathMIPS64 uses this register too.
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
@@ -5535,7 +5687,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location obj_loc = locations->InAt(0);
GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
- GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
+ Location cls = locations->InAt(1);
Location out_loc = locations->Out();
GpuRegister out = out_loc.AsRegister<GpuRegister>();
const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
@@ -5567,7 +5719,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
maybe_temp_loc,
read_barrier_option);
// Classes must be equal for the instanceof to succeed.
- __ Xor(out, out, cls);
+ __ Xor(out, out, cls.AsRegister<GpuRegister>());
__ Sltiu(out, out, 1);
break;
}
@@ -5594,7 +5746,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
read_barrier_option);
// If `out` is null, we use it for the result, and jump to `done`.
__ Beqzc(out, &done);
- __ Bnec(out, cls, &loop);
+ __ Bnec(out, cls.AsRegister<GpuRegister>(), &loop);
__ LoadConst32(out, 1);
break;
}
@@ -5612,7 +5764,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
// Walk over the class hierarchy to find a match.
Mips64Label loop, success;
__ Bind(&loop);
- __ Beqc(out, cls, &success);
+ __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
// /* HeapReference<Class> */ out = out->super_class_
GenerateReferenceLoadOneRegister(instruction,
out_loc,
@@ -5639,7 +5791,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
read_barrier_option);
// Do an exact check.
Mips64Label success;
- __ Beqc(out, cls, &success);
+ __ Beqc(out, cls.AsRegister<GpuRegister>(), &success);
// Otherwise, we need to check that the object's class is a non-primitive array.
// /* HeapReference<Class> */ out = out->component_type_
GenerateReferenceLoadOneRegister(instruction,
@@ -5669,9 +5821,9 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
kWithoutReadBarrier);
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
- __ Bnec(out, cls, slow_path->GetEntryLabel());
+ __ Bnec(out, cls.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
__ LoadConst32(out, 1);
break;
}
@@ -5698,11 +5850,25 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
// This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ Bc(slow_path->GetEntryLabel());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ out = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ maybe_temp_loc,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ Sltiu(out, out, 1);
+ break;
+ }
}
__ Bind(&done);
@@ -5825,6 +5991,14 @@ void InstructionCodeGeneratorMIPS64::VisitInvokePolymorphic(HInvokePolymorphic*
codegen_->GenerateInvokePolymorphicCall(invoke);
}
+void LocationsBuilderMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) {
+ HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitInvokeCustom(HInvokeCustom* invoke) {
+ codegen_->GenerateInvokeCustomCall(invoke);
+}
+
static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) {
if (invoke->GetLocations()->Intrinsified()) {
IntrinsicCodeGeneratorMIPS64 intrinsic(codegen);
@@ -5839,14 +6013,14 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
bool fallback_load = false;
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadString::LoadKind::kJitBootImageAddress:
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kRuntimeCall:
break;
}
@@ -5866,14 +6040,14 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadClass::LoadKind::kJitBootImageAddress:
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kRuntimeCall:
break;
}
@@ -5885,7 +6059,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS64::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+ ArtMethod* method ATTRIBUTE_UNUSED) {
// On MIPS64 we support all dispatch types.
return desired_dispatch_info;
}
@@ -5918,23 +6092,32 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high);
EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
- __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678);
+ __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678);
break;
}
- case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- __ LoadLiteral(temp.AsRegister<GpuRegister>(),
- kLoadDoubleword,
- DeduplicateUint64Literal(invoke->GetMethodAddress()));
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ uint32_t boot_image_offset = GetBootImageOffset(invoke);
+ PcRelativePatchInfo* info_high = NewBootImageRelRoPatch(boot_image_offset);
+ PcRelativePatchInfo* info_low = NewBootImageRelRoPatch(boot_image_offset, info_high);
+ EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ __ Lwu(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678);
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
PcRelativePatchInfo* info_high = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
PcRelativePatchInfo* info_low = NewMethodBssEntryPatch(
MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high);
EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
- __ Ld(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678);
+ __ Ld(temp.AsRegister<GpuRegister>(), AT, /* imm16= */ 0x5678);
break;
}
+ case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
+ __ LoadLiteral(temp.AsRegister<GpuRegister>(),
+ kLoadDoubleword,
+ DeduplicateUint64Literal(invoke->GetMethodAddress()));
+ break;
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
@@ -6048,10 +6231,7 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
if (load_kind == HLoadClass::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution or initialization and marking to save everything we need.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConvention calling_convention;
- caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barriers we have a temp-clobbering call.
}
@@ -6100,33 +6280,18 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
- __ Daddiu(out, AT, /* placeholder */ 0x5678);
- break;
- }
- case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
- DCHECK_NE(address, 0u);
- __ LoadLiteral(out,
- kLoadUnsignedWord,
- codegen_->DeduplicateBootImageAddressLiteral(address));
+ __ Daddiu(out, AT, /* imm16= */ 0x5678);
break;
}
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(cls);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
- __ Lwu(out, AT, /* placeholder */ 0x5678);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ Daddiu(out, out, -masked_hash);
- }
+ __ Lwu(out, AT, /* imm16= */ 0x5678);
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -6138,12 +6303,21 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
GenerateGcRootFieldLoad(cls,
out_loc,
out,
- /* placeholder */ 0x5678,
+ /* offset= */ 0x5678,
read_barrier_option,
&info_low->label);
generate_null_check = true;
break;
}
+ case HLoadClass::LoadKind::kJitBootImageAddress: {
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
+ DCHECK_NE(address, 0u);
+ __ LoadLiteral(out,
+ kLoadUnsignedWord,
+ codegen_->DeduplicateBootImageAddressLiteral(address));
+ break;
+ }
case HLoadClass::LoadKind::kJitTableAddress:
__ LoadLiteral(out,
kLoadUnsignedWord,
@@ -6160,8 +6334,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
if (generate_null_check || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
- SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ SlowPathCodeMIPS64* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64(cls, cls);
codegen_->AddSlowPath(slow_path);
if (generate_null_check) {
__ Beqzc(out, slow_path->GetEntryLabel());
@@ -6174,6 +6348,26 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
}
}
+void LocationsBuilderMIPS64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, loc, loc);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ codegen_->GenerateLoadMethodHandleRuntimeCall(load);
+}
+
+void LocationsBuilderMIPS64::VisitLoadMethodType(HLoadMethodType* load) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, loc, loc);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitLoadMethodType(HLoadMethodType* load) {
+ codegen_->GenerateLoadMethodTypeRuntimeCall(load);
+}
+
static int32_t GetExceptionTlsOffset() {
return Thread::ExceptionOffset<kMips64PointerSize>().Int32Value();
}
@@ -6209,10 +6403,7 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) {
if (load_kind == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString and marking to save everything we need.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConvention calling_convention;
- caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barriers we have a temp-clobbering call.
}
@@ -6236,30 +6427,21 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
- __ Daddiu(out, AT, /* placeholder */ 0x5678);
- return;
- }
- case HLoadString::LoadKind::kBootImageAddress: {
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(load->GetString().Get()));
- DCHECK_NE(address, 0u);
- __ LoadLiteral(out,
- kLoadUnsignedWord,
- codegen_->DeduplicateBootImageAddressLiteral(address));
+ __ Daddiu(out, AT, /* imm16= */ 0x5678);
return;
}
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+ uint32_t boot_image_offset = codegen_->GetBootImageOffset(load);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex());
+ codegen_->NewBootImageRelRoPatch(boot_image_offset);
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
- codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high);
+ codegen_->NewBootImageRelRoPatch(boot_image_offset, info_high);
codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low);
- __ Lwu(out, AT, /* placeholder */ 0x5678);
+ __ Lwu(out, AT, /* imm16= */ 0x5678);
return;
}
case HLoadString::LoadKind::kBssEntry: {
- DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
CodeGeneratorMIPS64::PcRelativePatchInfo* info_high =
codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex());
CodeGeneratorMIPS64::PcRelativePatchInfo* info_low =
@@ -6268,7 +6450,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
GenerateGcRootFieldLoad(load,
out_loc,
out,
- /* placeholder */ 0x5678,
+ /* offset= */ 0x5678,
kCompilerReadBarrierOption,
&info_low->label);
SlowPathCodeMIPS64* slow_path =
@@ -6278,6 +6460,14 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA
__ Bind(slow_path->GetExitLabel());
return;
}
+ case HLoadString::LoadKind::kJitBootImageAddress: {
+ uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
+ DCHECK_NE(address, 0u);
+ __ LoadLiteral(out,
+ kLoadUnsignedWord,
+ codegen_->DeduplicateBootImageAddressLiteral(address));
+ return;
+ }
case HLoadString::LoadKind::kJitTableAddress:
__ LoadLiteral(out,
kLoadUnsignedWord,
@@ -6442,10 +6632,8 @@ void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes care
- // of poisoning the reference.
- QuickEntrypointEnum entrypoint =
- CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
DCHECK(!codegen_->IsLeafMethod());
@@ -6455,31 +6643,13 @@ void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference));
}
void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes care
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
- MemberOffset code_offset =
- ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMips64PointerSize);
- __ LoadFromOffset(kLoadDoubleword, temp, TR, QUICK_ENTRY_POINT(pNewEmptyString));
- __ LoadFromOffset(kLoadDoubleword, T9, temp, code_offset.Int32Value());
- __ Jalr(T9);
- __ Nop();
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
}
void LocationsBuilderMIPS64::VisitNot(HNot* instruction) {
@@ -6665,6 +6835,236 @@ void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMaxInt(LocationSummary* locations, bool is_min) {
+ GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+
+ if (lhs == rhs) {
+ if (out != lhs) {
+ __ Move(out, lhs);
+ }
+ } else {
+    // Some architectures, such as ARM and MIPS (prior to r6), have a
+    // conditional move instruction which only changes the target
+    // (output) register if the condition is true (MIPS prior to r6 had
+    // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always
+    // change the target (output) register. If the condition is true the
+    // output register gets the contents of the "rs" register; otherwise,
+    // the output register is set to zero. One consequence of this is
+    // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
+    // needs to use a pair of SELEQZ/SELNEZ instructions. After
+    // executing this pair of instructions one of the output registers
+    // from the pair will necessarily contain zero. Then the code ORs the
+    // output registers from the SELEQZ/SELNEZ instructions to get the
+    // final result.
+    //
+    // The initial test to see if the output register is the same as the
+    // first input register is needed to make sure that the value in the
+    // first input register isn't clobbered before we've finished
+    // computing the output value. The logic in the corresponding else
+    // clause performs the same task but makes sure the second input
+    // register isn't clobbered in the event that it's the same register
+    // as the output register; the else clause also handles the case
+    // where the output register is distinct from both the first and the
+    // second input registers.
+ if (out == lhs) {
+ __ Slt(AT, rhs, lhs);
+ if (is_min) {
+ __ Seleqz(out, lhs, AT);
+ __ Selnez(AT, rhs, AT);
+ } else {
+ __ Selnez(out, lhs, AT);
+ __ Seleqz(AT, rhs, AT);
+ }
+ } else {
+ __ Slt(AT, lhs, rhs);
+ if (is_min) {
+ __ Seleqz(out, rhs, AT);
+ __ Selnez(AT, lhs, AT);
+ } else {
+ __ Selnez(out, rhs, AT);
+ __ Seleqz(AT, lhs, AT);
+ }
+ }
+ __ Or(out, out, AT);
+ }
+}
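A scalar sketch of the select sequence above, for the out == lhs, is_min case (a
plain C++ restatement; the generated code operates on registers, not variables):

    int64_t MinViaSelects(int64_t lhs, int64_t rhs) {
      int64_t at  = (rhs < lhs) ? 1 : 0;  // Slt AT, rhs, lhs
      int64_t out = (at == 0) ? lhs : 0;  // Seleqz out, lhs, AT
      at          = (at != 0) ? rhs : 0;  // Selnez AT, rhs, AT
      return out | at;  // Or: one operand is always zero, the other holds the min.
    }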
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>();
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+
+ Mips64Label noNaNs;
+ Mips64Label done;
+ FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
+
+  // When Java computes min/max it prefers a NaN to a number; the
+  // behavior of MIPS R6 is to prefer numbers to NaNs, i.e., if one of
+  // the inputs is a NaN and the other is a valid number, the MIPS
+  // instruction will return the number; Java wants the NaN value
+  // returned. This is why there is extra logic preceding the use of
+  // the MIPS min.fmt/max.fmt instructions. If either a or b holds a
+  // NaN, return the NaN; otherwise return the min/max.
+ if (type == DataType::Type::kFloat64) {
+ __ CmpUnD(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+    // One of the inputs is a NaN.
+ __ CmpEqD(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelD(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovD(out, ftmp);
+ }
+
+ __ Bc(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinD(out, a, b);
+ } else {
+ __ MaxD(out, a, b);
+ }
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ CmpUnS(FTMP, a, b);
+ __ Bc1eqz(FTMP, &noNaNs);
+
+    // One of the inputs is a NaN.
+ __ CmpEqS(ftmp, a, a);
+ // If a == a then b is the NaN, otherwise a is the NaN.
+ __ SelS(ftmp, a, b);
+
+ if (ftmp != out) {
+ __ MovS(out, ftmp);
+ }
+
+ __ Bc(&done);
+
+ __ Bind(&noNaNs);
+
+ if (is_min) {
+ __ MinS(out, a, b);
+ } else {
+ __ MaxS(out, a, b);
+ }
+ }
+
+ __ Bind(&done);
+}
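The branches above implement Java's Math.min/max contract on top of the R6
instructions; a scalar sketch of the intended semantics, assuming <cmath>:

    double JavaStyleMin(double a, double b) {
      if (std::isnan(a) || std::isnan(b)) {  // CmpUnD + Bc1eqz not taken.
        return std::isnan(a) ? a : b;        // CmpEqD(a, a) + SelD pick the NaN.
      }
      return std::fmin(a, b);                // MinD on the no-NaN path.
    }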
+
+void InstructionCodeGeneratorMIPS64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderMIPS64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMin(HMin* min) {
+  GenerateMinMax(min, /* is_min= */ true);
+}
+
+void LocationsBuilderMIPS64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitMax(HMax* max) {
+  GenerateMinMax(max, /* is_min= */ false);
+}
+
+void LocationsBuilderMIPS64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorMIPS64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      __ Sra(AT, in, 31);     // AT = 0 if in >= 0, -1 if in < 0.
+      __ Xor(out, in, AT);    // Flip all bits when negative.
+      __ Subu(out, out, AT);  // Add 1 back when negative: out = |in|.
+ break;
+ }
+ case DataType::Type::kInt64: {
+ GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
+ GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      __ Dsra32(AT, in, 31);  // Arithmetic shift by 32 + 31 = 63: AT = 0 or -1.
+      __ Xor(out, in, AT);    // Flip all bits when negative.
+      __ Dsubu(out, out, AT); // Add 1 back when negative: out = |in|.
+ break;
+ }
+ case DataType::Type::kFloat32: {
+ FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ __ AbsS(out, in);
+ break;
+ }
+ case DataType::Type::kFloat64: {
+ FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
+ FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+ __ AbsD(out, in);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected abs type " << abs->GetResultType();
+ }
+}
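The integer cases above use the classic branch-free absolute value; a scalar
sketch (arithmetic right shift assumed, as on MIPS):

    int32_t BranchFreeAbs(int32_t in) {
      int32_t mask = in >> 31;    // Sra: 0 for non-negative, -1 for negative.
      return (in ^ mask) - mask;  // Identity for mask == 0; ~in + 1 == -in otherwise.
    }

Like the generated sequence, this maps INT32_MIN to itself, matching Java's Math.abs.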
+
void LocationsBuilderMIPS64::VisitConstructorFence(HConstructorFence* constructor_fence) {
constructor_fence->SetLocations(nullptr);
}
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index e6b69c469f..52f3a62f33 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -79,9 +79,9 @@ class InvokeDexCallingConventionVisitorMIPS64 : public InvokeDexCallingConventio
InvokeDexCallingConventionVisitorMIPS64() {}
virtual ~InvokeDexCallingConventionVisitorMIPS64() {}
- Location GetNextLocation(DataType::Type type) OVERRIDE;
- Location GetReturnLocation(DataType::Type type) const OVERRIDE;
- Location GetMethodLocation() const OVERRIDE;
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
private:
InvokeDexCallingConvention calling_convention;
@@ -108,22 +108,22 @@ class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionMIPS64() {}
- Location GetObjectLocation() const OVERRIDE {
+ Location GetObjectLocation() const override {
return Location::RegisterLocation(A1);
}
- Location GetFieldIndexLocation() const OVERRIDE {
+ Location GetFieldIndexLocation() const override {
return Location::RegisterLocation(A0);
}
- Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
return Location::RegisterLocation(V0);
}
Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED,
- bool is_instance) const OVERRIDE {
+ bool is_instance) const override {
return is_instance
? Location::RegisterLocation(A2)
: Location::RegisterLocation(A1);
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
return Location::FpuRegisterLocation(F0);
}
@@ -136,10 +136,10 @@ class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap {
ParallelMoveResolverMIPS64(ArenaAllocator* allocator, CodeGeneratorMIPS64* codegen)
: ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
- void EmitMove(size_t index) OVERRIDE;
- void EmitSwap(size_t index) OVERRIDE;
- void SpillScratch(int reg) OVERRIDE;
- void RestoreScratch(int reg) OVERRIDE;
+ void EmitMove(size_t index) override;
+ void EmitSwap(size_t index) override;
+ void SpillScratch(int reg) override;
+ void RestoreScratch(int reg) override;
void Exchange(int index1, int index2, bool double_slot);
void ExchangeQuadSlots(int index1, int index2);
@@ -173,14 +173,14 @@ class LocationsBuilderMIPS64 : public HGraphVisitor {
: HGraphVisitor(graph), codegen_(codegen) {}
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -207,14 +207,14 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
InstructionCodeGeneratorMIPS64(HGraph* graph, CodeGeneratorMIPS64* codegen);
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -233,6 +233,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
private:
void GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, GpuRegister temp);
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
void HandleBinaryOp(HBinaryOperation* operation);
void HandleCondition(HCondition* instruction);
@@ -242,6 +243,10 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -347,36 +352,35 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator {
class CodeGeneratorMIPS64 : public CodeGenerator {
public:
CodeGeneratorMIPS64(HGraph* graph,
- const Mips64InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
virtual ~CodeGeneratorMIPS64() {}
- void GenerateFrameEntry() OVERRIDE;
- void GenerateFrameExit() OVERRIDE;
+ void GenerateFrameEntry() override;
+ void GenerateFrameExit() override;
- void Bind(HBasicBlock* block) OVERRIDE;
+ void Bind(HBasicBlock* block) override;
- size_t GetWordSize() const OVERRIDE { return kMips64DoublewordSize; }
+ size_t GetWordSize() const override { return kMips64DoublewordSize; }
- size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ size_t GetFloatingPointSpillSlotSize() const override {
return GetGraph()->HasSIMD()
? 2 * kMips64DoublewordSize // 16 bytes for each spill.
: 1 * kMips64DoublewordSize; // 8 bytes for each spill.
}
- uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+ uintptr_t GetAddressOf(HBasicBlock* block) override {
return assembler_.GetLabelLocation(GetLabelOf(block));
}
- HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
- HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
- Mips64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
- const Mips64Assembler& GetAssembler() const OVERRIDE { return assembler_; }
+ HGraphVisitor* GetLocationBuilder() override { return &location_builder_; }
+ HGraphVisitor* GetInstructionVisitor() override { return &instruction_visitor_; }
+ Mips64Assembler* GetAssembler() override { return &assembler_; }
+ const Mips64Assembler& GetAssembler() const override { return assembler_; }
// Emit linker patches.
- void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
- void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
@@ -467,42 +471,40 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
// Register allocation.
- void SetupBlockedRegisters() const OVERRIDE;
+ void SetupBlockedRegisters() const override;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
- void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
- void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void DumpCoreRegister(std::ostream& stream, int reg) const override;
+ void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
- InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips64; }
+ InstructionSet GetInstructionSet() const override { return InstructionSet::kMips64; }
- const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const {
- return isa_features_;
- }
+ const Mips64InstructionSetFeatures& GetInstructionSetFeatures() const;
Mips64Label* GetLabelOf(HBasicBlock* block) const {
return CommonGetLabelOf<Mips64Label>(block_labels_, block);
}
- void Initialize() OVERRIDE {
+ void Initialize() override {
block_labels_ = CommonInitializeLabels<Mips64Label>();
}
// We prefer aligned loads and stores (less code), so spill and restore registers in slow paths
// at aligned locations.
- uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return kMips64DoublewordSize; }
+ uint32_t GetPreferredSlotsAlignment() const override { return kMips64DoublewordSize; }
- void Finalize(CodeAllocator* allocator) OVERRIDE;
+ void Finalize(CodeAllocator* allocator) override;
// Code generation helpers.
- void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE;
+ void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
- void MoveConstant(Location destination, int32_t value) OVERRIDE;
+ void MoveConstant(Location destination, int32_t value) override;
- void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+ void AddLocationAsTemp(Location location, LocationSummary* locations) override;
void SwapLocations(Location loc1, Location loc2, DataType::Type type);
@@ -511,7 +513,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path = nullptr) OVERRIDE;
+ SlowPathCode* slow_path = nullptr) override;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -521,39 +523,39 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
void GenerateInvokeRuntime(int32_t entry_point_offset);
- ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
+ ParallelMoveResolver* GetMoveResolver() override { return &move_resolver_; }
- bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; }
+ bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override { return false; }
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadString::LoadKind GetSupportedLoadStringKind(
- HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+ HLoadString::LoadKind desired_string_load_kind) override;
// Check if the desired_class_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadClass::LoadKind GetSupportedLoadClassKind(
- HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+ HLoadClass::LoadKind desired_class_load_kind) override;
// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke) OVERRIDE;
+ ArtMethod* method) override;
void GenerateStaticOrDirectCall(
- HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
void GenerateVirtualCall(
- HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
- DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE {
+ DataType::Type type ATTRIBUTE_UNUSED) override {
UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64";
}
- void GenerateNop() OVERRIDE;
- void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
- void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateNop() override;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) override;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) override;
// The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types,
// whether through .data.bimg.rel.ro, .bss, or directly in the boot image.
@@ -586,6 +588,10 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo);
};
+ PcRelativePatchInfo* NewBootImageIntrinsicPatch(uint32_t intrinsic_data,
+ const PcRelativePatchInfo* info_high = nullptr);
+ PcRelativePatchInfo* NewBootImageRelRoPatch(uint32_t boot_image_offset,
+ const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method,
const PcRelativePatchInfo* info_high = nullptr);
PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method,
@@ -608,6 +614,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
GpuRegister out,
PcRelativePatchInfo* info_low = nullptr);
+ void LoadBootImageAddress(GpuRegister reg, uint32_t boot_image_reference);
+ void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
+
void PatchJitRootUse(uint8_t* code,
const uint8_t* roots_data,
const Literal* literal,
@@ -648,14 +657,14 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
InstructionCodeGeneratorMIPS64 instruction_visitor_;
ParallelMoveResolverMIPS64 move_resolver_;
Mips64Assembler assembler_;
- const Mips64InstructionSetFeatures& isa_features_;
// Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
Uint32ToLiteralMap uint32_literals_;
// Deduplication map for 64-bit literals, used for non-patchable method address or method code
// address.
Uint64ToLiteralMap uint64_literals_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
@@ -663,10 +672,12 @@ class CodeGeneratorMIPS64 : public CodeGenerator {
ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_;
// PC-relative type patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
- // PC-relative String patch info; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_;
  // PC-relative String patch info for kBssEntry.
ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_;
+ // PC-relative patch info for IntrinsicObjects.
+ ArenaDeque<PcRelativePatchInfo> boot_image_intrinsic_patches_;
// Patches for string root accesses in JIT compiled code.
StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index 1cfdf54816..df95c88c07 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -16,6 +16,7 @@
#include "code_generator_arm64.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
@@ -29,7 +30,7 @@ using helpers::Arm64CanEncodeConstantAsImmediate;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
-using helpers::Int64ConstantFrom;
+using helpers::Int64FromLocation;
using helpers::OutputRegister;
using helpers::VRegisterFrom;
using helpers::WRegisterFrom;
@@ -37,6 +38,15 @@ using helpers::XRegisterFrom;
#define __ GetVIXLAssembler()->
+// Build-time switch for Armv8.4-a dot product instructions.
+// TODO: Enable dot product when there is a device to test it on.
+static constexpr bool kArm64EmitDotProdInstructions = false;
+
+// Returns whether dot product instructions should be emitted.
+static bool ShouldEmitDotProductInstructions(const CodeGeneratorARM64* codegen_) {
+ return kArm64EmitDotProdInstructions && codegen_->GetInstructionSetFeatures().HasDotProd();
+}
+
void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
HInstruction* input = instruction->InputAt(0);
@@ -63,7 +73,7 @@ void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruc
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -78,7 +88,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
case DataType::Type::kInt8:
DCHECK_EQ(16u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
- __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
+ __ Movi(dst.V16B(), Int64FromLocation(src_loc));
} else {
__ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
}
@@ -87,7 +97,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
case DataType::Type::kInt16:
DCHECK_EQ(8u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
- __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
+ __ Movi(dst.V8H(), Int64FromLocation(src_loc));
} else {
__ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
}
@@ -95,7 +105,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
- __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
+ __ Movi(dst.V4S(), Int64FromLocation(src_loc));
} else {
__ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
}
@@ -103,7 +113,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
if (src_loc.IsConstant()) {
- __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
+ __ Movi(dst.V2D(), Int64FromLocation(src_loc));
} else {
__ Dup(dst.V2D(), XRegisterFrom(src_loc));
}
@@ -125,7 +135,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar*
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -149,7 +159,7 @@ void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -173,7 +183,7 @@ void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* ins
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -200,7 +210,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -216,7 +226,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ Addv(dst.S(), src.V4S());
break;
@@ -230,7 +240,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ Addp(dst.D(), src.V2D());
break;
@@ -240,7 +250,7 @@ void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -259,7 +269,7 @@ void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ Scvtf(dst.V4S(), src.V4S());
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
@@ -299,7 +309,7 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
__ Fneg(dst.V2D(), src.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -338,7 +348,7 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
__ Fabs(dst.V2D(), src.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -366,7 +376,7 @@ void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
__ Not(dst.V16B(), src.V16B()); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -389,7 +399,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -431,7 +441,39 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
__ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Uqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sqadd(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Uqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sqadd(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
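UQADD/SQADD clamp to the lane's value range instead of wrapping on overflow; a
scalar sketch of the unsigned byte lane:

    uint8_t SaturatingAddU8(uint8_t a, uint8_t b) {
      unsigned sum = unsigned{a} + b;  // Widen so the carry is visible.
      return sum > 0xFFu ? uint8_t{0xFF} : static_cast<uint8_t>(sum);
    }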
@@ -471,7 +513,7 @@ void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instructi
: __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -513,7 +555,39 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
__ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ VRegister lhs = VRegisterFrom(locations->InAt(0));
+ VRegister rhs = VRegisterFrom(locations->InAt(1));
+ VRegister dst = VRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Uqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ Sqsub(dst.V16B(), lhs.V16B(), rhs.V16B());
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Uqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Sqsub(dst.V8H(), lhs.V8H(), rhs.V8H());
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -551,7 +625,7 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
__ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -575,7 +649,7 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
__ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -623,7 +697,7 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
__ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -671,7 +745,7 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
__ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -699,7 +773,7 @@ void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
__ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -735,7 +809,7 @@ void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
__ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -762,7 +836,7 @@ void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
__ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -782,7 +856,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -816,7 +890,7 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
__ Shl(dst.V2D(), lhs.V2D(), value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -850,7 +924,7 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
__ Sshr(dst.V2D(), lhs.V2D(), value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -884,7 +958,7 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
__ Ushr(dst.V2D(), lhs.V2D(), value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -916,7 +990,7 @@ void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -957,7 +1031,7 @@ void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instructi
__ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -978,7 +1052,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1026,7 +1100,7 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1139,7 +1213,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1167,7 +1241,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1188,7 +1262,7 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins
__ Sabal2(acc.V2D(), left.V4S(), right.V4S());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1204,12 +1278,88 @@ void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* ins
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ }
+}
+
+void LocationsBuilderARM64::VisitVecDotProd(HVecDotProd* instruction) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetInAt(2, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+
+ // For Int8 and Uint8 general case we need a temp register.
+ if ((DataType::Size(instruction->InputAt(1)->AsVecOperation()->GetPackedType()) == 1) &&
+ !ShouldEmitDotProductInstructions(codegen_)) {
+ locations->AddTemp(Location::RequiresFpuRegister());
+ }
+}
+
+void InstructionCodeGeneratorARM64::VisitVecDotProd(HVecDotProd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ VRegister acc = VRegisterFrom(locations->InAt(0));
+ VRegister left = VRegisterFrom(locations->InAt(1));
+ VRegister right = VRegisterFrom(locations->InAt(2));
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
+ HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
+ DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
+ HVecOperation::ToSignedType(b->GetPackedType()));
+ DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+
+ size_t inputs_data_size = DataType::Size(a->GetPackedType());
+ switch (inputs_data_size) {
+ case 1u: {
+ DCHECK_EQ(16u, a->GetVectorLength());
+ if (instruction->IsZeroExtending()) {
+ if (ShouldEmitDotProductInstructions(codegen_)) {
+ __ Udot(acc.V4S(), left.V16B(), right.V16B());
+ } else {
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Umull(tmp.V8H(), left.V8B(), right.V8B());
+ __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+
+ __ Umull2(tmp.V8H(), left.V16B(), right.V16B());
+ __ Uaddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Uaddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ }
+ } else {
+ if (ShouldEmitDotProductInstructions(codegen_)) {
+ __ Sdot(acc.V4S(), left.V16B(), right.V16B());
+ } else {
+ VRegister tmp = VRegisterFrom(locations->GetTemp(0));
+ __ Smull(tmp.V8H(), left.V8B(), right.V8B());
+ __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+
+ __ Smull2(tmp.V8H(), left.V16B(), right.V16B());
+ __ Saddw(acc.V4S(), acc.V4S(), tmp.V4H());
+ __ Saddw2(acc.V4S(), acc.V4S(), tmp.V8H());
+ }
+ }
+ break;
+ }
+ case 2u:
+ DCHECK_EQ(8u, a->GetVectorLength());
+ if (instruction->IsZeroExtending()) {
+ __ Umlal(acc.V4S(), left.V4H(), right.V4H());
+ __ Umlal2(acc.V4S(), left.V8H(), right.V8H());
+ } else {
+ __ Smlal(acc.V4S(), left.V4H(), right.V4H());
+ __ Smlal2(acc.V4S(), left.V8H(), right.V8H());
+ }
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
}
}
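When the dot-product instructions are disabled, the Umull/Uaddw(2) chains above
accumulate the same per-lane sums UDOT/SDOT would produce; reduced to scalar form
(a sketch that ignores the 4-lane accumulator layout):

    int32_t DotProdScalar(int32_t acc, const int8_t* a, const int8_t* b, size_t n) {
      for (size_t i = 0; i != n; ++i) {
        acc += static_cast<int32_t>(a[i]) * b[i];  // Widen, multiply, accumulate.
      }
      return acc;
    }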
@@ -1237,7 +1387,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1269,7 +1419,7 @@ MemOperand InstructionCodeGeneratorARM64::VecAddress(
DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());
if (index.IsConstant()) {
- offset += Int64ConstantFrom(index) << shift;
+ offset += Int64FromLocation(index) << shift;
return HeapOperand(base, offset);
} else {
*scratch = temps_scope->AcquireSameSizeAs(base);
@@ -1331,7 +1481,7 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
__ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1362,7 +1512,7 @@ void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
__ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
index 7c3155ab73..b092961a56 100644
--- a/compiler/optimizing/code_generator_vector_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -46,7 +46,7 @@ void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instr
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -71,7 +71,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala
__ Vdup(Untyped32, dst, InputRegisterAt(instruction, 0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -84,7 +84,7 @@ void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instructi
locations->SetOut(Location::RequiresRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -98,7 +98,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* i
__ Vmov(OutputRegister(instruction), DRegisterLane(src, 0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -122,7 +122,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -138,7 +138,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) {
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
DCHECK_EQ(2u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ Vpadd(DataTypeValue::I32, dst, src, src);
break;
@@ -151,7 +151,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -188,7 +188,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) {
__ Vneg(DataTypeValue::S32, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -215,7 +215,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) {
__ Vabs(DataTypeValue::S32, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -242,7 +242,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) {
__ Vmvn(I8, dst, src); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -262,7 +262,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -292,7 +292,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) {
__ Vadd(I32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
+ vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
+ vixl32::DRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Vqadd(DataTypeValue::U8, dst, lhs, rhs);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Vqadd(DataTypeValue::S8, dst, lhs, rhs);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Vqadd(DataTypeValue::U16, dst, lhs, rhs);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Vqadd(DataTypeValue::S16, dst, lhs, rhs);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
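Vqadd is a lane-wise saturating add: a result that overflows the lane's range clamps to the type's minimum or maximum instead of wrapping, and the DCHECKed lengths (8 and 4) reflect the 64-bit D registers used here. VisitVecSaturationSub below is symmetric via Vqsub. A minimal scalar sketch of one signed lane (the helper name is ours, not from this patch):

    #include <cstdint>

    // Scalar reference for one int8 lane of VQADD.S8: clamp to [-128, 127] instead of wrapping.
    int8_t SaturatingAddS8(int8_t a, int8_t b) {
      int32_t wide = static_cast<int32_t>(a) + static_cast<int32_t>(b);
      if (wide > INT8_MAX) return INT8_MAX;
      if (wide < INT8_MIN) return INT8_MIN;
      return static_cast<int8_t>(wide);
    }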
@@ -332,7 +364,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruc
: __ Vhadd(DataTypeValue::S16, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -362,7 +394,39 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) {
__ Vsub(I32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ vixl32::DRegister lhs = DRegisterFrom(locations->InAt(0));
+ vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1));
+ vixl32::DRegister dst = DRegisterFrom(locations->Out());
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Vqsub(DataTypeValue::U8, dst, lhs, rhs);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ Vqsub(DataTypeValue::S8, dst, lhs, rhs);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Vqsub(DataTypeValue::U16, dst, lhs, rhs);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(4u, instruction->GetVectorLength());
+ __ Vqsub(DataTypeValue::S16, dst, lhs, rhs);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -392,7 +456,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) {
__ Vmul(I32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -440,7 +504,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) {
__ Vmin(DataTypeValue::S32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -480,7 +544,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) {
__ Vmax(DataTypeValue::S32, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -505,7 +569,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) {
__ Vand(I8, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -537,7 +601,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) {
__ Vorr(I8, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -561,7 +625,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) {
__ Veor(I8, dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -580,7 +644,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -610,7 +674,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) {
__ Vshl(I32, dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -640,7 +704,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) {
__ Vshr(DataTypeValue::S32, dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -670,7 +734,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) {
__ Vshr(DataTypeValue::U32, dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -690,7 +754,7 @@ void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -716,7 +780,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruc
__ Vmov(Untyped32, DRegisterLane(dst, 0), InputRegisterAt(instruction, 0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -737,7 +801,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -780,16 +844,24 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* i
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderARMVIXL::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
// Return whether the vector memory access operation is guaranteed to be word-aligned (the ARM
// word size equals 4 bytes).
static bool IsWordAligned(HVecMemoryOperation* instruction) {
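The hunk ends before the body, but the predicate the comment describes amounts to testing the access offset against the 4-byte word size; a hedged sketch of that check (not the actual implementation, which also has to reason about the base object's alignment):

    #include <cstdint>

    // Sketch: a byte offset is word-aligned when it is a multiple of 4.
    inline bool IsWordAlignedOffset(uint32_t byte_offset) {
      return (byte_offset & 3u) == 0u;
    }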
@@ -817,7 +889,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -923,7 +995,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -971,7 +1043,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
index ed9de96496..4e9ba0d3d2 100644
--- a/compiler/optimizing/code_generator_vector_mips.cc
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -42,7 +42,7 @@ void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruct
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -74,22 +74,22 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar*
__ InsertW(static_cast<VectorRegister>(FTMP),
locations->InAt(0).AsRegisterPairHigh<Register>(),
1);
- __ ReplicateFPToVectorRegister(dst, FTMP, /* is_double */ true);
+ __ ReplicateFPToVectorRegister(dst, FTMP, /* is_double= */ true);
break;
case DataType::Type::kFloat32:
DCHECK_EQ(4u, instruction->GetVectorLength());
__ ReplicateFPToVectorRegister(dst,
locations->InAt(0).AsFpuRegister<FRegister>(),
- /* is_double */ false);
+ /* is_double= */ false);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
__ ReplicateFPToVectorRegister(dst,
locations->InAt(0).AsFpuRegister<FRegister>(),
- /* is_double */ true);
+ /* is_double= */ true);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
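On 32-bit MIPS a 64-bit scalar arrives split across a GPR pair, which is why the kInt64 case above inserts the low and high words into FTMP separately before broadcasting. Conceptually, the assembled lane is (sketch, names ours):

    #include <cstdint>

    // Conceptual assembly of the 64-bit lane from a MIPS32 register pair.
    uint64_t AssembleLaneFromPair(uint32_t low, uint32_t high) {
      return (static_cast<uint64_t>(high) << 32) | low;
    }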
@@ -113,7 +113,7 @@ void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction)
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -138,7 +138,7 @@ void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* inst
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -170,7 +170,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
: Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -187,7 +187,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ Hadd_sD(tmp, src, src);
__ IlvlD(dst, tmp, tmp);
@@ -209,7 +209,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ IlvlD(dst, src, src);
__ AddvD(dst, dst, src);
@@ -225,7 +225,7 @@ void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -244,7 +244,7 @@ void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ Ffint_sW(dst, src);
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
@@ -290,7 +290,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) {
__ FsubD(dst, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -337,7 +337,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) {
__ AndV(dst, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -369,7 +369,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) {
__ NorV(dst, src, src); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -392,7 +392,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -434,11 +434,19 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) {
__ FaddD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LOG(FATAL) << "Unsupported SIMD " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
@@ -474,7 +482,7 @@ void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instructio
: __ Ave_sH(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -516,11 +524,19 @@ void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) {
__ FsubD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LOG(FATAL) << "Unsupported SIMD " << instruction->GetId();
+}
+
void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
@@ -558,7 +574,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) {
__ FmulD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -582,7 +598,7 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) {
__ FdivD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -640,7 +656,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) {
__ FminD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -698,7 +714,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) {
__ FmaxD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -727,7 +743,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) {
__ AndV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -764,7 +780,7 @@ void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) {
__ OrV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -793,7 +809,7 @@ void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) {
__ XorV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -813,7 +829,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -847,7 +863,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) {
__ SlliD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -881,7 +897,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) {
__ SraiD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -915,7 +931,7 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) {
__ SrliD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -947,7 +963,7 @@ void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -989,7 +1005,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instructio
__ InsertW(dst, locations->InAt(0).AsRegisterPairHigh<Register>(), 1);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1010,7 +1026,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1060,7 +1076,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumu
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1162,7 +1178,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1201,7 +1217,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1231,7 +1247,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1247,17 +1263,25 @@ void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* inst
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
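The nested switches above pick an MSA instruction per accumulator/operand type pair, but every arm computes the same thing: a lane-wise sum of absolute differences folded into the accumulator (in the real node the accumulator lanes may be wider than the operand lanes). A scalar sketch of the semantics (names are ours):

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>

    // Scalar reference for one SAD-accumulate step: acc[i] += |x[i] - y[i]|.
    void SadAccumulate(int32_t* acc, const int32_t* x, const int32_t* y, size_t n) {
      for (size_t i = 0; i < n; ++i) {
        acc[i] += std::abs(x[i] - y[i]);
      }
    }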

+void LocationsBuilderMIPS::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
HVecMemoryOperation* instruction,
@@ -1282,7 +1306,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1320,7 +1344,7 @@ int32_t InstructionCodeGeneratorMIPS::VecAddress(LocationSummary* locations,
}
void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) {
- CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ true);
+ CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ true);
}
void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) {
@@ -1357,13 +1381,13 @@ void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) {
__ LdD(reg, base, offset);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) {
- CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ false);
+ CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ false);
}
void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) {
@@ -1395,7 +1419,7 @@ void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) {
__ StD(reg, base, offset);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 9ea55ec8d7..6467d3e27f 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -47,7 +47,7 @@ void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instru
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -79,16 +79,16 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar
DCHECK_EQ(4u, instruction->GetVectorLength());
__ ReplicateFPToVectorRegister(dst,
locations->InAt(0).AsFpuRegister<FpuRegister>(),
- /* is_double */ false);
+ /* is_double= */ false);
break;
case DataType::Type::kFloat64:
DCHECK_EQ(2u, instruction->GetVectorLength());
__ ReplicateFPToVectorRegister(dst,
locations->InAt(0).AsFpuRegister<FpuRegister>(),
- /* is_double */ true);
+ /* is_double= */ true);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -112,7 +112,7 @@ void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instructio
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -136,7 +136,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* in
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -168,7 +168,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
: Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -185,7 +185,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) {
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ Hadd_sD(tmp, src, src);
__ IlvlD(dst, tmp, tmp);
@@ -207,7 +207,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) {
break;
case DataType::Type::kInt64:
DCHECK_EQ(2u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ IlvlD(dst, src, src);
__ AddvD(dst, dst, src);
@@ -223,7 +223,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) {
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -242,7 +242,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ Ffint_sW(dst, src);
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -289,7 +289,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) {
__ FsubD(dst, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -336,7 +336,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) {
__ AndV(dst, dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -368,7 +368,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) {
__ NorV(dst, src, src); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -391,7 +391,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -433,11 +433,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
__ FaddD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LOG(FATAL) << "Unsupported SIMD " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
@@ -473,7 +481,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruct
: __ Ave_sH(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -515,11 +523,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) {
__ FsubD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LOG(FATAL) << "Unsupported SIMD " << instruction->GetId();
+}
+
void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) {
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}
@@ -557,7 +573,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) {
__ FmulD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -581,7 +597,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
__ FdivD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -639,7 +655,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) {
__ FminD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -697,7 +713,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) {
__ FmaxD(dst, lhs, rhs);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -726,7 +742,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) {
__ AndV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -763,7 +779,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) {
__ OrV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -792,7 +808,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) {
__ XorV(dst, lhs, rhs); // lanes do not matter
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -812,7 +828,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -846,7 +862,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) {
__ SlliD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -880,7 +896,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) {
__ SraiD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -914,7 +930,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
__ SrliD(dst, lhs, value);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -946,7 +962,7 @@ void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -987,7 +1003,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruct
__ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1008,7 +1024,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1058,7 +1074,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccu
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1160,7 +1176,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1199,7 +1215,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1229,7 +1245,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
@@ -1245,17 +1261,25 @@ void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* in
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}

+void LocationsBuilderMIPS64::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
HVecMemoryOperation* instruction,
@@ -1280,7 +1304,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1318,7 +1342,7 @@ int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations,
}
void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) {
- CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ true);
+ CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ true);
}
void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) {
@@ -1355,13 +1379,13 @@ void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) {
__ LdD(reg, base, offset);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) {
- CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ false);
+ CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load= */ false);
}
void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) {
@@ -1393,7 +1417,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) {
__ StD(reg, base, offset);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
index 4945328e2b..0ee00356b9 100644
--- a/compiler/optimizing/code_generator_vector_x86.cc
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -54,7 +54,7 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi
: Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -111,7 +111,7 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i
__ shufpd(dst, dst, Immediate(0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -138,7 +138,7 @@ void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction)
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -152,7 +152,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
case DataType::Type::kInt32:
DCHECK_LE(4u, instruction->GetVectorLength());
@@ -174,7 +174,7 @@ void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instr
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -196,7 +196,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -205,8 +205,8 @@ void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
// Long reduction or min/max require a temporary.
if (instruction->GetPackedType() == DataType::Type::kInt64 ||
- instruction->GetKind() == HVecReduce::kMin ||
- instruction->GetKind() == HVecReduce::kMax) {
+ instruction->GetReductionKind() == HVecReduce::kMin ||
+ instruction->GetReductionKind() == HVecReduce::kMax) {
instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}
}
@@ -218,38 +218,23 @@ void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ movaps(dst, src);
__ phaddd(dst, dst);
__ phaddd(dst, dst);
break;
- case HVecReduce::kMin: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pminsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pminsd(dst, tmp);
- break;
- }
- case HVecReduce::kMax: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pmaxsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pmaxsd(dst, tmp);
- break;
- }
+ case HVecReduce::kMin:
+ case HVecReduce::kMax:
+        // Historical note: there was a broken implementation here; see b/117863065.
+        // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
+ LOG(FATAL) << "Unsupported reduction type.";
}
break;
case DataType::Type::kInt64: {
DCHECK_EQ(2u, instruction->GetVectorLength());
XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ movaps(tmp, src);
__ movaps(dst, src);
@@ -258,12 +243,12 @@ void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) {
break;
case HVecReduce::kMin:
case HVecReduce::kMax:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
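For the four-lane int32 kSum, two phaddd passes fold the vector into lane 0: the first pass leaves (v0+v1, v2+v3) in each half, the second leaves v0+v1+v2+v3 in lane 0. A scalar sketch of a single pass with dst == src (illustrative only):

    #include <array>
    #include <cstdint>

    // One phaddd pass on itself: adjacent lane pairs are summed.
    std::array<int32_t, 4> HorizontalAdd(const std::array<int32_t, 4>& v) {
      return {v[0] + v[1], v[2] + v[3], v[0] + v[1], v[2] + v[3]};
    }
    // Applying it twice leaves v0 + v1 + v2 + v3 in lane 0.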
@@ -282,7 +267,7 @@ void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ cvtdq2ps(dst, src);
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
@@ -328,7 +313,7 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
__ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -369,7 +354,7 @@ void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
__ andpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -418,7 +403,7 @@ void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
__ xorpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -441,7 +426,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -483,7 +468,39 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
__ addpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddusb(dst, src);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddsb(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddusw(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddsw(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
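SSE2's packed saturating adds are two-operand (the destination doubles as the first source), which is why CreateVecBinOpLocations pins the output to SameAsFirstInput() and the DCHECK above re-asserts it; the psubus*/psubs* variants below follow the same pattern. Per lane, paddusb clamps at the unsigned maximum; a scalar sketch (helper name is ours):

    #include <cstdint>

    // Scalar reference for one uint8 lane of PADDUSB: clamp at 255 instead of wrapping.
    uint8_t SaturatingAddU8(uint8_t a, uint8_t b) {
      uint32_t wide = static_cast<uint32_t>(a) + static_cast<uint32_t>(b);
      return static_cast<uint8_t>(wide > 0xFFu ? 0xFFu : wide);
    }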
@@ -503,14 +520,14 @@ void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ pavgb(dst, src);
- return;
+ __ pavgb(dst, src);
+ break;
case DataType::Type::kUint16:
DCHECK_EQ(8u, instruction->GetVectorLength());
__ pavgw(dst, src);
- return;
+ break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
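pavgb/pavgw compute a rounded halving add: the sum is formed in a widened intermediate and incremented by one before the shift, so halves round up and overflow is impossible. Per uint8 lane this is (sketch):

    #include <cstdint>

    // Scalar reference for one uint8 lane of PAVGB: rounded average without overflow.
    uint8_t RoundingHalvingAddU8(uint8_t a, uint8_t b) {
      return static_cast<uint8_t>((static_cast<uint32_t>(a) + b + 1u) >> 1);
    }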
@@ -552,7 +569,39 @@ void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
__ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubusb(dst, src);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubsb(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubusw(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubsw(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -585,7 +634,7 @@ void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
__ mulpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -609,7 +658,7 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
__ divpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -658,7 +707,7 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) {
__ minpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -707,7 +756,7 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) {
__ maxpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -742,7 +791,7 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
__ andpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -777,7 +826,7 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
__ andnpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -812,7 +861,7 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
__ orpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -847,7 +896,7 @@ void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
__ xorpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -865,7 +914,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -894,7 +943,7 @@ void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
__ psllq(dst, Immediate(static_cast<uint8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -919,7 +968,7 @@ void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
__ psrad(dst, Immediate(static_cast<uint8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
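VecShr uses the arithmetic shift forms (psrad for the int32 lanes shown), which replicate the sign bit, while VecUShr in the next hunk uses the logical forms (psrld/psrlq), which shift in zeros; SSE2 has no 64-bit arithmetic shift, consistent with the signed case stopping at 32-bit lanes. A scalar sketch of the per-lane difference (assuming the usual arithmetic behavior of signed >>):

    #include <cstdint>

    int32_t ArithmeticShr(int32_t v, int shift) { return v >> shift; }   // psrad: sign-extends
    uint32_t LogicalShr(uint32_t v, int shift) { return v >> shift; }    // psrld: zero-fills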
@@ -948,7 +997,7 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
__ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -985,7 +1034,7 @@ void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1011,7 +1060,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -1035,7 +1084,7 @@ void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction
__ movsd(dst, locations->InAt(1).AsFpuRegister<XmmRegister>());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1056,7 +1105,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1079,6 +1128,14 @@ void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instr
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}

+void LocationsBuilderX86::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
HVecMemoryOperation* instruction,
@@ -1103,7 +1160,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1184,7 +1241,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1220,7 +1277,7 @@ void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
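Both the load and store paths above pick between the aligned and unaligned SSE forms (movaps/movapd vs. movups/movupd): the aligned forms fault on an address that is not 16-byte aligned. The test the is_aligned16 flag stands for is, conceptually (sketch, not the ART helper):

    #include <cstdint>

    // Conceptual 16-byte alignment test behind the movaps/movups choice.
    inline bool IsAligned16(uintptr_t address) {
      return (address & 0xFu) == 0u;
    }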
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
index a77c7d6838..9c2882766c 100644
--- a/compiler/optimizing/code_generator_vector_x86_64.cc
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -49,7 +49,7 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru
: Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -102,7 +102,7 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar
__ shufpd(dst, dst, Immediate(0));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -126,7 +126,7 @@ void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instructio
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -140,7 +140,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in
case DataType::Type::kInt8:
case DataType::Type::kUint16:
case DataType::Type::kInt16: // TODO: up to here, and?
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -157,7 +157,7 @@ void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* in
DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -179,7 +179,7 @@ static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -188,8 +188,8 @@ void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) {
CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
// Long reduction or min/max require a temporary.
if (instruction->GetPackedType() == DataType::Type::kInt64 ||
- instruction->GetKind() == HVecReduce::kMin ||
- instruction->GetKind() == HVecReduce::kMax) {
+ instruction->GetReductionKind() == HVecReduce::kMin ||
+ instruction->GetReductionKind() == HVecReduce::kMax) {
instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}
}
@@ -201,38 +201,23 @@ void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
switch (instruction->GetPackedType()) {
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ movaps(dst, src);
__ phaddd(dst, dst);
__ phaddd(dst, dst);
break;
- case HVecReduce::kMin: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pminsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pminsd(dst, tmp);
- break;
- }
- case HVecReduce::kMax: {
- XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- __ movaps(tmp, src);
- __ movaps(dst, src);
- __ psrldq(tmp, Immediate(8));
- __ pmaxsd(dst, tmp);
- __ psrldq(tmp, Immediate(4));
- __ pmaxsd(dst, tmp);
- break;
- }
+ case HVecReduce::kMin:
+ case HVecReduce::kMax:
+        // Historical note: there was a broken implementation here; see b/117863065.
+        // Do not draw on the old code if we ever want to bring MIN/MAX reduction back.
+ LOG(FATAL) << "Unsupported reduction type.";
}
break;
case DataType::Type::kInt64: {
DCHECK_EQ(2u, instruction->GetVectorLength());
XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- switch (instruction->GetKind()) {
+ switch (instruction->GetReductionKind()) {
case HVecReduce::kSum:
__ movaps(tmp, src);
__ movaps(dst, src);
@@ -241,12 +226,12 @@ void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) {
break;
case HVecReduce::kMin:
case HVecReduce::kMax:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
break;
}
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -265,7 +250,7 @@ void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
DCHECK_EQ(4u, instruction->GetVectorLength());
__ cvtdq2ps(dst, src);
} else {
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
}
}
@@ -311,7 +296,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
__ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -352,7 +337,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
__ andpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -401,7 +386,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
__ xorpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -424,7 +409,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -466,7 +451,39 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
__ addpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddusb(dst, src);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ paddsb(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddusw(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ paddsw(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
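
Unlike the wrapping paddb/paddw used by VisitVecAdd, the saturating variants clamp to the lane's representable range; VisitVecSaturationSub below is symmetric. One-lane scalar sketch of the byte cases (the 16-bit lanes behave analogously):

#include <algorithm>
#include <cstdint>

// Scalar models of one PADDUSB (unsigned) and PADDSB (signed) lane:
// the sum is clamped instead of wrapping.
uint8_t SatAddU8(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>(std::min<int>(a + b, 255));
}

int8_t SatAddS8(int8_t a, int8_t b) {
  return static_cast<int8_t>(std::min(std::max(a + b, -128), 127));
}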
@@ -486,14 +503,14 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct
switch (instruction->GetPackedType()) {
case DataType::Type::kUint8:
DCHECK_EQ(16u, instruction->GetVectorLength());
- __ pavgb(dst, src);
- return;
+ __ pavgb(dst, src);
+ break;
case DataType::Type::kUint16:
DCHECK_EQ(8u, instruction->GetVectorLength());
__ pavgw(dst, src);
- return;
+ break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
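
pavgb/pavgw implement a rounded halving add: (a + b + 1) >> 1, evaluated in a wider type so the intermediate sum cannot overflow. One-lane scalar sketch:

#include <cstdint>

// Scalar model of one PAVGB lane; PAVGW is the same at 16-bit lanes.
uint8_t RoundedHalvingAdd(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((static_cast<uint32_t>(a) + b + 1) >> 1);
}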
@@ -535,7 +552,39 @@ void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
__ subpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
+ UNREACHABLE();
+ }
+}
+
+void LocationsBuilderX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSaturationSub(HVecSaturationSub* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ DCHECK(locations->InAt(0).Equals(locations->Out()));
+ XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+ XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+ switch (instruction->GetPackedType()) {
+ case DataType::Type::kUint8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubusb(dst, src);
+ break;
+ case DataType::Type::kInt8:
+ DCHECK_EQ(16u, instruction->GetVectorLength());
+ __ psubsb(dst, src);
+ break;
+ case DataType::Type::kUint16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubusw(dst, src);
+ break;
+ case DataType::Type::kInt16:
+ DCHECK_EQ(8u, instruction->GetVectorLength());
+ __ psubsw(dst, src);
+ break;
+ default:
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -568,7 +617,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
__ mulpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -592,7 +641,7 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
__ divpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -641,7 +690,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) {
__ minpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -690,7 +739,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) {
__ maxpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -725,7 +774,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
__ andpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -760,7 +809,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
__ andnpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -795,7 +844,7 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
__ orpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -830,7 +879,7 @@ void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
__ xorpd(dst, src);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -848,7 +897,7 @@ static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperati
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -877,7 +926,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
__ psllq(dst, Immediate(static_cast<int8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -902,7 +951,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
__ psrad(dst, Immediate(static_cast<int8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -931,7 +980,7 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
__ psrlq(dst, Immediate(static_cast<int8_t>(value)));
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -963,7 +1012,7 @@ void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
locations->SetOut(Location::RequiresFpuRegister());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -989,7 +1038,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct
case DataType::Type::kInt8:
case DataType::Type::kUint16:
    case DataType::Type::kInt16:  // TODO: types up to here are unsupported; extend to more types?
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
case DataType::Type::kInt32:
DCHECK_EQ(4u, instruction->GetVectorLength());
@@ -1008,7 +1057,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruct
__ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1029,7 +1078,7 @@ static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* in
locations->SetOut(Location::SameAsFirstInput());
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1052,6 +1101,14 @@ void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* in
LOG(FATAL) << "No SIMD for " << instruction->GetId();
}
+void LocationsBuilderX86_64::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecDotProd(HVecDotProd* instruction) {
+ LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
HVecMemoryOperation* instruction,
@@ -1076,7 +1133,7 @@ static void CreateVecMemLocations(ArenaAllocator* allocator,
}
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1157,7 +1214,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
@@ -1193,7 +1250,7 @@ void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
break;
default:
- LOG(FATAL) << "Unsupported SIMD type";
+ LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
UNREACHABLE();
}
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 6bf045885d..95118b0b6d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -23,6 +23,7 @@
#include "entrypoints/quick/quick_entrypoints.h"
#include "entrypoints/quick/quick_entrypoints_enum.h"
#include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_x86.h"
@@ -51,6 +52,18 @@ static constexpr int kC2ConditionMask = 0x400;
static constexpr int kFakeReturnRegister = Register(8);
+static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
+static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
+
+static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
+ InvokeRuntimeCallingConvention calling_convention;
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+ // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+  // that the kPrimNot result register is the same as the first argument register.
+ return caller_saves;
+}
+
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86PointerSize, x).Int32Value()
@@ -59,7 +72,7 @@ class NullCheckSlowPathX86 : public SlowPathCode {
public:
explicit NullCheckSlowPathX86(HNullCheck* instruction) : SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
@@ -73,9 +86,9 @@ class NullCheckSlowPathX86 : public SlowPathCode {
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86"; }
+ const char* GetDescription() const override { return "NullCheckSlowPathX86"; }
private:
DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86);
@@ -85,16 +98,16 @@ class DivZeroCheckSlowPathX86 : public SlowPathCode {
public:
explicit DivZeroCheckSlowPathX86(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
x86_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86"; }
+ const char* GetDescription() const override { return "DivZeroCheckSlowPathX86"; }
private:
DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86);
@@ -105,7 +118,7 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCode {
DivRemMinusOneSlowPathX86(HInstruction* instruction, Register reg, bool is_div)
: SlowPathCode(instruction), reg_(reg), is_div_(is_div) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
__ Bind(GetEntryLabel());
if (is_div_) {
__ negl(reg_);
@@ -115,7 +128,7 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86"; }
+ const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86"; }
private:
Register reg_;
@@ -127,7 +140,7 @@ class BoundsCheckSlowPathX86 : public SlowPathCode {
public:
explicit BoundsCheckSlowPathX86(HBoundsCheck* instruction) : SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
@@ -174,9 +187,9 @@ class BoundsCheckSlowPathX86 : public SlowPathCode {
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86"; }
+ const char* GetDescription() const override { return "BoundsCheckSlowPathX86"; }
private:
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86);
@@ -187,7 +200,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
SuspendCheckSlowPathX86(HSuspendCheck* instruction, HBasicBlock* successor)
: SlowPathCode(instruction), successor_(successor) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
@@ -211,7 +224,7 @@ class SuspendCheckSlowPathX86 : public SlowPathCode {
return successor_;
}
- const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86"; }
+ const char* GetDescription() const override { return "SuspendCheckSlowPathX86"; }
private:
HBasicBlock* const successor_;
@@ -224,7 +237,7 @@ class LoadStringSlowPathX86 : public SlowPathCode {
public:
explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -243,7 +256,7 @@ class LoadStringSlowPathX86 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; }
+ const char* GetDescription() const override { return "LoadStringSlowPathX86"; }
private:
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
@@ -251,36 +264,42 @@ class LoadStringSlowPathX86 : public SlowPathCode {
class LoadClassSlowPathX86 : public SlowPathCode {
public:
- LoadClassSlowPathX86(HLoadClass* cls,
- HInstruction* at,
- uint32_t dex_pc,
- bool do_clinit)
- : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+ LoadClassSlowPathX86(HLoadClass* cls, HInstruction* at)
+ : SlowPathCode(at), cls_(cls) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
+ Location out = locations->Out();
+ const uint32_t dex_pc = instruction_->GetDexPc();
+ bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
+ bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
+
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
InvokeRuntimeCallingConvention calling_convention;
- dex::TypeIndex type_index = cls_->GetTypeIndex();
- __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
- x86_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage
- : kQuickInitializeType,
- instruction_,
- dex_pc_,
- this);
- if (do_clinit_) {
- CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ if (must_resolve_type) {
+ DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_codegen->GetGraph()->GetDexFile()));
+ dex::TypeIndex type_index = cls_->GetTypeIndex();
+ __ movl(calling_convention.GetRegisterAt(0), Immediate(type_index.index_));
+ x86_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+ // If we also must_do_clinit, the resolved type is now in the correct register.
} else {
- CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ DCHECK(must_do_clinit);
+ Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
+ x86_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), source);
+ }
+ if (must_do_clinit) {
+ x86_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
}
// Move the class to the desired location.
- Location out = locations->Out();
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
x86_codegen->Move32(out, Location::RegisterLocation(EAX));
@@ -289,18 +308,12 @@ class LoadClassSlowPathX86 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86"; }
+ const char* GetDescription() const override { return "LoadClassSlowPathX86"; }
private:
// The class this slow path will load.
HLoadClass* const cls_;
- // The dex PC of `at_`.
- const uint32_t dex_pc_;
-
- // Whether to initialize the class.
- const bool do_clinit_;
-
DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86);
};
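
The rewritten slow path derives its two actions from instruction_ at emit time instead of carrying dex_pc_/do_clinit_ constructor flags. A hypothetical sketch of that dispatch (names invented for illustration; the real flags come from MustResolveTypeOnSlowPath() and MustGenerateClinitCheck()):

// Illustrative-only model of the EmitNativeCode dispatch above.
struct SlowPathActions {
  bool resolve_type;  // kQuickResolveType runs first; its result feeds clinit.
  bool do_clinit;     // kQuickInitializeStaticStorage runs afterwards.
};

SlowPathActions Plan(bool is_load_class,
                     bool must_resolve_type_on_slow_path,
                     bool must_generate_clinit_check) {
  SlowPathActions a;
  a.resolve_type = is_load_class && must_resolve_type_on_slow_path;
  a.do_clinit = !is_load_class /* ClinitCheck */ || must_generate_clinit_check;
  // The code generator only creates this slow path when at least one action
  // applies, which is what the DCHECK(must_do_clinit) in the else branch
  // relies on.
  return a;
}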
@@ -309,7 +322,7 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
TypeCheckSlowPathX86(HInstruction* instruction, bool is_fatal)
: SlowPathCode(instruction), is_fatal_(is_fatal) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(instruction_->IsCheckCast()
|| !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -362,8 +375,8 @@ class TypeCheckSlowPathX86 : public SlowPathCode {
}
}
- const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86"; }
- bool IsFatal() const OVERRIDE { return is_fatal_; }
+ const char* GetDescription() const override { return "TypeCheckSlowPathX86"; }
+ bool IsFatal() const override { return is_fatal_; }
private:
const bool is_fatal_;
@@ -376,7 +389,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode {
explicit DeoptimizationSlowPathX86(HDeoptimize* instruction)
: SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
__ Bind(GetEntryLabel());
LocationSummary* locations = instruction_->GetLocations();
@@ -389,7 +402,7 @@ class DeoptimizationSlowPathX86 : public SlowPathCode {
CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
- const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86"; }
+ const char* GetDescription() const override { return "DeoptimizationSlowPathX86"; }
private:
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86);
@@ -399,7 +412,7 @@ class ArraySetSlowPathX86 : public SlowPathCode {
public:
explicit ArraySetSlowPathX86(HInstruction* instruction) : SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -430,7 +443,7 @@ class ArraySetSlowPathX86 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86"; }
+ const char* GetDescription() const override { return "ArraySetSlowPathX86"; }
private:
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
@@ -458,9 +471,9 @@ class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86"; }
+ const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86"; }
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
@@ -545,9 +558,9 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
+ const char* GetDescription() const override { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Register ref_reg = ref_.AsRegister<Register>();
DCHECK(locations->CanCall());
@@ -711,7 +724,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
Register reg_out = out_.AsRegister<Register>();
@@ -830,7 +843,7 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathX86"; }
+ const char* GetDescription() const override { return "ReadBarrierForHeapReferenceSlowPathX86"; }
private:
Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
@@ -870,7 +883,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
DCHECK(kEmitCompilerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
Register reg_out = out_.AsRegister<Register>();
DCHECK(locations->CanCall());
@@ -896,7 +909,7 @@ class ReadBarrierForRootSlowPathX86 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86"; }
+ const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86"; }
private:
const Location out_;
@@ -954,6 +967,10 @@ void CodeGeneratorX86::DumpFloatingPointRegister(std::ostream& stream, int reg)
stream << XmmRegister(reg);
}
+const X86InstructionSetFeatures& CodeGeneratorX86::GetInstructionSetFeatures() const {
+ return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86InstructionSetFeatures();
+}
+
size_t CodeGeneratorX86::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
__ movl(Address(ESP, stack_index), static_cast<Register>(reg_id));
return kX86WordSize;
@@ -1005,7 +1022,6 @@ void CodeGeneratorX86::GenerateInvokeRuntime(int32_t entry_point_offset) {
}
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
- const X86InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
: CodeGenerator(graph,
@@ -1023,13 +1039,13 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph,
instruction_visitor_(graph, this),
move_resolver_(graph->GetAllocator(), this),
assembler_(graph->GetAllocator()),
- isa_features_(isa_features),
boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
constant_area_start_(-1),
@@ -1212,7 +1228,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type ty
case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected parameter type " << type;
- break;
+ UNREACHABLE();
}
return Location::NoLocation();
}
@@ -1704,7 +1720,7 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
- GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
+ GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
@@ -1722,9 +1738,9 @@ void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) {
void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86>(deoptimize);
GenerateTestAndBranch<Label>(deoptimize,
- /* condition_input_index */ 0,
+ /* condition_input_index= */ 0,
slow_path->GetEntryLabel(),
- /* false_target */ nullptr);
+ /* false_target= */ nullptr);
}
void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
@@ -1847,7 +1863,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) {
} else {
NearLabel false_target;
GenerateTestAndBranch<NearLabel>(
- select, /* condition_input_index */ 2, /* true_target */ nullptr, &false_target);
+ select, /* condition_input_index= */ 2, /* true_target= */ nullptr, &false_target);
codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
__ Bind(&false_target);
}
@@ -2185,7 +2201,9 @@ void LocationsBuilderX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invok
IntrinsicLocationsBuilderX86 intrinsic(codegen_);
if (intrinsic.TryDispatch(invoke)) {
- if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) {
+ if (invoke->GetLocations()->CanCall() &&
+ invoke->HasPcRelativeMethodLoadKind() &&
+ invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).IsInvalid()) {
invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
}
return;
@@ -2308,6 +2326,14 @@ void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* inv
codegen_->GenerateInvokePolymorphicCall(invoke);
}
+void LocationsBuilderX86::VisitInvokeCustom(HInvokeCustom* invoke) {
+ HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorX86::VisitInvokeCustom(HInvokeCustom* invoke) {
+ codegen_->GenerateInvokeCustomCall(invoke);
+}
+
void LocationsBuilderX86::VisitNeg(HNeg* neg) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
@@ -2963,7 +2989,7 @@ void LocationsBuilderX86::VisitAdd(HAdd* add) {
default:
LOG(FATAL) << "Unexpected add type " << add->GetResultType();
- break;
+ UNREACHABLE();
}
}
@@ -3408,8 +3434,8 @@ void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) {
// Load the values to the FP stack in reverse order, using temporaries if needed.
const bool is_wide = !is_float;
- PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp */ true, is_wide);
- PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp */ true, is_wide);
+ PushOntoFPStack(second, elem_size, 2 * elem_size, /* is_fp= */ true, is_wide);
+ PushOntoFPStack(first, 0, 2 * elem_size, /* is_fp= */ true, is_wide);
// Loop doing FPREM until we stabilize.
NearLabel retry;
@@ -3471,6 +3497,27 @@ void InstructionCodeGeneratorX86::DivRemOneOrMinusOne(HBinaryOperation* instruct
}
}
+void InstructionCodeGeneratorX86::RemByPowerOfTwo(HRem* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+
+ Register out = locations->Out().AsRegister<Register>();
+ Register numerator = locations->InAt(0).AsRegister<Register>();
+
+ int32_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+ uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
+
+ Register tmp = locations->GetTemp(0).AsRegister<Register>();
+ NearLabel done;
+ __ movl(out, numerator);
+  __ andl(out, Immediate(abs_imm - 1));
+ __ j(Condition::kZero, &done);
+  __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
+ __ testl(numerator, numerator);
+ __ cmovl(Condition::kLess, out, tmp);
+ __ Bind(&done);
+}
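
The new RemByPowerOfTwo avoids idiv entirely: mask out the low bits, then, only for a nonzero remainder of a negative dividend, fold in -|imm| via the leal/cmovl pair. A scalar equivalent with truncated-division semantics, as Java's % requires (a sketch, not the emitted code):

#include <cstdint>

// Scalar equivalent of RemByPowerOfTwo above (m = |imm|, a power of two).
int32_t RemPowerOfTwo(int32_t n, uint32_t m) {
  // andl: keep the low bits; for n >= 0 this is already the remainder.
  int32_t rem = static_cast<int32_t>(static_cast<uint32_t>(n) & (m - 1));
  // leal tmp, [rem + ~(m - 1)] computes rem - m; cmovl takes it when n < 0
  // and the earlier j(kZero) did not skip the fix-up for an exact multiple.
  if (rem != 0 && n < 0) {
    rem -= static_cast<int32_t>(m);
  }
  return rem;  // E.g. RemPowerOfTwo(-5, 4) == -1, matching Java's -5 % 4.
}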
void InstructionCodeGeneratorX86::DivByPowerOfTwo(HDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
@@ -3525,7 +3572,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemWithAnyConstant(HBinaryOperation
int64_t magic;
int shift;
- CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+ CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
// Save the numerator.
__ movl(num, eax);
@@ -3584,8 +3631,12 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr
// Do not generate anything for 0. DivZeroCheck would forbid any generated code.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (is_div && IsPowerOfTwo(AbsOrMin(imm))) {
- DivByPowerOfTwo(instruction->AsDiv());
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ if (is_div) {
+ DivByPowerOfTwo(instruction->AsDiv());
+ } else {
+ RemByPowerOfTwo(instruction->AsRem());
+ }
} else {
DCHECK(imm <= -2 || imm >= 2);
GenerateDivRemWithAnyConstant(instruction);
@@ -3802,6 +3853,301 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ // Register to use to perform a long subtract to set cc.
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ if (type == DataType::Type::kInt64) {
+ // Need to perform a subtract to get the sign right.
+ // op1 is already in the same location as the output.
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+
+ Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
+ Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
+
+ // The comparison is performed by subtracting the second operand from
+ // the first operand and then setting the status flags in the same
+    // manner as the SUB instruction.
+ __ cmpl(output_lo, op2_lo);
+
+ // Now use a temp and the borrow to finish the subtraction of op2_hi.
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(temp, output_hi);
+ __ sbbl(temp, op2_hi);
+
+ // Now the condition code is correct.
+ Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
+ __ cmovl(cond, output_lo, op2_lo);
+ __ cmovl(cond, output_hi, op2_hi);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ Register out = locations->Out().AsRegister<Register>();
+ Register op2 = op2_loc.AsRegister<Register>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
+
+ __ cmpl(out, op2);
+ Condition cond = is_min ? Condition::kGreater : Condition::kLess;
+ __ cmovl(cond, out, op2);
+ }
+}
+
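On x86-32 a signed 64-bit compare has to be synthesized from the 32-bit halves, which is exactly what the cmpl/sbbl pair above does before the two cmovl instructions select each half. A scalar sketch of how those flags order the operands (assumptions: arithmetic right shift for negative values, as on every mainstream compiler):

#include <cstdint>

// Scalar model of the cmpl/sbbl pair: decide a < b for signed 64-bit values
// using only 32-bit halves. cmpl on the low words produces the borrow; sbbl
// folds it into the high-word subtraction, whose sign matches the sign of
// the full 64-bit difference.
bool SignedLessVia32BitHalves(int64_t a, int64_t b) {
  uint32_t a_lo = static_cast<uint32_t>(a);
  uint32_t b_lo = static_cast<uint32_t>(b);
  int32_t a_hi = static_cast<int32_t>(a >> 32);
  int32_t b_hi = static_cast<int32_t>(b >> 32);
  uint32_t borrow = (a_lo < b_lo) ? 1u : 0u;                // cmpl sets CF.
  int64_t hi = static_cast<int64_t>(a_hi) - b_hi - borrow;  // sbbl.
  return hi < 0;  // kLess; the cmovl pair then selects per 32-bit half.
}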
+void InstructionCodeGeneratorX86::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if Nan jmp Nan_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // Nan_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ NearLabel nan, done, op2_label;
+ if (type == DataType::Type::kFloat64) {
+ __ ucomisd(out, op2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
+ if (is_min) {
+ if (type == DataType::Type::kFloat64) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ if (type == DataType::Type::kFloat64) {
+ // TODO: Use a constant from the constant table (requires extra input).
+ __ LoadLongConstant(out, kDoubleNaN);
+ } else {
+ Register constant = locations->GetTemp(0).AsRegister<Register>();
+ __ movl(constant, Immediate(kFloatNaN));
+ __ movd(out, constant);
+ }
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
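When ucomiss/ucomisd reports equality, the operands may still be +0.0 and -0.0, so the code falls through to the bitwise fix-up: the two encodings differ only in the sign bit, hence orps yields -0.0 (the correct min) and andps yields +0.0 (the correct max). One-lane scalar sketch of the min case:

#include <cstdint>
#include <cstring>

// Scalar model of the orps trick above. Only meaningful on the equal branch,
// where the operands are known to be +/-0.0; a bitwise AND would give the
// max instead.
float ZeroAwareMin(float a, float b) {
  uint32_t ua, ub;
  std::memcpy(&ua, &a, sizeof(ua));
  std::memcpy(&ub, &b, sizeof(ub));
  uint32_t r = ua | ub;  // OR of the sign bits: -0.0 wins.
  float out;
  std::memcpy(&out, &r, sizeof(out));
  return out;
}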
+void InstructionCodeGeneratorX86::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderX86::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorX86::VisitMin(HMin* min) {
+  GenerateMinMax(min, /* is_min= */ true);
+}
+
+void LocationsBuilderX86::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorX86::VisitMax(HMax* max) {
+  GenerateMinMax(max, /* is_min= */ false);
+}
+
+void LocationsBuilderX86::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ locations->SetInAt(0, Location::RegisterLocation(EAX));
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RegisterLocation(EDX));
+ break;
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ Register out = locations->Out().AsRegister<Register>();
+ DCHECK_EQ(out, EAX);
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ DCHECK_EQ(temp, EDX);
+ // Sign extend EAX into EDX.
+ __ cdq();
+ // XOR EAX with sign.
+ __ xorl(EAX, EDX);
+ // Subtract out sign to correct.
+ __ subl(EAX, EDX);
+ // The result is in EAX.
+ break;
+ }
+ case DataType::Type::kInt64: {
+ Location input = locations->InAt(0);
+ Register input_lo = input.AsRegisterPairLow<Register>();
+ Register input_hi = input.AsRegisterPairHigh<Register>();
+ Location output = locations->Out();
+ Register output_lo = output.AsRegisterPairLow<Register>();
+ Register output_hi = output.AsRegisterPairHigh<Register>();
+ Register temp = locations->GetTemp(0).AsRegister<Register>();
+ // Compute the sign into the temporary.
+ __ movl(temp, input_hi);
+ __ sarl(temp, Immediate(31));
+ // Store the sign into the output.
+ __ movl(output_lo, temp);
+ __ movl(output_hi, temp);
+ // XOR the input to the output.
+ __ xorl(output_lo, input_lo);
+ __ xorl(output_hi, input_hi);
+ // Subtract the sign.
+ __ subl(output_lo, temp);
+ __ sbbl(output_hi, temp);
+ break;
+ }
+ case DataType::Type::kFloat32: {
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ Register constant = locations->GetTemp(1).AsRegister<Register>();
+ __ movl(constant, Immediate(INT32_C(0x7FFFFFFF)));
+ __ movd(temp, constant);
+ __ andps(out, temp);
+ break;
+ }
+ case DataType::Type::kFloat64: {
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ // TODO: Use a constant from the constant table (requires extra input).
+ __ LoadLongConstant(temp, INT64_C(0x7FFFFFFFFFFFFFFF));
+ __ andpd(out, temp);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
+ }
+}
+
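Both integer HAbs paths use the classic branch-free absolute value: broadcast the sign bit, XOR, then subtract; the 64-bit pair simply extends the subtract with sbbl to propagate the borrow. One-lane scalar sketch:

#include <cstdint>

// Scalar model of the cdq/xorl/subl sequence above: `sign` is 0 for
// non-negative inputs and 0xFFFFFFFF for negative ones, so xor-then-subtract
// yields x unchanged or its two's-complement negation. As with the emitted
// code, INT32_MIN maps to itself.
int32_t BranchFreeAbs(int32_t x) {
  uint32_t sign = static_cast<uint32_t>(x >> 31);   // cdq: EDX := sign of EAX.
  uint32_t u = static_cast<uint32_t>(x);
  return static_cast<int32_t>((u ^ sign) - sign);   // xorl; subl.
}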
void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) {
LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
switch (instruction->GetType()) {
@@ -4184,29 +4530,14 @@ void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
locations->SetOut(Location::RegisterLocation(EAX));
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
- } else {
- InvokeRuntimeCallingConvention calling_convention;
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ InvokeRuntimeCallingConvention calling_convention;
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}
void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize);
- __ fs()->movl(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString)));
- __ call(Address(temp, code_offset.Int32Value()));
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- DCHECK(!codegen_->IsLeafMethod());
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ DCHECK(!codegen_->IsLeafMethod());
}
void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
@@ -4219,10 +4550,8 @@ void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- QuickEntrypointEnum entrypoint =
- CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
DCHECK(!codegen_->IsLeafMethod());
@@ -4472,14 +4801,14 @@ void CodeGeneratorX86::GenerateMemoryBarrier(MemBarrierKind kind) {
}
case MemBarrierKind::kNTStoreStore:
// Non-Temporal Store/Store needs an explicit fence.
- MemoryFence(/* non-temporal */ true);
+ MemoryFence(/* non-temporal= */ true);
break;
}
}
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+ ArtMethod* method ATTRIBUTE_UNUSED) {
return desired_dispatch_info;
}
@@ -4531,9 +4860,15 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(
RecordBootImageMethodPatch(invoke);
break;
}
- case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+ temp.AsRegister<Register>());
+ __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
+ RecordBootImageRelRoPatch(
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
+ GetBootImageOffset(invoke));
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
temp.AsRegister<Register>());
@@ -4541,6 +4876,9 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall(
RecordMethodBssEntryPatch(invoke);
break;
}
+ case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
+ __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
+ break;
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
@@ -4595,6 +4933,20 @@ void CodeGeneratorX86::GenerateVirtualCall(
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
+void CodeGeneratorX86::RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
+ uint32_t intrinsic_data) {
+ boot_image_intrinsic_patches_.emplace_back(
+ method_address, /* target_dex_file= */ nullptr, intrinsic_data);
+ __ Bind(&boot_image_intrinsic_patches_.back().label);
+}
+
+void CodeGeneratorX86::RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
+ uint32_t boot_image_offset) {
+ boot_image_method_patches_.emplace_back(
+ method_address, /* target_dex_file= */ nullptr, boot_image_offset);
+ __ Bind(&boot_image_method_patches_.back().label);
+}
+
void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
HX86ComputeBaseMethodAddress* method_address =
@@ -4639,7 +4991,6 @@ void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) {
}
Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
- DCHECK(!GetCompilerOptions().IsBootImage());
HX86ComputeBaseMethodAddress* method_address =
load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
string_bss_entry_patches_.emplace_back(
@@ -4647,6 +4998,62 @@ Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
return &string_bss_entry_patches_.back().label;
}
+void CodeGeneratorX86::LoadBootImageAddress(Register reg,
+ uint32_t boot_image_reference,
+ HInvokeStaticOrDirect* invoke) {
+ if (GetCompilerOptions().IsBootImage()) {
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+ HX86ComputeBaseMethodAddress* method_address =
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
+ DCHECK(method_address != nullptr);
+ Register method_address_reg =
+ invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
+ __ leal(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
+ RecordBootImageIntrinsicPatch(method_address, boot_image_reference);
+ } else if (GetCompilerOptions().GetCompilePic()) {
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+ HX86ComputeBaseMethodAddress* method_address =
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
+ DCHECK(method_address != nullptr);
+ Register method_address_reg =
+ invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
+ __ movl(reg, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
+ RecordBootImageRelRoPatch(method_address, boot_image_reference);
+ } else {
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ DCHECK(!heap->GetBootImageSpaces().empty());
+ const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
+ __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
+ }
+}
+
+void CodeGeneratorX86::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+ uint32_t boot_image_offset) {
+ DCHECK(invoke->IsStatic());
+ InvokeRuntimeCallingConvention calling_convention;
+ Register argument = calling_convention.GetRegisterAt(0);
+ if (GetCompilerOptions().IsBootImage()) {
+ DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+ // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+ HX86ComputeBaseMethodAddress* method_address =
+ invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
+ DCHECK(method_address != nullptr);
+ Register method_address_reg =
+ invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()).AsRegister<Register>();
+ __ leal(argument, Address(method_address_reg, CodeGeneratorX86::kDummy32BitOffset));
+ MethodReference target_method = invoke->GetTargetMethod();
+ dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+ boot_image_type_patches_.emplace_back(method_address, target_method.dex_file, type_idx.index_);
+ __ Bind(&boot_image_type_patches_.back().label);
+ } else {
+ LoadBootImageAddress(argument, boot_image_offset, invoke);
+ }
+ InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+}
+
// The label points to the end of the "movl" or another instruction but the literal offset
// for method patch needs to point to the embedded constant which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
@@ -4664,6 +5071,15 @@ inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches(
}
}
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
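The template above adapts three-argument patch factories (RelRo and intrinsic-reference patches carry no dex file) to the four-argument shape the generic pc-relative emitter expects. A self-contained sketch of the same compile-time adapter pattern (types and names invented; not the real linker API):

#include <cassert>
#include <cstddef>
#include <cstdint>

struct Patch { size_t literal_offset; uint32_t pc_insn_offset; uint32_t data; };

// A three-argument factory for a patch kind with no associated dex file.
Patch MakeRelRoPatch(size_t literal_offset, uint32_t pc_insn_offset, uint32_t data) {
  return Patch{literal_offset, pc_insn_offset, data};
}

// The adapter forwards to the narrower factory while asserting that the
// unused dex-file slot is indeed null, exactly as the code above does.
template <Patch (*Factory)(size_t, uint32_t, uint32_t)>
Patch DropDexFile(size_t literal_offset,
                  const void* target_dex_file,
                  uint32_t pc_insn_offset,
                  uint32_t data) {
  assert(target_dex_file == nullptr);
  return Factory(literal_offset, pc_insn_offset, data);
}

// Usage: DropDexFile<MakeRelRoPatch> can be passed wherever a four-argument
// factory is expected, mirroring NoDexFileAdapter in EmitLinkerPatches.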
void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -4672,7 +5088,8 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke
boot_image_type_patches_.size() +
type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
- string_bss_entry_patches_.size();
+ string_bss_entry_patches_.size() +
+ boot_image_intrinsic_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
@@ -4681,12 +5098,14 @@ void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linke
boot_image_type_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+ boot_image_intrinsic_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
+ DCHECK(boot_image_intrinsic_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -4707,9 +5126,25 @@ void CodeGeneratorX86::MarkGCCard(Register temp,
__ testl(value, value);
__ j(kEqual, &is_null);
}
+ // Load the address of the card table into `card`.
__ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86PointerSize>().Int32Value()));
+ // Calculate the offset (in the card table) of the card corresponding to
+ // `object`.
__ movl(temp, object);
__ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
+ // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
+ // `object`'s card.
+ //
+ // Register `card` contains the address of the card table. Note that the card
+ // table's base is biased during its creation so that it always starts at an
+ // address whose least-significant byte is equal to `kCardDirty` (see
+ // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
+ // below writes the `kCardDirty` (byte) value into the `object`'s card
+ // (located at `card + object >> kCardShift`).
+ //
+ // This dual use of the value in register `card` (1. to calculate the location
+ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
+ // (no need to explicitly load `kCardDirty` as an immediate value).
__ movb(Address(temp, card, TIMES_1, 0),
X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
if (value_can_be_null) {
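
The comment block above explains the bias; in scalar terms the single movb both addresses the card and supplies the dirty value. A sketch of the arithmetic with illustrative constants (not necessarily ART's actual kCardShift/kCardDirty values):

#include <cassert>
#include <cstdint>

// Scalar model of the biased card-table write above. The point is the dual
// use of the base register: it is both the store's base address and, through
// its low byte, the stored value.
constexpr uintptr_t kCardShift = 10;
constexpr uint8_t kCardDirty = 0x70;

void MarkCard(uint8_t* biased_base, uintptr_t object_address) {
  // The table base is biased at creation so that its low byte is kCardDirty.
  assert((reinterpret_cast<uintptr_t>(biased_base) & 0xFF) == kCardDirty);
  // movb [temp + card], card_low_byte:
  biased_base[object_address >> kCardShift] =
      static_cast<uint8_t>(reinterpret_cast<uintptr_t>(biased_base));
}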
@@ -4801,7 +5236,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction,
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, /* needs_null_check */ true);
+ instruction, out, base, offset, /* needs_null_check= */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -5284,7 +5719,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
+ instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
} else {
Register out = out_loc.AsRegister<Register>();
__ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset));
@@ -6055,14 +6490,14 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadClass::LoadKind::kJitBootImageAddress:
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kRuntimeCall:
break;
}
@@ -6093,7 +6528,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadClass::LoadKind::kBootImageClassTable ||
+ load_kind == HLoadClass::LoadKind::kBootImageRelRo ||
load_kind == HLoadClass::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -6101,10 +6536,7 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
if (load_kind == HLoadClass::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution and/or initialization to save everything.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConvention calling_convention;
- caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -6149,7 +6581,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
cls,
out_loc,
Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
- /* fixup_label */ nullptr,
+ /* fixup_label= */ nullptr,
read_barrier_option);
break;
}
@@ -6161,25 +6593,12 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
codegen_->RecordBootImageTypePatch(cls);
break;
}
- case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
- DCHECK_NE(address, 0u);
- __ movl(out, Immediate(address));
- break;
- }
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
Register method_address = locations->InAt(0).AsRegister<Register>();
__ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
- codegen_->RecordBootImageTypePatch(cls);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ subl(out, Immediate(masked_hash));
- }
+ codegen_->RecordBootImageRelRoPatch(cls->InputAt(0)->AsX86ComputeBaseMethodAddress(),
+ codegen_->GetBootImageOffset(cls));
break;
}
case HLoadClass::LoadKind::kBssEntry: {
@@ -6190,6 +6609,13 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
generate_null_check = true;
break;
}
+ case HLoadClass::LoadKind::kJitBootImageAddress: {
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
+ DCHECK_NE(address, 0u);
+ __ movl(out, Immediate(address));
+ break;
+ }
case HLoadClass::LoadKind::kJitTableAddress: {
Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
Label* fixup_label = codegen_->NewJitRootClassPatch(
@@ -6206,8 +6632,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
if (generate_null_check || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
- SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(cls, cls);
codegen_->AddSlowPath(slow_path);
if (generate_null_check) {
@@ -6223,6 +6648,26 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE
}
}
+void LocationsBuilderX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
+}
+
+void InstructionCodeGeneratorX86::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ codegen_->GenerateLoadMethodHandleRuntimeCall(load);
+}
+
+void LocationsBuilderX86::VisitLoadMethodType(HLoadMethodType* load) {
+ InvokeRuntimeCallingConvention calling_convention;
+ Location location = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
+ CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
+}
+
+void InstructionCodeGeneratorX86::VisitLoadMethodType(HLoadMethodType* load) {
+ codegen_->GenerateLoadMethodTypeRuntimeCall(load);
+}
+
void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
@@ -6230,12 +6675,14 @@ void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) {
if (check->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
+ // Rely on the type initialization to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
}
void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) {
  // We assume the class is not null.
- SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(
- check->GetLoadClass(), check, check->GetDexPc(), true);
+ SlowPathCode* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86(check->GetLoadClass(), check);
codegen_->AddSlowPath(slow_path);
GenerateClassInitializationCheck(slow_path,
check->GetLocations()->InAt(0).AsRegister<Register>());
@@ -6255,18 +6702,38 @@ void InstructionCodeGeneratorX86::GenerateClassInitializationCheck(
// No need for memory fence, thanks to the X86 memory model.
}
+void InstructionCodeGeneratorX86::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ Register temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Compare the bitstring in memory.
+ __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+ // Compare the bitstring bits using SUB.
+ __ subl(temp, Immediate(path_to_root));
+ // Shift out bits that do not contribute to the comparison.
+ __ shll(temp, Immediate(32u - mask_bits));
+ }
+}
+
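
The cmpw and subl+shll sequences are two ways of testing the same masked equality. A minimal sketch of the predicate they evaluate (helper name assumed):

    #include <cstdint>

    // True iff the low `mask_bits` bits of `status` equal `path_to_root`.
    // subl leaves zero in those bits on a match; shll discards the rest,
    // so the zero flag afterwards reflects exactly this comparison.
    inline bool BitstringMatches(uint32_t status, uint32_t path_to_root, uint32_t mask_bits) {
      uint32_t diff = status - path_to_root;       // subl
      return (diff << (32u - mask_bits)) == 0u;    // shll sets ZF iff match
    }
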
HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadString::LoadKind::kJitBootImageAddress:
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kRuntimeCall:
break;
}
@@ -6278,7 +6745,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
HLoadString::LoadKind load_kind = load->GetLoadKind();
if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadString::LoadKind::kBootImageInternTable ||
+ load_kind == HLoadString::LoadKind::kBootImageRelRo ||
load_kind == HLoadString::LoadKind::kBssEntry) {
locations->SetInAt(0, Location::RequiresRegister());
}
@@ -6289,10 +6756,7 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
if (load_kind == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString to save everything.
- RegisterSet caller_saves = RegisterSet::Empty();
- InvokeRuntimeCallingConvention calling_convention;
- caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -6325,18 +6789,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
codegen_->RecordBootImageStringPatch(load);
return;
}
- case HLoadString::LoadKind::kBootImageAddress: {
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(load->GetString().Get()));
- DCHECK_NE(address, 0u);
- __ movl(out, Immediate(address));
- return;
- }
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
Register method_address = locations->InAt(0).AsRegister<Register>();
__ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
- codegen_->RecordBootImageStringPatch(load);
+ codegen_->RecordBootImageRelRoPatch(load->InputAt(0)->AsX86ComputeBaseMethodAddress(),
+ codegen_->GetBootImageOffset(load));
return;
}
case HLoadString::LoadKind::kBssEntry: {
@@ -6352,6 +6810,12 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S
__ Bind(slow_path->GetExitLabel());
return;
}
+ case HLoadString::LoadKind::kJitBootImageAddress: {
+ uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
+ DCHECK_NE(address, 0u);
+ __ movl(out, Immediate(address));
+ return;
+ }
case HLoadString::LoadKind::kJitTableAddress: {
Address address = Address::Absolute(CodeGeneratorX86::kDummy32BitOffset);
Label* fixup_label = codegen_->NewJitRootStringPatch(
@@ -6418,8 +6882,8 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
return 0;
}
-// Interface case has 3 temps, one for holding the number of interfaces, one for the current
-// interface pointer, one for loading the current interface.
+// Interface case has 2 temps: one for holding the number of interfaces and one for the current
+// interface pointer; the current interface is compared directly in memory.
// The other checks have one temp for loading the object's class.
static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
@@ -6447,6 +6911,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -6455,7 +6921,13 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
// Note that TypeCheckSlowPathX86 uses this "out" register too.
locations->SetOut(Location::RequiresRegister());
// When read barriers are enabled, we need a temporary register for some cases.
@@ -6636,7 +7108,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ j(kNotEqual, slow_path->GetEntryLabel());
__ movl(out, Immediate(1));
@@ -6668,7 +7140,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
// This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ jmp(slow_path->GetEntryLabel());
if (zero.IsLinked()) {
@@ -6676,6 +7148,21 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ __ j(kNotEqual, &zero);
+ __ movl(out, Immediate(1));
+ __ jmp(&done);
+ break;
+ }
}
if (zero.IsLinked()) {
@@ -6702,12 +7189,14 @@ void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
// Require a register for the interface check since there is a loop that compares the class to
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
+ } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
} else {
locations->SetInAt(1, Location::Any());
}
- // Note that TypeCheckSlowPathX86 uses this "temp" register too.
- locations->AddTemp(Location::RequiresRegister());
- // When read barriers are enabled, we need an additional temporary register for some cases.
+ // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86.
locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
@@ -6921,6 +7410,19 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
__ MaybeUnpoisonHeapReference(cls.AsRegister<Register>());
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
__ Bind(&done);
@@ -6946,6 +7448,61 @@ void InstructionCodeGeneratorX86::VisitMonitorOperation(HMonitorOperation* instr
}
}
+void LocationsBuilderX86::VisitX86AndNot(HX86AndNot* instruction) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+ DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorX86::VisitX86AndNot(HX86AndNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+ Location dest = locations->Out();
+ if (instruction->GetResultType() == DataType::Type::kInt32) {
+ __ andn(dest.AsRegister<Register>(),
+ first.AsRegister<Register>(),
+ second.AsRegister<Register>());
+ } else {
+ DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
+ __ andn(dest.AsRegisterPairLow<Register>(),
+ first.AsRegisterPairLow<Register>(),
+ second.AsRegisterPairLow<Register>());
+ __ andn(dest.AsRegisterPairHigh<Register>(),
+ first.AsRegisterPairHigh<Register>(),
+ second.AsRegisterPairHigh<Register>());
+ }
+}
+
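
BMI's ANDN computes the whole and-not in one instruction. A sketch of the semantics the two paths above emit, with the same operand order as `andn(dest, first, second)` (pure C++, names assumed):

    #include <cstdint>

    // andn dest, first, second  =>  dest = ~first & second.
    inline uint32_t AndNot32(uint32_t first, uint32_t second) {
      return ~first & second;
    }

    // On 32-bit x86 the Int64 case is just the same operation applied to the
    // low and high halves of the register pairs independently.
    inline uint64_t AndNot64(uint64_t first, uint64_t second) {
      return ~first & second;
    }
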
+void LocationsBuilderX86::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+ DCHECK(instruction->GetType() == DataType::Type::kInt32) << instruction->GetType();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorX86::VisitX86MaskOrResetLeastSetBit(
+ HX86MaskOrResetLeastSetBit* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location src = locations->InAt(0);
+ Location dest = locations->Out();
+ DCHECK(instruction->GetResultType() == DataType::Type::kInt32);
+ switch (instruction->GetOpKind()) {
+ case HInstruction::kAnd:
+ __ blsr(dest.AsRegister<Register>(), src.AsRegister<Register>());
+ break;
+ case HInstruction::kXor:
+ __ blsmsk(dest.AsRegister<Register>(), src.AsRegister<Register>());
+ break;
+ default:
+ LOG(FATAL) << "Unreachable";
+ }
+}
+
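
BLSR and BLSMSK are the hardware forms of two classic bit tricks; a sketch of what the kAnd and kXor cases above compute:

    #include <cstdint>

    // kAnd / blsr: clear the lowest set bit.
    inline uint32_t ResetLeastSetBit(uint32_t x) { return x & (x - 1u); }

    // kXor / blsmsk: mask of all bits up to and including the lowest set bit.
    inline uint32_t MaskUpToLeastSetBit(uint32_t x) { return x ^ (x - 1u); }
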
void LocationsBuilderX86::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
@@ -7092,7 +7649,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, out_reg, offset, /* needs_null_check */ false);
+ instruction, out, out_reg, offset, /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
@@ -7126,7 +7683,7 @@ void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, obj_reg, offset, /* needs_null_check */ false);
+ instruction, out, obj_reg, offset, /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -7175,7 +7732,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
// Slow path marking the GC root `root`.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
- instruction, root, /* unpoison_ref_before_marking */ false);
+ instruction, root, /* unpoison_ref_before_marking= */ false);
codegen_->AddSlowPath(slow_path);
// Test the entrypoint (`Thread::Current()->pReadBarrierMarkReg ## root.reg()`).
@@ -7277,7 +7834,7 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
// Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
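
The division and modulo turn a bit position within the 32-bit lock word into a (byte offset, bit-within-byte) pair, so the check can be a single-byte test. For example, under the assumption that the read barrier state sits at bit 28 (a stand-in for the real LockWord constant):

    constexpr size_t kBitsPerByte = 8;
    constexpr size_t kShift = 28;  // Assumed stand-in for LockWord::kReadBarrierStateShift.

    constexpr size_t gray_byte_position = kShift / kBitsPerByte;  // byte 3 of the lock word
    constexpr size_t gray_bit_position = kShift % kBitsPerByte;   // bit 4 within that byte
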
@@ -7305,10 +7862,10 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i
if (always_update_field) {
DCHECK(temp != nullptr);
slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
- instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp);
+ instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp);
} else {
slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86(
- instruction, ref, /* unpoison_ref_before_marking */ true);
+ instruction, ref, /* unpoison_ref_before_marking= */ true);
}
AddSlowPath(slow_path);
@@ -7620,7 +8177,7 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera
HX86ComputeBaseMethodAddress* base_method_address_;
private:
- void Process(const MemoryRegion& region, int pos) OVERRIDE {
+ void Process(const MemoryRegion& region, int pos) override {
// Patch the correct offset for the instruction. The place to patch is the
// last 4 bytes of the instruction.
// The value to patch is the distance from the offset in the constant area
@@ -7821,7 +8378,7 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code,
uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
uintptr_t address =
reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
- typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
+ using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
dchecked_integral_cast<uint32_t>(address);
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 51e5bca00b..deeef888e2 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -83,9 +83,9 @@ class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVi
InvokeDexCallingConventionVisitorX86() {}
virtual ~InvokeDexCallingConventionVisitorX86() {}
- Location GetNextLocation(DataType::Type type) OVERRIDE;
- Location GetReturnLocation(DataType::Type type) const OVERRIDE;
- Location GetMethodLocation() const OVERRIDE;
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
private:
InvokeDexCallingConvention calling_convention;
@@ -97,18 +97,18 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionX86() {}
- Location GetObjectLocation() const OVERRIDE {
+ Location GetObjectLocation() const override {
return Location::RegisterLocation(ECX);
}
- Location GetFieldIndexLocation() const OVERRIDE {
+ Location GetFieldIndexLocation() const override {
return Location::RegisterLocation(EAX);
}
- Location GetReturnLocation(DataType::Type type) const OVERRIDE {
+ Location GetReturnLocation(DataType::Type type) const override {
return DataType::Is64BitType(type)
? Location::RegisterPairLocation(EAX, EDX)
: Location::RegisterLocation(EAX);
}
- Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE {
+ Location GetSetValueLocation(DataType::Type type, bool is_instance) const override {
return DataType::Is64BitType(type)
? (is_instance
? Location::RegisterPairLocation(EDX, EBX)
@@ -117,7 +117,7 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention {
? Location::RegisterLocation(EDX)
: Location::RegisterLocation(ECX));
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
return Location::FpuRegisterLocation(XMM0);
}
@@ -130,10 +130,10 @@ class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap {
ParallelMoveResolverX86(ArenaAllocator* allocator, CodeGeneratorX86* codegen)
: ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
- void EmitMove(size_t index) OVERRIDE;
- void EmitSwap(size_t index) OVERRIDE;
- void SpillScratch(int reg) OVERRIDE;
- void RestoreScratch(int reg) OVERRIDE;
+ void EmitMove(size_t index) override;
+ void EmitSwap(size_t index) override;
+ void SpillScratch(int reg) override;
+ void RestoreScratch(int reg) override;
X86Assembler* GetAssembler() const;
@@ -155,14 +155,15 @@ class LocationsBuilderX86 : public HGraphVisitor {
: HGraphVisitor(graph), codegen_(codegen) {}
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -186,14 +187,15 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen);
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_X86(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -211,10 +213,12 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
// the suspend call.
void GenerateSuspendCheck(HSuspendCheck* check, HBasicBlock* successor);
void GenerateClassInitializationCheck(SlowPathCode* slow_path, Register class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, Register temp);
void HandleBitwiseOperation(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivByPowerOfTwo(HDiv* instruction);
+ void RemByPowerOfTwo(HRem* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateRemFP(HRem* rem);
void HandleCondition(HCondition* condition);
@@ -225,6 +229,9 @@ class InstructionCodeGeneratorX86 : public InstructionCodeGenerator {
void GenerateShlLong(const Location& loc, int shift);
void GenerateShrLong(const Location& loc, int shift);
void GenerateUShrLong(const Location& loc, int shift);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
void HandleFieldSet(HInstruction* instruction,
const FieldInfo& field_info,
@@ -312,28 +319,27 @@ class JumpTableRIPFixup;
class CodeGeneratorX86 : public CodeGenerator {
public:
CodeGeneratorX86(HGraph* graph,
- const X86InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
virtual ~CodeGeneratorX86() {}
- void GenerateFrameEntry() OVERRIDE;
- void GenerateFrameExit() OVERRIDE;
- void Bind(HBasicBlock* block) OVERRIDE;
- void MoveConstant(Location destination, int32_t value) OVERRIDE;
- void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE;
- void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+ void GenerateFrameEntry() override;
+ void GenerateFrameExit() override;
+ void Bind(HBasicBlock* block) override;
+ void MoveConstant(Location destination, int32_t value) override;
+ void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
+ void AddLocationAsTemp(Location location, LocationSummary* locations) override;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
// Generate code to invoke a runtime entry point.
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path = nullptr) OVERRIDE;
+ SlowPathCode* slow_path = nullptr) override;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -343,49 +349,51 @@ class CodeGeneratorX86 : public CodeGenerator {
void GenerateInvokeRuntime(int32_t entry_point_offset);
- size_t GetWordSize() const OVERRIDE {
+ size_t GetWordSize() const override {
return kX86WordSize;
}
- size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ size_t GetFloatingPointSpillSlotSize() const override {
return GetGraph()->HasSIMD()
? 4 * kX86WordSize // 16 bytes == 4 words for each spill
: 2 * kX86WordSize; // 8 bytes == 2 words for each spill
}
- HGraphVisitor* GetLocationBuilder() OVERRIDE {
+ HGraphVisitor* GetLocationBuilder() override {
return &location_builder_;
}
- HGraphVisitor* GetInstructionVisitor() OVERRIDE {
+ HGraphVisitor* GetInstructionVisitor() override {
return &instruction_visitor_;
}
- X86Assembler* GetAssembler() OVERRIDE {
+ X86Assembler* GetAssembler() override {
return &assembler_;
}
- const X86Assembler& GetAssembler() const OVERRIDE {
+ const X86Assembler& GetAssembler() const override {
return assembler_;
}
- uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+ uintptr_t GetAddressOf(HBasicBlock* block) override {
return GetLabelOf(block)->Position();
}
- void SetupBlockedRegisters() const OVERRIDE;
+ void SetupBlockedRegisters() const override;
- void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
- void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
+ void DumpCoreRegister(std::ostream& stream, int reg) const override;
+ void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
- ParallelMoveResolverX86* GetMoveResolver() OVERRIDE {
+ ParallelMoveResolverX86* GetMoveResolver() override {
return &move_resolver_;
}
- InstructionSet GetInstructionSet() const OVERRIDE {
+ InstructionSet GetInstructionSet() const override {
return InstructionSet::kX86;
}
+ const X86InstructionSetFeatures& GetInstructionSetFeatures() const;
+
// Helper method to move a 32bits value between two locations.
void Move32(Location destination, Location source);
// Helper method to move a 64bits value between two locations.
@@ -394,32 +402,42 @@ class CodeGeneratorX86 : public CodeGenerator {
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadString::LoadKind GetSupportedLoadStringKind(
- HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+ HLoadString::LoadKind desired_string_load_kind) override;
// Check if the desired_class_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadClass::LoadKind GetSupportedLoadClassKind(
- HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+ HLoadClass::LoadKind desired_class_load_kind) override;
// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke) OVERRIDE;
+ ArtMethod* method) override;
// Generate a call to a static or direct method.
void GenerateStaticOrDirectCall(
- HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
// Generate a call to a virtual method.
void GenerateVirtualCall(
- HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
+ void RecordBootImageIntrinsicPatch(HX86ComputeBaseMethodAddress* method_address,
+ uint32_t intrinsic_data);
+ void RecordBootImageRelRoPatch(HX86ComputeBaseMethodAddress* method_address,
+ uint32_t boot_image_offset);
void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke);
void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke);
void RecordBootImageTypePatch(HLoadClass* load_class);
Label* NewTypeBssEntryPatch(HLoadClass* load_class);
void RecordBootImageStringPatch(HLoadString* load_string);
Label* NewStringBssEntryPatch(HLoadString* load_string);
+
+ void LoadBootImageAddress(Register reg,
+ uint32_t boot_image_reference,
+ HInvokeStaticOrDirect* invoke);
+ void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
+
Label* NewJitRootStringPatch(const DexFile& dex_file,
dex::StringIndex string_index,
Handle<mirror::String> handle);
@@ -427,16 +445,16 @@ class CodeGeneratorX86 : public CodeGenerator {
dex::TypeIndex type_index,
Handle<mirror::Class> handle);
- void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE;
+ void MoveFromReturnRegister(Location trg, DataType::Type type) override;
// Emit linker patches.
- void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
void PatchJitRootUse(uint8_t* code,
const uint8_t* roots_data,
const PatchInfo<Label>& info,
uint64_t index_in_table) const;
- void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
// Emit a write barrier.
void MarkGCCard(Register temp,
@@ -451,22 +469,18 @@ class CodeGeneratorX86 : public CodeGenerator {
return CommonGetLabelOf<Label>(block_labels_, block);
}
- void Initialize() OVERRIDE {
+ void Initialize() override {
block_labels_ = CommonInitializeLabels<Label>();
}
- bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE {
+ bool NeedsTwoRegisters(DataType::Type type) const override {
return type == DataType::Type::kInt64;
}
- bool ShouldSplitLongMoves() const OVERRIDE { return true; }
+ bool ShouldSplitLongMoves() const override { return true; }
Label* GetFrameEntryLabel() { return &frame_entry_label_; }
- const X86InstructionSetFeatures& GetInstructionSetFeatures() const {
- return isa_features_;
- }
-
void AddMethodAddressOffset(HX86ComputeBaseMethodAddress* method_base, int32_t offset) {
method_address_offset_.Put(method_base->GetId(), offset);
}
@@ -502,7 +516,7 @@ class CodeGeneratorX86 : public CodeGenerator {
Address LiteralCaseTable(HX86PackedSwitch* switch_instr, Register reg, Register value);
- void Finalize(CodeAllocator* allocator) OVERRIDE;
+ void Finalize(CodeAllocator* allocator) override;
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
@@ -598,9 +612,9 @@ class CodeGeneratorX86 : public CodeGenerator {
}
}
- void GenerateNop() OVERRIDE;
- void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
- void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateNop() override;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) override;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) override;
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// The correct value will be inserted when processing Assembler fixups.
@@ -629,20 +643,22 @@ class CodeGeneratorX86 : public CodeGenerator {
InstructionCodeGeneratorX86 instruction_visitor_;
ParallelMoveResolverX86 move_resolver_;
X86Assembler assembler_;
- const X86InstructionSetFeatures& isa_features_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
- // Type patch locations for kBssEntry.
+ // PC-relative type patch info for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
- // String patch locations; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_;
- // String patch locations for kBssEntry.
+ // PC-relative String patch info for kBssEntry.
ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_;
+ // PC-relative patch info for IntrinsicObjects.
+ ArenaDeque<X86PcRelativePatchInfo> boot_image_intrinsic_patches_;
// Patches for string root accesses in JIT compiled code.
ArenaDeque<PatchInfo<Label>> jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 7be360536b..7c293b8605 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -22,6 +22,7 @@
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
+#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
@@ -55,6 +56,13 @@ static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15
static constexpr int kC2ConditionMask = 0x400;
+static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
+ // Custom calling convention: RAX serves as both input and output.
+ RegisterSet caller_saves = RegisterSet::Empty();
+ caller_saves.Add(Location::RegisterLocation(RAX));
+ return caller_saves;
+}
+
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
@@ -63,7 +71,7 @@ class NullCheckSlowPathX86_64 : public SlowPathCode {
public:
explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
if (instruction_->CanThrowIntoCatchBlock()) {
@@ -77,9 +85,9 @@ class NullCheckSlowPathX86_64 : public SlowPathCode {
CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
+ const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
private:
DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
@@ -89,16 +97,16 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
public:
explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
+ const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
private:
DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
@@ -109,7 +117,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
: SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
__ Bind(GetEntryLabel());
if (type_ == DataType::Type::kInt32) {
if (is_div_) {
@@ -129,7 +137,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
+ const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
private:
const CpuRegister cpu_reg_;
@@ -143,7 +151,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
: SlowPathCode(instruction), successor_(successor) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
@@ -167,7 +175,7 @@ class SuspendCheckSlowPathX86_64 : public SlowPathCode {
return successor_;
}
- const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
+ const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
private:
HBasicBlock* const successor_;
@@ -181,7 +189,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
: SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
@@ -228,9 +236,9 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
}
- bool IsFatal() const OVERRIDE { return true; }
+ bool IsFatal() const override { return true; }
- const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
+ const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
private:
DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
@@ -238,34 +246,41 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode {
class LoadClassSlowPathX86_64 : public SlowPathCode {
public:
- LoadClassSlowPathX86_64(HLoadClass* cls,
- HInstruction* at,
- uint32_t dex_pc,
- bool do_clinit)
- : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+ LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
+ : SlowPathCode(at), cls_(cls) {
DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+ DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
+ Location out = locations->Out();
+ const uint32_t dex_pc = instruction_->GetDexPc();
+ bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
+ bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
+
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
-
SaveLiveRegisters(codegen, locations);
// Custom calling convention: RAX serves as both input and output.
- __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_));
- x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
- instruction_,
- dex_pc_,
- this);
- if (do_clinit_) {
- CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+ if (must_resolve_type) {
+ DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
+ dex::TypeIndex type_index = cls_->GetTypeIndex();
+ __ movl(CpuRegister(RAX), Immediate(type_index.index_));
+ x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
+      // If `must_do_clinit` is also set, the resolved type is now in the correct register (RAX).
} else {
- CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+ DCHECK(must_do_clinit);
+ Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
+ x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
+ }
+ if (must_do_clinit) {
+ x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
+ CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
}
- Location out = locations->Out();
// Move the class to the desired location.
if (out.IsValid()) {
DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
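
The rewritten slow path derives everything from the instruction instead of the removed `dex_pc_`/`do_clinit_` fields. Schematically, the control flow above is (pseudocode mirroring the diff, not new behavior):

    bool must_resolve_type = at->IsLoadClass() && cls->MustResolveTypeOnSlowPath();
    bool must_do_clinit = at->IsClinitCheck() || cls->MustGenerateClinitCheck();
    if (must_resolve_type) {
      // RAX <- type index; call kQuickResolveType; the resolved class lands in RAX.
    } else {
      // Already resolved: move the class into RAX from `out` or InAt(0).
    }
    if (must_do_clinit) {
      // RAX holds the mirror::Class*; call kQuickInitializeStaticStorage on it.
    }
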
@@ -276,18 +291,12 @@ class LoadClassSlowPathX86_64 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
+ const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
private:
// The class this slow path will load.
HLoadClass* const cls_;
- // The dex PC of `at_`.
- const uint32_t dex_pc_;
-
- // Whether to initialize the class.
- const bool do_clinit_;
-
DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};
@@ -295,7 +304,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode {
public:
explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -317,7 +326,7 @@ class LoadStringSlowPathX86_64 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
+ const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
private:
DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
@@ -328,7 +337,7 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode {
TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
: SlowPathCode(instruction), is_fatal_(is_fatal) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
uint32_t dex_pc = instruction_->GetDexPc();
DCHECK(instruction_->IsCheckCast()
@@ -376,9 +385,9 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode {
}
}
- const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
+ const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
- bool IsFatal() const OVERRIDE { return is_fatal_; }
+ bool IsFatal() const override { return is_fatal_; }
private:
const bool is_fatal_;
@@ -391,7 +400,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode {
explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
: SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
__ Bind(GetEntryLabel());
LocationSummary* locations = instruction_->GetLocations();
@@ -404,7 +413,7 @@ class DeoptimizationSlowPathX86_64 : public SlowPathCode {
CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
}
- const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
+ const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
private:
DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
@@ -414,7 +423,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
public:
explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
@@ -445,7 +454,7 @@ class ArraySetSlowPathX86_64 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
+ const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
private:
DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
@@ -473,9 +482,9 @@ class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
+ const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
Register ref_reg = ref_cpu_reg.AsRegister();
@@ -564,11 +573,11 @@ class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
DCHECK(kEmitCompilerReadBarrier);
}
- const char* GetDescription() const OVERRIDE {
+ const char* GetDescription() const override {
return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
Register ref_reg = ref_cpu_reg.AsRegister();
@@ -736,7 +745,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
CpuRegister reg_out = out_.AsRegister<CpuRegister>();
@@ -855,7 +864,7 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE {
+ const char* GetDescription() const override {
return "ReadBarrierForHeapReferenceSlowPathX86_64";
}
@@ -897,7 +906,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
DCHECK(kEmitCompilerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
@@ -922,7 +931,7 @@ class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
+ const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
private:
const Location out_;
@@ -969,7 +978,7 @@ inline Condition X86_64FPCondition(IfCondition cond) {
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+ ArtMethod* method ATTRIBUTE_UNUSED) {
return desired_dispatch_info;
}
@@ -983,7 +992,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
// temp = thread->string_init_entrypoint
uint32_t offset =
GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
- __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true));
+ __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
break;
}
case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
@@ -992,18 +1001,25 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
DCHECK(GetCompilerOptions().IsBootImage());
__ leal(temp.AsRegister<CpuRegister>(),
- Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
+ Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
RecordBootImageMethodPatch(invoke);
break;
- case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
+ // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
+ __ movl(temp.AsRegister<CpuRegister>(),
+ Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
+ RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
break;
+ }
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
__ movq(temp.AsRegister<CpuRegister>(),
- Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
+ Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
RecordMethodBssEntryPatch(invoke);
break;
}
+ case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
+ Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
+ break;
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
return; // No code pointer retrieval; the runtime performs the call directly.
@@ -1059,6 +1075,16 @@ void CodeGeneratorX86_64::GenerateVirtualCall(
RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
}
+void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
+ boot_image_intrinsic_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
+ __ Bind(&boot_image_intrinsic_patches_.back().label);
+}
+
+void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
+ boot_image_method_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
+ __ Bind(&boot_image_method_patches_.back().label);
+}
+
void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
boot_image_method_patches_.emplace_back(
invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
@@ -1089,12 +1115,48 @@ void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
}
Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
- DCHECK(!GetCompilerOptions().IsBootImage());
string_bss_entry_patches_.emplace_back(
&load_string->GetDexFile(), load_string->GetStringIndex().index_);
return &string_bss_entry_patches_.back().label;
}
+void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
+ if (GetCompilerOptions().IsBootImage()) {
+ __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
+ RecordBootImageIntrinsicPatch(boot_image_reference);
+ } else if (GetCompilerOptions().GetCompilePic()) {
+ __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
+ RecordBootImageRelRoPatch(boot_image_reference);
+ } else {
+ DCHECK(Runtime::Current()->UseJitCompilation());
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ DCHECK(!heap->GetBootImageSpaces().empty());
+ const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
+ __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
+ }
+}
+
+void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
+ uint32_t boot_image_offset) {
+ DCHECK(invoke->IsStatic());
+ InvokeRuntimeCallingConvention calling_convention;
+ CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
+ if (GetCompilerOptions().IsBootImage()) {
+ DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
+ // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
+ __ leal(argument,
+ Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
+ MethodReference target_method = invoke->GetTargetMethod();
+ dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
+ boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
+ __ Bind(&boot_image_type_patches_.back().label);
+ } else {
+ LoadBootImageAddress(argument, boot_image_offset);
+ }
+ InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+}
+
// The label points to the end of the "movl" or another instruction, but the literal offset
// for a method patch needs to point to the embedded constant, which occupies the last 4 bytes.
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
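
Concretely, every patch site recovers the literal's offset by backing up four bytes from the bound label, as the emit loop below does:

    // The label was bound at the end of the instruction; the 32-bit literal
    // being patched occupies the instruction's last four bytes.
    uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
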
@@ -1110,6 +1172,15 @@ inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
}
}
+template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
+linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
+ const DexFile* target_dex_file,
+ uint32_t pc_insn_offset,
+ uint32_t boot_image_offset) {
+ DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
+ return Factory(literal_offset, pc_insn_offset, boot_image_offset);
+}
+
void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
DCHECK(linker_patches->empty());
size_t size =
@@ -1118,7 +1189,8 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
boot_image_type_patches_.size() +
type_bss_entry_patches_.size() +
boot_image_string_patches_.size() +
- string_bss_entry_patches_.size();
+ string_bss_entry_patches_.size() +
+ boot_image_intrinsic_patches_.size();
linker_patches->reserve(size);
if (GetCompilerOptions().IsBootImage()) {
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
@@ -1127,12 +1199,14 @@ void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* li
boot_image_type_patches_, linker_patches);
EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
+ boot_image_intrinsic_patches_, linker_patches);
} else {
- DCHECK(boot_image_method_patches_.empty());
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>(
- boot_image_type_patches_, linker_patches);
- EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>(
- boot_image_string_patches_, linker_patches);
+ EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
+ boot_image_method_patches_, linker_patches);
+ DCHECK(boot_image_type_patches_.empty());
+ DCHECK(boot_image_string_patches_.empty());
+ DCHECK(boot_image_intrinsic_patches_.empty());
}
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
method_bss_entry_patches_, linker_patches);
@@ -1151,6 +1225,10 @@ void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int re
stream << FloatRegister(reg);
}
+const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
+ return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
+}
+
size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
__ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
return kX86_64WordSize;
@@ -1198,14 +1276,13 @@ void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_poin
}
void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
- __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
+ __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
}
static constexpr int kNumberOfCpuRegisterPairs = 0;
// Use a fake return address register to mimic Quick.
static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
- const X86_64InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats)
: CodeGenerator(graph,
@@ -1224,7 +1301,6 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
instruction_visitor_(graph, this),
move_resolver_(graph->GetAllocator(), this),
assembler_(graph->GetAllocator()),
- isa_features_(isa_features),
constant_area_start_(0),
boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
@@ -1232,6 +1308,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
+ boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
@@ -1721,7 +1798,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
nullptr : codegen_->GetLabelOf(true_successor);
Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
nullptr : codegen_->GetLabelOf(false_successor);
- GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
+ GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
}
void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
@@ -1739,9 +1816,9 @@ void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
GenerateTestAndBranch<Label>(deoptimize,
- /* condition_input_index */ 0,
+ /* condition_input_index= */ 0,
slow_path->GetEntryLabel(),
- /* false_target */ nullptr);
+ /* false_target= */ nullptr);
}
void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
@@ -1844,8 +1921,8 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
} else {
NearLabel false_target;
GenerateTestAndBranch<NearLabel>(select,
- /* condition_input_index */ 2,
- /* true_target */ nullptr,
+ /* condition_input_index= */ 2,
+ /* true_target= */ nullptr,
&false_target);
codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
__ Bind(&false_target);
@@ -2347,7 +2424,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type
case DataType::Type::kUint64:
case DataType::Type::kVoid:
LOG(FATAL) << "Unexpected parameter type " << type;
- break;
+ UNREACHABLE();
}
return Location::NoLocation();
}
@@ -2482,6 +2559,14 @@ void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic*
codegen_->GenerateInvokePolymorphicCall(invoke);
}
+void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
+ HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
+ codegen_->GenerateInvokeCustomCall(invoke);
+}
+
void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
LocationSummary* locations =
new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
@@ -3474,7 +3559,40 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr
LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
}
}
+void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location second = locations->InAt(1);
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
+ int64_t imm = Int64FromConstant(second.GetConstant());
+ DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
+ uint64_t abs_imm = AbsOrMin(imm);
+ CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
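+ // Sketch of the computation (two's complement): out = n & (2^k - 1); if the
+ // result is non-zero and n is negative, adjust it by -2^k so the remainder
+ // keeps the sign of the dividend, e.g. -7 % 4 == -3 as Java requires.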
+ if (instruction->GetResultType() == DataType::Type::kInt32) {
+ NearLabel done;
+ __ movl(out, numerator);
+ __ andl(out, Immediate(abs_imm - 1));
+ __ j(Condition::kZero, &done);
+ __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
+ __ testl(numerator, numerator);
+ __ cmov(Condition::kLess, out, tmp, /* is64bit= */ false);
+ __ Bind(&done);
+ } else {
+ DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
+ codegen_->Load64BitValue(tmp, abs_imm - 1);
+ NearLabel done;
+ __ movq(out, numerator);
+ __ andq(out, tmp);
+ __ j(Condition::kZero, &done);
+ __ movq(tmp, numerator);
+ __ sarq(tmp, Immediate(63));
+ __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
+ __ orq(out, tmp);
+ __ Bind(&done);
+ }
+}
void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
LocationSummary* locations = instruction->GetLocations();
Location second = locations->InAt(1);
@@ -3489,9 +3607,17 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
if (instruction->GetResultType() == DataType::Type::kInt32) {
- __ leal(tmp, Address(numerator, abs_imm - 1));
- __ testl(numerator, numerator);
- __ cmov(kGreaterEqual, tmp, numerator);
+ // When the denominator is 2, we can add the sign bit of the numerator to the
+ // numerator itself; using addl below instead of cmov saves one cycle.
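+ // E.g. for n / 2: tmp = n >>> 31 is 1 when n is negative and 0 otherwise;
+ // adding it to n rounds negative dividends toward zero before the
+ // arithmetic shift below.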
+ if (abs_imm == 2) {
+ __ leal(tmp, Address(numerator, 0));
+ __ shrl(tmp, Immediate(31));
+ __ addl(tmp, numerator);
+ } else {
+ __ leal(tmp, Address(numerator, abs_imm - 1));
+ __ testl(numerator, numerator);
+ __ cmov(kGreaterEqual, tmp, numerator);
+ }
int shift = CTZ(imm);
__ sarl(tmp, Immediate(shift));
@@ -3503,11 +3629,16 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
} else {
DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
-
- codegen_->Load64BitValue(rdx, abs_imm - 1);
- __ addq(rdx, numerator);
- __ testq(numerator, numerator);
- __ cmov(kGreaterEqual, rdx, numerator);
+ if (abs_imm == 2) {
+ __ movq(rdx, numerator);
+ __ shrq(rdx, Immediate(63));
+ __ addq(rdx, numerator);
+ } else {
+ codegen_->Load64BitValue(rdx, abs_imm - 1);
+ __ addq(rdx, numerator);
+ __ testq(numerator, numerator);
+ __ cmov(kGreaterEqual, rdx, numerator);
+ }
int shift = CTZ(imm);
__ sarq(rdx, Immediate(shift));
@@ -3547,7 +3678,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
if (instruction->GetResultType() == DataType::Type::kInt32) {
int imm = second.GetConstant()->AsIntConstant()->GetValue();
- CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
+ CalculateMagicAndShiftForDivRem(imm, /* is_long= */ false, &magic, &shift);
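+ // The "magic number" technique (see e.g. Hacker's Delight, ch. 10) replaces
+ // division by a constant with a widening multiply by a precomputed
+ // reciprocal, followed by shifts and a sign correction.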
__ movl(numerator, eax);
@@ -3584,7 +3715,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat
CpuRegister rax = eax;
CpuRegister rdx = edx;
- CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
+ CalculateMagicAndShiftForDivRem(imm, /* is_long= */ true, &magic, &shift);
// Save the numerator.
__ movq(numerator, rax);
@@ -3651,8 +3782,12 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in
// Do not generate anything. DivZeroCheck would prevent any code to be executed.
} else if (imm == 1 || imm == -1) {
DivRemOneOrMinusOne(instruction);
- } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
- DivByPowerOfTwo(instruction->AsDiv());
+ } else if (IsPowerOfTwo(AbsOrMin(imm))) {
+ if (is_div) {
+ DivByPowerOfTwo(instruction->AsDiv());
+ } else {
+ RemByPowerOfTwo(instruction->AsRem());
+ }
} else {
DCHECK(imm <= -2 || imm >= 2);
GenerateDivRemWithAnyConstant(instruction);
@@ -3821,6 +3956,241 @@ void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
}
}
+static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
+ LocationSummary* locations = new (allocator) LocationSummary(minmax);
+ switch (minmax->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetInAt(1, Location::RequiresFpuRegister());
+ // The following is sub-optimal, but all we can do for now. It would be fine to also accept
+ // the second input to be the output (we can simply swap inputs).
+ locations->SetOut(Location::SameAsFirstInput());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ // Can return immediately, as op1_loc == out_loc.
+ // Note: if we ever support separate registers, e.g., output into memory, we need to check for
+ // a copy here.
+ DCHECK(locations->Out().Equals(op1_loc));
+ return;
+ }
+
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
+
+ // (out := op1)
+ // out <=? op2
+ // if out is min jmp done
+ // out := op2
+ // done:
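+ //
+ // cmov writes op2 into out only when the condition holds, i.e. when out is
+ // not already the desired min/max, so no branch is needed.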
+
+ if (type == DataType::Type::kInt64) {
+ __ cmpq(out, op2);
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /* is64bit= */ true);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt32);
+ __ cmpl(out, op2);
+ __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /* is64bit= */ false);
+ }
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
+ bool is_min,
+ DataType::Type type) {
+ Location op1_loc = locations->InAt(0);
+ Location op2_loc = locations->InAt(1);
+ Location out_loc = locations->Out();
+ XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
+
+ // Shortcut for same input locations.
+ if (op1_loc.Equals(op2_loc)) {
+ DCHECK(out_loc.Equals(op1_loc));
+ return;
+ }
+
+ // (out := op1)
+ // out <=? op2
+ // if NaN jmp NaN_label
+ // if out is min jmp done
+ // if op2 is min jmp op2_label
+ // handle -0/+0
+ // jmp done
+ // NaN_label:
+ // out := NaN
+ // op2_label:
+ // out := op2
+ // done:
+ //
+ // This removes one jmp, but needs to copy one input (op1) to out.
+ //
+ // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
+
+ XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
+
+ NearLabel nan, done, op2_label;
+ if (type == DataType::Type::kFloat64) {
+ __ ucomisd(out, op2);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kFloat32);
+ __ ucomiss(out, op2);
+ }
+
+ __ j(Condition::kParityEven, &nan);
+
+ __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
+ __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
+
+ // Handle 0.0/-0.0.
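+ // Only the sign bit differs between +0.0 and -0.0, so OR yields -0.0 for
+ // min (sign set if either operand is negative zero) and AND yields +0.0
+ // for max (sign set only if both are).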
+ if (is_min) {
+ if (type == DataType::Type::kFloat64) {
+ __ orpd(out, op2);
+ } else {
+ __ orps(out, op2);
+ }
+ } else {
+ if (type == DataType::Type::kFloat64) {
+ __ andpd(out, op2);
+ } else {
+ __ andps(out, op2);
+ }
+ }
+ __ jmp(&done);
+
+ // NaN handling.
+ __ Bind(&nan);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
+ } else {
+ __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
+ }
+ __ jmp(&done);
+
+ // out := op2;
+ __ Bind(&op2_label);
+ if (type == DataType::Type::kFloat64) {
+ __ movsd(out, op2);
+ } else {
+ __ movss(out, op2);
+ }
+
+ // Done.
+ __ Bind(&done);
+}
+
+void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
+ DataType::Type type = minmax->GetResultType();
+ switch (type) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HMinMax " << type;
+ }
+}
+
+void LocationsBuilderX86_64::VisitMin(HMin* min) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
+ GenerateMinMax(min, /* is_min= */ true);
+}
+
+void LocationsBuilderX86_64::VisitMax(HMax* max) {
+ CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
+}
+
+void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
+ GenerateMinMax(max, /* is_min= */ false);
+}
+
+void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32:
+ case DataType::Type::kInt64:
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresRegister());
+ break;
+ case DataType::Type::kFloat32:
+ case DataType::Type::kFloat64:
+ locations->SetInAt(0, Location::RequiresFpuRegister());
+ locations->SetOut(Location::SameAsFirstInput());
+ locations->AddTemp(Location::RequiresFpuRegister());
+ break;
+ default:
+ LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
+ }
+}
+
+void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
+ LocationSummary* locations = abs->GetLocations();
+ switch (abs->GetResultType()) {
+ case DataType::Type::kInt32: {
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
+ // Create mask.
+ __ movl(mask, out);
+ __ sarl(mask, Immediate(31));
+ // Add mask.
+ __ addl(out, mask);
+ __ xorl(out, mask);
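+ // mask is 0 for non-negative n (add/xor are no-ops) and all ones for
+ // negative n, where (n - 1) ^ ~0 == -n. E.g. n = -5: add -> -6, xor -> 5.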
+ break;
+ }
+ case DataType::Type::kInt64: {
+ CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+ CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
+ // Create mask.
+ __ movq(mask, out);
+ __ sarq(mask, Immediate(63));
+ // Add mask.
+ __ addq(out, mask);
+ __ xorq(out, mask);
+ break;
+ }
+ case DataType::Type::kFloat32: {
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
+ __ andps(out, mask);
+ break;
+ }
+ case DataType::Type::kFloat64: {
+ XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
+ XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+ __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
+ __ andpd(out, mask);
+ break;
+ }
+ default:
+ LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
+ }
+}
+
void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
locations->SetInAt(0, Location::Any());
@@ -4030,29 +4400,14 @@ void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
instruction, LocationSummary::kCallOnMainOnly);
InvokeRuntimeCallingConvention calling_convention;
- if (instruction->IsStringAlloc()) {
- locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
- } else {
- locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
- }
+ locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
locations->SetOut(Location::RegisterLocation(RAX));
}
void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- if (instruction->IsStringAlloc()) {
- // String is allocated through StringFactory. Call NewEmptyString entry point.
- CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
- MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
- __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
- __ call(Address(temp, code_offset.SizeValue()));
- codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
- } else {
- codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
- DCHECK(!codegen_->IsLeafMethod());
- }
+ codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+ CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ DCHECK(!codegen_->IsLeafMethod());
}
void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
@@ -4065,10 +4420,8 @@ void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
}
void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
- // Note: if heap poisoning is enabled, the entry point takes cares
- // of poisoning the reference.
- QuickEntrypointEnum entrypoint =
- CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
+ // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
+ QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
DCHECK(!codegen_->IsLeafMethod());
@@ -4200,7 +4553,7 @@ void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
}
case MemBarrierKind::kNTStoreStore:
// Non-Temporal Store/Store needs an explicit fence.
- MemoryFence(/* non-temporal */ true);
+ MemoryFence(/* non-temporal= */ true);
break;
}
}
@@ -4277,7 +4630,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, base, offset, /* needs_null_check */ true);
+ instruction, out, base, offset, /* needs_null_check= */ true);
if (is_volatile) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
}
@@ -4732,7 +5085,7 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
// Note that a potential implicit null check is handled in this
// CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
codegen_->GenerateArrayLoadWithBakerReadBarrier(
- instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
+ instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
} else {
CpuRegister out = out_loc.AsRegister<CpuRegister>();
__ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
@@ -5130,10 +5483,26 @@ void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
__ testl(value, value);
__ j(kEqual, &is_null);
}
+ // Load the address of the card table into `card`.
__ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
- /* no_rip */ true));
+ /* no_rip= */ true));
+ // Calculate the offset (in the card table) of the card corresponding to
+ // `object`.
__ movq(temp, object);
__ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
+ // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
+ // `object`'s card.
+ //
+ // Register `card` contains the address of the card table. Note that the card
+ // table's base is biased during its creation so that it always starts at an
+ // address whose least-significant byte is equal to `kCardDirty` (see
+ // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
+ // below writes the `kCardDirty` (byte) value into the `object`'s card
+ // (located at `card + object >> kCardShift`).
+ //
+ // This dual use of the value in register `card` (1. to calculate the location
+ // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
+ // (no need to explicitly load `kCardDirty` as an immediate value).
__ movb(Address(temp, card, TIMES_1, 0), card);
if (value_can_be_null) {
__ Bind(&is_null);
@@ -5196,7 +5565,7 @@ void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruc
}
__ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
- /* no_rip */ true),
+ /* no_rip= */ true),
Immediate(0));
if (successor == nullptr) {
__ j(kNotEqual, slow_path->GetEntryLabel());
@@ -5462,6 +5831,26 @@ void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
// No need for memory fence, thanks to the x86-64 memory model.
}
+void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
+ CpuRegister temp) {
+ uint32_t path_to_root = check->GetBitstringPathToRoot();
+ uint32_t mask = check->GetBitstringMask();
+ DCHECK(IsPowerOfTwo(mask + 1));
+ size_t mask_bits = WhichPowerOf2(mask + 1);
+
+ if (mask_bits == 16u) {
+ // Compare the bitstring in memory.
+ __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
+ } else {
+ // /* uint32_t */ temp = temp->status_
+ __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
+ // Compare the bitstring bits using SUB.
+ __ subl(temp, Immediate(path_to_root));
+ // Shift out bits that do not contribute to the comparison.
+ __ shll(temp, Immediate(32u - mask_bits));
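+ // The zero flag is now set iff the low mask_bits of the status word equal
+ // path_to_root; callers branch on kEqual/kNotEqual.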
+ }
+}
+
HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
HLoadClass::LoadKind desired_class_load_kind) {
switch (desired_class_load_kind) {
@@ -5471,14 +5860,14 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
case HLoadClass::LoadKind::kReferrersClass:
break;
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadClass::LoadKind::kJitBootImageAddress:
case HLoadClass::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadClass::LoadKind::kBootImageAddress:
case HLoadClass::LoadKind::kRuntimeCall:
break;
}
@@ -5513,10 +5902,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
if (load_kind == HLoadClass::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the type resolution and/or initialization to save everything.
- // Custom calling convention: RAX serves as both input and output.
- RegisterSet caller_saves = RegisterSet::Empty();
- caller_saves.Add(Location::RegisterLocation(RAX));
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -5561,48 +5947,41 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
cls,
out_loc,
Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
- /* fixup_label */ nullptr,
+ /* fixup_label= */ nullptr,
read_barrier_option);
break;
}
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
+ __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
codegen_->RecordBootImageTypePatch(cls);
break;
- case HLoadClass::LoadKind::kBootImageAddress: {
- DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
- DCHECK_NE(address, 0u);
- __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
- break;
- }
- case HLoadClass::LoadKind::kBootImageClassTable: {
+ case HLoadClass::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
- codegen_->RecordBootImageTypePatch(cls);
- // Extract the reference from the slot data, i.e. clear the hash bits.
- int32_t masked_hash = ClassTable::TableSlot::MaskHash(
- ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex())));
- if (masked_hash != 0) {
- __ subl(out, Immediate(masked_hash));
- }
+ __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
+ codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
break;
}
case HLoadClass::LoadKind::kBssEntry: {
Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
- /* no_rip */ false);
+ /* no_rip= */ false);
Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
generate_null_check = true;
break;
}
+ case HLoadClass::LoadKind::kJitBootImageAddress: {
+ DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
+ uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
+ DCHECK_NE(address, 0u);
+ __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
+ break;
+ }
case HLoadClass::LoadKind::kJitTableAddress: {
Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
- /* no_rip */ true);
+ /* no_rip= */ true);
Label* fixup_label =
codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
// /* GcRoot<mirror::Class> */ out = *address
@@ -5616,8 +5995,8 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S
if (generate_null_check || cls->MustGenerateClinitCheck()) {
DCHECK(cls->CanCallRuntime());
- SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(
- cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+ SlowPathCode* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
codegen_->AddSlowPath(slow_path);
if (generate_null_check) {
__ testl(out, out);
@@ -5638,12 +6017,34 @@ void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
if (check->HasUses()) {
locations->SetOut(Location::SameAsFirstInput());
}
+ // Rely on the type initialization to save everything we need.
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
+}
+
+void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ // Custom calling convention: RAX serves as both input and output.
+ Location location = Location::RegisterLocation(RAX);
+ CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
+}
+
+void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
+ codegen_->GenerateLoadMethodHandleRuntimeCall(load);
+}
+
+void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
+ // Custom calling convention: RAX serves as both input and output.
+ Location location = Location::RegisterLocation(RAX);
+ CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
+}
+
+void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
+ codegen_->GenerateLoadMethodTypeRuntimeCall(load);
}
void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
// We assume the class to not be null.
- SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(
- check->GetLoadClass(), check, check->GetDexPc(), true);
+ SlowPathCode* slow_path =
+ new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
codegen_->AddSlowPath(slow_path);
GenerateClassInitializationCheck(slow_path,
check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
@@ -5653,14 +6054,14 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
HLoadString::LoadKind desired_string_load_kind) {
switch (desired_string_load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
DCHECK(!Runtime::Current()->UseJitCompilation());
break;
+ case HLoadString::LoadKind::kJitBootImageAddress:
case HLoadString::LoadKind::kJitTableAddress:
DCHECK(Runtime::Current()->UseJitCompilation());
break;
- case HLoadString::LoadKind::kBootImageAddress:
case HLoadString::LoadKind::kRuntimeCall:
break;
}
@@ -5677,10 +6078,7 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
if (!kUseReadBarrier || kUseBakerReadBarrier) {
// Rely on the pResolveString to save everything.
- // Custom calling convention: RAX serves as both input and output.
- RegisterSet caller_saves = RegisterSet::Empty();
- caller_saves.Add(Location::RegisterLocation(RAX));
- locations->SetCustomSlowPathCallerSaves(caller_saves);
+ locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
} else {
// For non-Baker read barrier we have a temp-clobbering call.
}
@@ -5708,26 +6106,19 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
switch (load->GetLoadKind()) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
DCHECK(codegen_->GetCompilerOptions().IsBootImage());
- __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
+ __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
codegen_->RecordBootImageStringPatch(load);
return;
}
- case HLoadString::LoadKind::kBootImageAddress: {
- uint32_t address = dchecked_integral_cast<uint32_t>(
- reinterpret_cast<uintptr_t>(load->GetString().Get()));
- DCHECK_NE(address, 0u);
- __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
- return;
- }
- case HLoadString::LoadKind::kBootImageInternTable: {
+ case HLoadString::LoadKind::kBootImageRelRo: {
DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
- __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
- codegen_->RecordBootImageStringPatch(load);
+ __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
+ codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
return;
}
case HLoadString::LoadKind::kBssEntry: {
Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
- /* no_rip */ false);
+ /* no_rip= */ false);
Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
// /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
@@ -5738,9 +6129,15 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
__ Bind(slow_path->GetExitLabel());
return;
}
+ case HLoadString::LoadKind::kJitBootImageAddress: {
+ uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
+ DCHECK_NE(address, 0u);
+ __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
+ return;
+ }
case HLoadString::LoadKind::kJitTableAddress: {
Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
- /* no_rip */ true);
+ /* no_rip= */ true);
Label* fixup_label = codegen_->NewJitRootStringPatch(
load->GetDexFile(), load->GetStringIndex(), load->GetString());
// /* GcRoot<mirror::String> */ out = *address
@@ -5762,7 +6159,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA
static Address GetExceptionTlsAddress() {
return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
- /* no_rip */ true);
+ /* no_rip= */ true);
}
void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
@@ -5795,24 +6192,26 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
}
-static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
- if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
- // We need a temporary for holding the iftable length.
- return true;
- }
- return kEmitCompilerReadBarrier &&
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+ if (kEmitCompilerReadBarrier &&
!kUseBakerReadBarrier &&
(type_check_kind == TypeCheckKind::kAbstractClassCheck ||
type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck);
+ type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+ return 1;
+ }
+ return 0;
}
-static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
- return kEmitCompilerReadBarrier &&
- !kUseBakerReadBarrier &&
- (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
- type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
- type_check_kind == TypeCheckKind::kArrayObjectCheck);
+// The interface case has 2 temps: one holding the number of interfaces and one for the
+// current interface pointer; the current interface itself is compared in memory.
+// The other checks have one temp for loading the object's class.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+ if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+ return 2;
+ }
+ return 1 + NumberOfInstanceOfTemps(type_check_kind);
}
void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5834,6 +6233,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
case TypeCheckKind::kInterfaceCheck:
call_kind = LocationSummary::kCallOnSlowPath;
break;
+ case TypeCheckKind::kBitstringCheck:
+ break;
}
LocationSummary* locations =
@@ -5842,14 +6243,16 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
}
locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::Any());
+ if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
+ } else {
+ locations->SetInAt(1, Location::Any());
+ }
// Note that TypeCheckSlowPathX86_64 uses this "out" register too.
locations->SetOut(Location::RequiresRegister());
- // When read barriers are enabled, we need a temporary register for
- // some cases.
- if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
- locations->AddTemp(Location::RequiresRegister());
- }
+ locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
}
void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5860,9 +6263,9 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
Location cls = locations->InAt(1);
Location out_loc = locations->Out();
CpuRegister out = out_loc.AsRegister<CpuRegister>();
- Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
- locations->GetTemp(0) :
- Location::NoLocation();
+ const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+ DCHECK_LE(num_temps, 1u);
+ Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -6031,7 +6434,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ j(kNotEqual, slow_path->GetEntryLabel());
__ movl(out, Immediate(1));
@@ -6063,7 +6466,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
// This should also be beneficial for the other cases above.
DCHECK(locations->OnlyCallsOnSlowPath());
slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
- instruction, /* is_fatal */ false);
+ instruction, /* is_fatal= */ false);
codegen_->AddSlowPath(slow_path);
__ jmp(slow_path->GetEntryLabel());
if (zero.IsLinked()) {
@@ -6071,6 +6474,27 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
}
break;
}
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ out_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, out);
+ if (zero.IsLinked()) {
+ __ j(kNotEqual, &zero);
+ __ movl(out, Immediate(1));
+ __ jmp(&done);
+ } else {
+ __ setcc(kEqual, out);
+ // setcc only sets the low byte.
+ __ andl(out, Immediate(1));
+ }
+ break;
+ }
}
if (zero.IsLinked()) {
@@ -6097,17 +6521,15 @@ void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
// Require a register for the interface check since there is a loop that compares the class to
// a memory address.
locations->SetInAt(1, Location::RequiresRegister());
+ } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
+ locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+ locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
+ locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
} else {
locations->SetInAt(1, Location::Any());
}
-
- // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
- locations->AddTemp(Location::RequiresRegister());
- // When read barriers are enabled, we need an additional temporary
- // register for some cases.
- if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
- locations->AddTemp(Location::RequiresRegister());
- }
+ // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
+ locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
}
void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
@@ -6118,9 +6540,10 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
Location cls = locations->InAt(1);
Location temp_loc = locations->GetTemp(0);
CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
- Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
- locations->GetTemp(1) :
- Location::NoLocation();
+ const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+ DCHECK_GE(num_temps, 1u);
+ DCHECK_LE(num_temps, 2u);
+ Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -6283,7 +6706,7 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
break;
}
- case TypeCheckKind::kInterfaceCheck:
+ case TypeCheckKind::kInterfaceCheck: {
// Fast path for the interface check. Try to avoid read barriers to improve the fast path.
// We can not get false positives by doing this.
// /* HeapReference<Class> */ temp = obj->klass_
@@ -6319,6 +6742,20 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
// If `cls` was poisoned above, unpoison it.
__ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
break;
+ }
+
+ case TypeCheckKind::kBitstringCheck: {
+ // /* HeapReference<Class> */ temp = obj->klass_
+ GenerateReferenceLoadTwoRegisters(instruction,
+ temp_loc,
+ obj_loc,
+ class_offset,
+ kWithoutReadBarrier);
+
+ GenerateBitstringTypeCheckCompare(instruction, temp);
+ __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
+ break;
+ }
}
if (done.IsLinked()) {
@@ -6346,6 +6783,48 @@ void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* in
}
}
+void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+ DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ // There is no immediate variant of negated bitwise and in X86.
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
+ DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
+ LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location first = locations->InAt(0);
+ Location second = locations->InAt(1);
+ Location dest = locations->Out();
+ __ andn(dest.AsRegister<CpuRegister>(),
+         first.AsRegister<CpuRegister>(),
+         second.AsRegister<CpuRegister>());
+}
+
+void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
+ LocationSummary* locations = instruction->GetLocations();
+ Location src = locations->InAt(0);
+ Location dest = locations->Out();
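+ // BMI1 semantics: BLSR computes src & (src - 1) (reset the lowest set bit);
+ // BLSMSK computes src ^ (src - 1) (a mask up to and including the lowest
+ // set bit).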
+ switch (instruction->GetOpKind()) {
+ case HInstruction::kAnd:
+ __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
+ break;
+ case HInstruction::kXor:
+ __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
+ break;
+ default:
+ LOG(FATAL) << "Unreachable";
+ }
+}
+
void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
@@ -6474,7 +6953,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(out + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, out_reg, offset, /* needs_null_check */ false);
+ instruction, out, out_reg, offset, /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// Save the value of `out` into `maybe_temp` before overwriting it
@@ -6508,7 +6987,7 @@ void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
// Load with fast path based Baker's read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- instruction, out, obj_reg, offset, /* needs_null_check */ false);
+ instruction, out, obj_reg, offset, /* needs_null_check= */ false);
} else {
// Load with slow path based read barrier.
// /* HeapReference<Object> */ out = *(obj + offset)
@@ -6557,13 +7036,13 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
// Slow path marking the GC root `root`.
SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
- instruction, root, /* unpoison_ref_before_marking */ false);
+ instruction, root, /* unpoison_ref_before_marking= */ false);
codegen_->AddSlowPath(slow_path);
// Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
const int32_t entry_point_offset =
Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
- __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
+ __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
// The entrypoint is null when the GC is not marking.
__ j(kNotEqual, slow_path->GetEntryLabel());
__ Bind(slow_path->GetExitLabel());
@@ -6660,7 +7139,7 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
// Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
@@ -6689,10 +7168,10 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction
DCHECK(temp1 != nullptr);
DCHECK(temp2 != nullptr);
slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
- instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
+ instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
} else {
slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
- instruction, ref, /* unpoison_ref_before_marking */ true);
+ instruction, ref, /* unpoison_ref_before_marking= */ true);
}
AddSlowPath(slow_path);
@@ -7005,7 +7484,7 @@ class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenera
CodeGeneratorX86_64* codegen_;
private:
- void Process(const MemoryRegion& region, int pos) OVERRIDE {
+ void Process(const MemoryRegion& region, int pos) override {
// Patch the correct offset for the instruction. We use the address of the
// 'next' instruction, which is 'pos' (patch the 4 bytes before).
int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
@@ -7152,7 +7631,7 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
uintptr_t address =
reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
- typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
+ using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
dchecked_integral_cast<uint32_t>(address);
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 1079e94dfc..f74e130702 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -83,22 +83,22 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention {
public:
FieldAccessCallingConventionX86_64() {}
- Location GetObjectLocation() const OVERRIDE {
+ Location GetObjectLocation() const override {
return Location::RegisterLocation(RSI);
}
- Location GetFieldIndexLocation() const OVERRIDE {
+ Location GetFieldIndexLocation() const override {
return Location::RegisterLocation(RDI);
}
- Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
return Location::RegisterLocation(RAX);
}
Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance)
- const OVERRIDE {
+ const override {
return is_instance
? Location::RegisterLocation(RDX)
: Location::RegisterLocation(RSI);
}
- Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const override {
return Location::FpuRegisterLocation(XMM0);
}
@@ -112,9 +112,9 @@ class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventio
InvokeDexCallingConventionVisitorX86_64() {}
virtual ~InvokeDexCallingConventionVisitorX86_64() {}
- Location GetNextLocation(DataType::Type type) OVERRIDE;
- Location GetReturnLocation(DataType::Type type) const OVERRIDE;
- Location GetMethodLocation() const OVERRIDE;
+ Location GetNextLocation(DataType::Type type) override;
+ Location GetReturnLocation(DataType::Type type) const override;
+ Location GetMethodLocation() const override;
private:
InvokeDexCallingConvention calling_convention;
@@ -129,10 +129,10 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap {
ParallelMoveResolverX86_64(ArenaAllocator* allocator, CodeGeneratorX86_64* codegen)
: ParallelMoveResolverWithSwap(allocator), codegen_(codegen) {}
- void EmitMove(size_t index) OVERRIDE;
- void EmitSwap(size_t index) OVERRIDE;
- void SpillScratch(int reg) OVERRIDE;
- void RestoreScratch(int reg) OVERRIDE;
+ void EmitMove(size_t index) override;
+ void EmitSwap(size_t index) override;
+ void SpillScratch(int reg) override;
+ void RestoreScratch(int reg) override;
X86_64Assembler* GetAssembler() const;
@@ -157,14 +157,15 @@ class LocationsBuilderX86_64 : public HGraphVisitor {
: HGraphVisitor(graph), codegen_(codegen) {}
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -188,14 +189,15 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
InstructionCodeGeneratorX86_64(HGraph* graph, CodeGeneratorX86_64* codegen);
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE;
+ void Visit##name(H##name* instr) override;
FOR_EACH_CONCRETE_INSTRUCTION_COMMON(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_X86_64(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(DECLARE_VISIT_INSTRUCTION)
#undef DECLARE_VISIT_INSTRUCTION
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Unreachable instruction " << instruction->DebugName()
<< " (id " << instruction->GetId() << ")";
}
@@ -208,10 +210,12 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
// the suspend call.
void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
void GenerateClassInitializationCheck(SlowPathCode* slow_path, CpuRegister class_reg);
+ void GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check, CpuRegister temp);
void HandleBitwiseOperation(HBinaryOperation* operation);
void GenerateRemFP(HRem* rem);
void DivRemOneOrMinusOne(HBinaryOperation* instruction);
void DivByPowerOfTwo(HDiv* instruction);
+ void RemByPowerOfTwo(HRem* instruction);
void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
void GenerateDivRemIntegral(HBinaryOperation* instruction);
void HandleCondition(HCondition* condition);
@@ -222,6 +226,10 @@ class InstructionCodeGeneratorX86_64 : public InstructionCodeGenerator {
bool value_can_be_null);
void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+ void GenerateMinMaxInt(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMaxFP(LocationSummary* locations, bool is_min, DataType::Type type);
+ void GenerateMinMax(HBinaryOperation* minmax, bool is_min);
+
// Generate a heap reference load using one register `out`:
//
// out <- *(out + offset)
@@ -291,28 +299,27 @@ class JumpTableRIPFixup;
class CodeGeneratorX86_64 : public CodeGenerator {
public:
CodeGeneratorX86_64(HGraph* graph,
- const X86_64InstructionSetFeatures& isa_features,
const CompilerOptions& compiler_options,
OptimizingCompilerStats* stats = nullptr);
virtual ~CodeGeneratorX86_64() {}
- void GenerateFrameEntry() OVERRIDE;
- void GenerateFrameExit() OVERRIDE;
- void Bind(HBasicBlock* block) OVERRIDE;
- void MoveConstant(Location destination, int32_t value) OVERRIDE;
- void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE;
- void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
+ void GenerateFrameEntry() override;
+ void GenerateFrameExit() override;
+ void Bind(HBasicBlock* block) override;
+ void MoveConstant(Location destination, int32_t value) override;
+ void MoveLocation(Location dst, Location src, DataType::Type dst_type) override;
+ void AddLocationAsTemp(Location location, LocationSummary* locations) override;
- size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
- size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
+ size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreCoreRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
+ size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) override;
// Generate code to invoke a runtime entry point.
void InvokeRuntime(QuickEntrypointEnum entrypoint,
HInstruction* instruction,
uint32_t dex_pc,
- SlowPathCode* slow_path = nullptr) OVERRIDE;
+ SlowPathCode* slow_path = nullptr) override;
// Generate code to invoke a runtime entry point, but do not record
// PC-related information in a stack map.
@@ -322,49 +329,51 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void GenerateInvokeRuntime(int32_t entry_point_offset);
- size_t GetWordSize() const OVERRIDE {
+ size_t GetWordSize() const override {
return kX86_64WordSize;
}
- size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
+ size_t GetFloatingPointSpillSlotSize() const override {
return GetGraph()->HasSIMD()
? 2 * kX86_64WordSize // 16 bytes == 2 x86_64 words for each spill
: 1 * kX86_64WordSize; // 8 bytes == 1 x86_64 word for each spill
}
- HGraphVisitor* GetLocationBuilder() OVERRIDE {
+ HGraphVisitor* GetLocationBuilder() override {
return &location_builder_;
}
- HGraphVisitor* GetInstructionVisitor() OVERRIDE {
+ HGraphVisitor* GetInstructionVisitor() override {
return &instruction_visitor_;
}
- X86_64Assembler* GetAssembler() OVERRIDE {
+ X86_64Assembler* GetAssembler() override {
return &assembler_;
}
- const X86_64Assembler& GetAssembler() const OVERRIDE {
+ const X86_64Assembler& GetAssembler() const override {
return assembler_;
}
- ParallelMoveResolverX86_64* GetMoveResolver() OVERRIDE {
+ ParallelMoveResolverX86_64* GetMoveResolver() override {
return &move_resolver_;
}
- uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+ uintptr_t GetAddressOf(HBasicBlock* block) override {
return GetLabelOf(block)->Position();
}
- void SetupBlockedRegisters() const OVERRIDE;
- void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
- void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
- void Finalize(CodeAllocator* allocator) OVERRIDE;
+ void SetupBlockedRegisters() const override;
+ void DumpCoreRegister(std::ostream& stream, int reg) const override;
+ void DumpFloatingPointRegister(std::ostream& stream, int reg) const override;
+ void Finalize(CodeAllocator* allocator) override;
- InstructionSet GetInstructionSet() const OVERRIDE {
+ InstructionSet GetInstructionSet() const override {
return InstructionSet::kX86_64;
}
+ const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const;
+
// Emit a write barrier.
void MarkGCCard(CpuRegister temp,
CpuRegister card,
@@ -381,35 +390,37 @@ class CodeGeneratorX86_64 : public CodeGenerator {
return CommonGetLabelOf<Label>(block_labels_, block);
}
- void Initialize() OVERRIDE {
+ void Initialize() override {
block_labels_ = CommonInitializeLabels<Label>();
}
- bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const override {
return false;
}
// Check if the desired_string_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadString::LoadKind GetSupportedLoadStringKind(
- HLoadString::LoadKind desired_string_load_kind) OVERRIDE;
+ HLoadString::LoadKind desired_string_load_kind) override;
// Check if the desired_class_load_kind is supported. If it is, return it,
// otherwise return a fall-back kind that should be used instead.
HLoadClass::LoadKind GetSupportedLoadClassKind(
- HLoadClass::LoadKind desired_class_load_kind) OVERRIDE;
+ HLoadClass::LoadKind desired_class_load_kind) override;
// Check if the desired_dispatch_info is supported. If it is, return it,
// otherwise return a fall-back info that should be used instead.
HInvokeStaticOrDirect::DispatchInfo GetSupportedInvokeStaticOrDirectDispatch(
const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
- HInvokeStaticOrDirect* invoke) OVERRIDE;
+ ArtMethod* method) override;
void GenerateStaticOrDirectCall(
- HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
void GenerateVirtualCall(
- HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+ HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) override;
+ void RecordBootImageIntrinsicPatch(uint32_t intrinsic_data);
+ void RecordBootImageRelRoPatch(uint32_t boot_image_offset);
void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke);
void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke);
void RecordBootImageTypePatch(HLoadClass* load_class);
@@ -423,20 +434,17 @@ class CodeGeneratorX86_64 : public CodeGenerator {
dex::TypeIndex type_index,
Handle<mirror::Class> handle);
- void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE;
+ void LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference);
+ void AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke, uint32_t boot_image_offset);
- void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE;
+ void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) override;
void PatchJitRootUse(uint8_t* code,
const uint8_t* roots_data,
const PatchInfo<Label>& info,
uint64_t index_in_table) const;
- void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
-
- const X86_64InstructionSetFeatures& GetInstructionSetFeatures() const {
- return isa_features_;
- }
+ void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) override;
// Fast path implementation of ReadBarrier::Barrier for a heap
// reference field load when Baker's read barriers are used.
@@ -560,6 +568,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
// Store a 64 bit value into a DoubleStackSlot in the most efficient manner.
void Store64BitValueToStack(Location dest, int64_t value);
+ void MoveFromReturnRegister(Location trg, DataType::Type type) override;
+
// Assign a 64 bit constant to an address.
void MoveInt64ToAddress(const Address& addr_low,
const Address& addr_high,
@@ -578,9 +588,9 @@ class CodeGeneratorX86_64 : public CodeGenerator {
}
}
- void GenerateNop() OVERRIDE;
- void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
- void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
+ void GenerateNop() override;
+ void GenerateImplicitNullCheck(HNullCheck* instruction) override;
+ void GenerateExplicitNullCheck(HNullCheck* instruction) override;
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// We will fix this up in the linker later to have the right value.
@@ -598,24 +608,26 @@ class CodeGeneratorX86_64 : public CodeGenerator {
InstructionCodeGeneratorX86_64 instruction_visitor_;
ParallelMoveResolverX86_64 move_resolver_;
X86_64Assembler assembler_;
- const X86_64InstructionSetFeatures& isa_features_;
// Offset to the start of the constant area in the assembled code.
// Used for fixups to the constant area.
int constant_area_start_;
- // PC-relative method patch info for kBootImageLinkTimePcRelative.
+ // PC-relative method patch info for kBootImageLinkTimePcRelative/kBootImageRelRo.
+ // Also used for type/string patches for kBootImageRelRo (same linker patch as for methods).
ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
// PC-relative method patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
// PC-relative type patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
- // Type patch locations for kBssEntry.
+ // PC-relative type patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
- // String patch locations; type depends on configuration (intern table or boot image PIC).
+ // PC-relative String patch info for kBootImageLinkTimePcRelative.
ArenaDeque<PatchInfo<Label>> boot_image_string_patches_;
- // String patch locations for kBssEntry.
+ // PC-relative String patch info for kBssEntry.
ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_;
+ // PC-relative patch info for IntrinsicObjects.
+ ArenaDeque<PatchInfo<Label>> boot_image_intrinsic_patches_;
// Patches for string literals in JIT compiled code.
ArenaDeque<PatchInfo<Label>> jit_string_patches_;
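An earlier hunk in this header adds a `RemByPowerOfTwo(HRem*)` helper next to `DivByPowerOfTwo`. Its body is not part of this diff, but the name points at the standard strength reduction of a signed remainder by a power of two. A minimal stand-alone sketch of that technique (an illustration only, not ART's emitted x86-64 sequence; `k` is assumed to be in [1, 63]):

  #include <cstdint>

  // Signed n % (1 << k) with C++ semantics (result takes the dividend's
  // sign), computed without a division instruction.
  int64_t RemPowerOfTwo(int64_t n, unsigned k) {
    const int64_t mask = (int64_t{1} << k) - 1;
    // All-ones if n is negative, zero otherwise.
    const int64_t sign = n >> 63;
    // 2^k - 1 for negative n, 0 for non-negative n.
    const int64_t bias = static_cast<int64_t>(static_cast<uint64_t>(sign) >> (64 - k));
    // Bias negative dividends so the mask rounds toward zero, then un-bias.
    return ((n + bias) & mask) - bias;
  }

For example, RemPowerOfTwo(-5, 2) biases -5 to -2, masks to 2, and un-biases to -1, matching -5 % 4.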
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
index 2e31d35584..f406983fc2 100644
--- a/compiler/optimizing/code_sinking.cc
+++ b/compiler/optimizing/code_sinking.cc
@@ -25,11 +25,11 @@
namespace art {
-void CodeSinking::Run() {
+bool CodeSinking::Run() {
HBasicBlock* exit = graph_->GetExitBlock();
if (exit == nullptr) {
// Infinite loop, just bail.
- return;
+ return false;
}
// TODO(ngeoffray): we do not profile branches yet, so use throw instructions
// as an indicator of an uncommon branch.
@@ -40,6 +40,7 @@ void CodeSinking::Run() {
SinkCodeToUncommonBranch(exit_predecessor);
}
}
+ return true;
}
static bool IsInterestingInstruction(HInstruction* instruction) {
@@ -179,7 +180,7 @@ static HInstruction* FindIdealPosition(HInstruction* instruction,
DCHECK(!instruction->IsPhi()); // Makes no sense for Phi.
// Find the target block.
- CommonDominator finder(/* start_block */ nullptr);
+ CommonDominator finder(/* block= */ nullptr);
for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
HInstruction* user = use.GetUser();
if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) {
@@ -258,12 +259,12 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
size_t number_of_instructions = graph_->GetCurrentInstructionId();
ScopedArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc));
- ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false);
+ ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable= */ false);
processed_instructions.ClearAllBits();
- ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false);
+ ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable= */ false);
post_dominated.ClearAllBits();
ArenaBitVector instructions_that_can_move(
- &allocator, number_of_instructions, /* expandable */ false);
+ &allocator, number_of_instructions, /* expandable= */ false);
instructions_that_can_move.ClearAllBits();
ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
@@ -413,7 +414,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
}
// Find the position of the instruction we're storing into, filtering out this
// store and all other stores to that instruction.
- position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true);
+ position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter= */ true);
// The position needs to be dominated by the store, in order for the store to move there.
if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) {
@@ -433,7 +434,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
continue;
}
MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSunk);
- instruction->MoveBefore(position, /* ensure_safety */ false);
+ instruction->MoveBefore(position, /* do_checks= */ false);
}
}
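The change of `CodeSinking::Run()` from `void` to `bool` is part of a convention visible throughout this diff: a pass now reports whether it may have changed the graph, returning `false` on an early bail-out where the graph is guaranteed untouched. A hedged sketch of the contract for a hypothetical pass (`MyPass` and its body are illustrative, not from this change):

  class MyPass : public HOptimization {
   public:
    explicit MyPass(HGraph* graph) : HOptimization(graph, "my_pass") {}

    bool Run() override {
      if (graph_->GetExitBlock() == nullptr) {
        // Bailed out before doing anything: the graph is certainly unchanged.
        return false;
      }
      // ... transform the graph ...
      // Conservatively report "may have changed"; the new
      // GraphChecker::Run() further down uses this to sanity-check
      // no-change claims against the graph size.
      return true;
    }
  };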
diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h
index 836d9d4f67..8eb3a520c3 100644
--- a/compiler/optimizing/code_sinking.h
+++ b/compiler/optimizing/code_sinking.h
@@ -33,7 +33,7 @@ class CodeSinking : public HOptimization {
const char* name = kCodeSinkingPassName)
: HOptimization(graph, name, stats) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kCodeSinkingPassName = "code_sinking";
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index a0fd5ffcb1..b5a7c137f6 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -89,7 +89,8 @@ void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, i
HGraph* graph = CreateCFG(data);
// Remove suspend checks, they cannot be executed in this context.
RemoveSuspendChecks(graph);
- RunCode(target_config, graph, [](HGraph*) {}, has_result, expected);
+ OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default");
+ RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, has_result, expected);
}
}
@@ -100,7 +101,8 @@ void CodegenTest::TestCodeLong(const std::vector<uint16_t>& data,
HGraph* graph = CreateCFG(data, DataType::Type::kInt64);
// Remove suspend checks, they cannot be executed in this context.
RemoveSuspendChecks(graph);
- RunCode(target_config, graph, [](HGraph*) {}, has_result, expected);
+ OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default");
+ RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, has_result, expected);
}
}
@@ -451,7 +453,7 @@ TEST_F(CodegenTest, NonMaterializedCondition) {
ASSERT_FALSE(equal->IsEmittedAtUseSite());
graph->BuildDominatorTree();
- PrepareForRegisterAllocation(graph).Run();
+ PrepareForRegisterAllocation(graph, *compiler_options_).Run();
ASSERT_TRUE(equal->IsEmittedAtUseSite());
auto hook_before_codegen = [](HGraph* graph_in) {
@@ -460,7 +462,8 @@ TEST_F(CodegenTest, NonMaterializedCondition) {
block->InsertInstructionBefore(move, block->GetLastInstruction());
};
- RunCode(target_config, graph, hook_before_codegen, true, 0);
+ OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default");
+ RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, 0);
}
}
@@ -506,7 +509,8 @@ TEST_F(CodegenTest, MaterializedCondition1) {
new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator());
block->InsertInstructionBefore(move, block->GetLastInstruction());
};
- RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default");
+ RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
}
}
}
@@ -573,7 +577,8 @@ TEST_F(CodegenTest, MaterializedCondition2) {
new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator());
block->InsertInstructionBefore(move, block->GetLastInstruction());
};
- RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
+ OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default");
+ RunCode(target_config, *compiler_options_, graph, hook_before_codegen, true, lhs[i] < rhs[i]);
}
}
}
@@ -682,7 +687,8 @@ void CodegenTest::TestComparison(IfCondition condition,
block->AddInstruction(new (GetAllocator()) HReturn(comparison));
graph->BuildDominatorTree();
- RunCode(target_config, graph, [](HGraph*) {}, true, expected_result);
+ OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default");
+ RunCode(target_config, *compiler_options_, graph, [](HGraph*) {}, true, expected_result);
}
TEST_F(CodegenTest, ComparisonsInt) {
@@ -713,10 +719,9 @@ TEST_F(CodegenTest, ComparisonsLong) {
#ifdef ART_ENABLE_CODEGEN_arm
TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) {
- std::unique_ptr<const ArmInstructionSetFeatures> features(
- ArmInstructionSetFeatures::FromCppDefines());
+ OverrideInstructionSetFeatures(InstructionSet::kThumb2, "default");
HGraph* graph = CreateGraph();
- arm::CodeGeneratorARMVIXL codegen(graph, *features.get(), CompilerOptions());
+ arm::CodeGeneratorARMVIXL codegen(graph, *compiler_options_);
codegen.Initialize();
@@ -737,10 +742,9 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) {
#ifdef ART_ENABLE_CODEGEN_arm64
// Regression test for b/34760542.
TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
- std::unique_ptr<const Arm64InstructionSetFeatures> features(
- Arm64InstructionSetFeatures::FromCppDefines());
+ OverrideInstructionSetFeatures(InstructionSet::kArm64, "default");
HGraph* graph = CreateGraph();
- arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions());
+ arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_);
codegen.Initialize();
@@ -787,10 +791,9 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) {
// Check that ParallelMoveResolver works for ARM64 both when SIMD is on and when it is off.
TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
- std::unique_ptr<const Arm64InstructionSetFeatures> features(
- Arm64InstructionSetFeatures::FromCppDefines());
+ OverrideInstructionSetFeatures(InstructionSet::kArm64, "default");
HGraph* graph = CreateGraph();
- arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions());
+ arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_);
codegen.Initialize();
@@ -820,13 +823,40 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) {
InternalCodeAllocator code_allocator;
codegen.Finalize(&code_allocator);
}
+
+// Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a75 as an example).
+TEST_F(CodegenTest, ARM64IsaVIXLFeaturesA75) {
+ OverrideInstructionSetFeatures(InstructionSet::kArm64, "cortex-a75");
+ HGraph* graph = CreateGraph();
+ arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_);
+ vixl::CPUFeatures* features = codegen.GetVIXLAssembler()->GetCPUFeatures();
+
+ EXPECT_TRUE(features->Has(vixl::CPUFeatures::kCRC32));
+ EXPECT_TRUE(features->Has(vixl::CPUFeatures::kDotProduct));
+ EXPECT_TRUE(features->Has(vixl::CPUFeatures::kFPHalf));
+ EXPECT_TRUE(features->Has(vixl::CPUFeatures::kAtomics));
+}
+
+// Check that ART ISA Features are propagated to VIXL for arm64 (using cortex-a53 as an example).
+TEST_F(CodegenTest, ARM64IsaVIXLFeaturesA53) {
+ OverrideInstructionSetFeatures(InstructionSet::kArm64, "cortex-a53");
+ HGraph* graph = CreateGraph();
+ arm64::CodeGeneratorARM64 codegen(graph, *compiler_options_);
+ vixl::CPUFeatures* features = codegen.GetVIXLAssembler()->GetCPUFeatures();
+
+ EXPECT_TRUE(features->Has(vixl::CPUFeatures::kCRC32));
+ EXPECT_FALSE(features->Has(vixl::CPUFeatures::kDotProduct));
+ EXPECT_FALSE(features->Has(vixl::CPUFeatures::kFPHalf));
+ EXPECT_FALSE(features->Has(vixl::CPUFeatures::kAtomics));
+}
+
#endif
#ifdef ART_ENABLE_CODEGEN_mips
TEST_F(CodegenTest, MipsClobberRA) {
- std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
- MipsInstructionSetFeatures::FromCppDefines());
- if (!CanExecute(InstructionSet::kMips) || features_mips->IsR6()) {
+ OverrideInstructionSetFeatures(InstructionSet::kMips, "mips32r");
+ CHECK(!instruction_set_features_->AsMipsInstructionSetFeatures()->IsR6());
+ if (!CanExecute(InstructionSet::kMips)) {
// HMipsComputeBaseMethodAddress and the NAL instruction behind it
// should only be generated on non-R6.
return;
@@ -860,7 +890,7 @@ TEST_F(CodegenTest, MipsClobberRA) {
graph->BuildDominatorTree();
- mips::CodeGeneratorMIPS codegenMIPS(graph, *features_mips.get(), CompilerOptions());
+ mips::CodeGeneratorMIPS codegenMIPS(graph, *compiler_options_);
// Since there isn't HLoadClass or HLoadString, we need to manually indicate
// that RA is clobbered and the method entry code should generate a stack frame
// and preserve RA in it. And this is what we're testing here.
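The tests above now route ISA selection through `OverrideInstructionSetFeatures(isa, variant)` and build code generators from `*compiler_options_` alone. The variant string presumably resolves through ART's `InstructionSetFeatures::FromVariant()`; a hedged sketch of that resolution step (the plumbing into `compiler_options_` is assumed, not shown in this diff):

  std::string error_msg;
  std::unique_ptr<const InstructionSetFeatures> features =
      InstructionSetFeatures::FromVariant(InstructionSet::kArm64, "cortex-a75", &error_msg);
  CHECK(features != nullptr) << error_msg;
  // compiler_options_ would then carry `features` into the code generator,
  // which forwards them to VIXL (see ARM64IsaVIXLFeaturesA75 above).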
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index c41c290c8b..dde39d46f3 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -17,17 +17,11 @@
#ifndef ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_
#define ART_COMPILER_OPTIMIZING_CODEGEN_TEST_UTILS_H_
-#include "arch/arm/instruction_set_features_arm.h"
#include "arch/arm/registers_arm.h"
-#include "arch/arm64/instruction_set_features_arm64.h"
#include "arch/instruction_set.h"
-#include "arch/mips/instruction_set_features_mips.h"
#include "arch/mips/registers_mips.h"
-#include "arch/mips64/instruction_set_features_mips64.h"
#include "arch/mips64/registers_mips64.h"
-#include "arch/x86/instruction_set_features_x86.h"
#include "arch/x86/registers_x86.h"
-#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_simulator.h"
#include "code_simulator_container.h"
#include "common_compiler_test.h"
@@ -101,15 +95,13 @@ class CodegenTargetConfig {
// to just overwrite the code generator.
class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
public:
- TestCodeGeneratorARMVIXL(HGraph* graph,
- const ArmInstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options)
- : arm::CodeGeneratorARMVIXL(graph, isa_features, compiler_options) {
+ TestCodeGeneratorARMVIXL(HGraph* graph, const CompilerOptions& compiler_options)
+ : arm::CodeGeneratorARMVIXL(graph, compiler_options) {
AddAllocatedRegister(Location::RegisterLocation(arm::R6));
AddAllocatedRegister(Location::RegisterLocation(arm::R7));
}
- void SetupBlockedRegisters() const OVERRIDE {
+ void SetupBlockedRegisters() const override {
arm::CodeGeneratorARMVIXL::SetupBlockedRegisters();
blocked_core_registers_[arm::R4] = true;
blocked_core_registers_[arm::R6] = false;
@@ -117,7 +109,7 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
}
void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
- Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+ Location temp_loc ATTRIBUTE_UNUSED) override {
// When turned on, the marking register checks in
// CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expect the
// Thread Register and the Marking Register to be set to
@@ -145,13 +137,11 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
// function.
class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 {
public:
- TestCodeGeneratorARM64(HGraph* graph,
- const Arm64InstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options)
- : arm64::CodeGeneratorARM64(graph, isa_features, compiler_options) {}
+ TestCodeGeneratorARM64(HGraph* graph, const CompilerOptions& compiler_options)
+ : arm64::CodeGeneratorARM64(graph, compiler_options) {}
void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
- Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+ Location temp_loc ATTRIBUTE_UNUSED) override {
// When turned on, the marking register checks in
// CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the
// Thread Register and the Marking Register to be set to
@@ -165,15 +155,13 @@ class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 {
#ifdef ART_ENABLE_CODEGEN_x86
class TestCodeGeneratorX86 : public x86::CodeGeneratorX86 {
public:
- TestCodeGeneratorX86(HGraph* graph,
- const X86InstructionSetFeatures& isa_features,
- const CompilerOptions& compiler_options)
- : x86::CodeGeneratorX86(graph, isa_features, compiler_options) {
+ TestCodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
+ : x86::CodeGeneratorX86(graph, compiler_options) {
// Save edi, we need it for getting enough registers for long multiplication.
AddAllocatedRegister(Location::RegisterLocation(x86::EDI));
}
- void SetupBlockedRegisters() const OVERRIDE {
+ void SetupBlockedRegisters() const override {
x86::CodeGeneratorX86::SetupBlockedRegisters();
// ebx is a callee-save register in C, but caller-save for ART.
blocked_core_registers_[x86::EBX] = true;
@@ -188,14 +176,16 @@ class InternalCodeAllocator : public CodeAllocator {
public:
InternalCodeAllocator() : size_(0) { }
- virtual uint8_t* Allocate(size_t size) {
+ uint8_t* Allocate(size_t size) override {
size_ = size;
memory_.reset(new uint8_t[size]);
return memory_.get();
}
size_t GetSize() const { return size_; }
- uint8_t* GetMemory() const { return memory_.get(); }
+ ArrayRef<const uint8_t> GetMemory() const override {
+ return ArrayRef<const uint8_t>(memory_.get(), size_);
+ }
private:
size_t size_;
@@ -269,8 +259,8 @@ static void Run(const InternalCodeAllocator& allocator,
InstructionSet target_isa = codegen.GetInstructionSet();
typedef Expected (*fptr)();
- CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
- fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
+ CommonCompilerTest::MakeExecutable(allocator.GetMemory().data(), allocator.GetMemory().size());
+ fptr f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(allocator.GetMemory().data()));
if (target_isa == InstructionSet::kThumb2) {
// For thumb we need the bottom bit set.
f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
@@ -298,7 +288,7 @@ static void RunCodeNoCheck(CodeGenerator* codegen,
{
ScopedArenaAllocator local_allocator(graph->GetArenaStack());
SsaLivenessAnalysis liveness(graph, codegen, &local_allocator);
- PrepareForRegisterAllocation(graph).Run();
+ PrepareForRegisterAllocation(graph, codegen->GetCompilerOptions()).Run();
liveness.Analyze();
std::unique_ptr<RegisterAllocator> register_allocator =
RegisterAllocator::Create(&local_allocator, codegen, liveness);
@@ -322,11 +312,11 @@ static void RunCode(CodeGenerator* codegen,
template <typename Expected>
static void RunCode(CodegenTargetConfig target_config,
+ const CompilerOptions& compiler_options,
HGraph* graph,
std::function<void(HGraph*)> hook_before_codegen,
bool has_result,
Expected expected) {
- CompilerOptions compiler_options;
std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph,
compiler_options));
RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected);
@@ -334,55 +324,37 @@ static void RunCode(CodegenTargetConfig target_config,
#ifdef ART_ENABLE_CODEGEN_arm
CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
- ArmInstructionSetFeatures::FromCppDefines());
- return new (graph->GetAllocator())
- TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options);
+ return new (graph->GetAllocator()) TestCodeGeneratorARMVIXL(graph, compiler_options);
}
#endif
#ifdef ART_ENABLE_CODEGEN_arm64
CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const Arm64InstructionSetFeatures> features_arm64(
- Arm64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetAllocator())
- TestCodeGeneratorARM64(graph, *features_arm64.get(), compiler_options);
+ return new (graph->GetAllocator()) TestCodeGeneratorARM64(graph, compiler_options);
}
#endif
#ifdef ART_ENABLE_CODEGEN_x86
CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- return new (graph->GetAllocator()) TestCodeGeneratorX86(
- graph, *features_x86.get(), compiler_options);
+ return new (graph->GetAllocator()) TestCodeGeneratorX86(graph, compiler_options);
}
#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64(
- X86_64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetAllocator())
- x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options);
+ return new (graph->GetAllocator()) x86_64::CodeGeneratorX86_64(graph, compiler_options);
}
#endif
#ifdef ART_ENABLE_CODEGEN_mips
CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const MipsInstructionSetFeatures> features_mips(
- MipsInstructionSetFeatures::FromCppDefines());
- return new (graph->GetAllocator())
- mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options);
+ return new (graph->GetAllocator()) mips::CodeGeneratorMIPS(graph, compiler_options);
}
#endif
#ifdef ART_ENABLE_CODEGEN_mips64
CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) {
- std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64(
- Mips64InstructionSetFeatures::FromCppDefines());
- return new (graph->GetAllocator())
- mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options);
+ return new (graph->GetAllocator()) mips64::CodeGeneratorMIPS64(graph, compiler_options);
}
#endif
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index 356ff9f41f..7d3af9521a 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -17,7 +17,7 @@
#ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
#define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_
-#include "debug/dwarf/register.h"
+#include "dwarf/register.h"
#include "instruction_simplifier_shared.h"
#include "locations.h"
#include "nodes.h"
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index ed2f8e995d..5556f16740 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -151,23 +151,15 @@ inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* inst
return InputCPURegisterAt(instr, index);
}
-inline int64_t Int64ConstantFrom(Location location) {
- HConstant* instr = location.GetConstant();
- if (instr->IsIntConstant()) {
- return instr->AsIntConstant()->GetValue();
- } else if (instr->IsNullConstant()) {
- return 0;
- } else {
- DCHECK(instr->IsLongConstant()) << instr->DebugName();
- return instr->AsLongConstant()->GetValue();
- }
+inline int64_t Int64FromLocation(Location location) {
+ return Int64FromConstant(location.GetConstant());
}
inline vixl::aarch64::Operand OperandFrom(Location location, DataType::Type type) {
if (location.IsRegister()) {
return vixl::aarch64::Operand(RegisterFrom(location, type));
} else {
- return vixl::aarch64::Operand(Int64ConstantFrom(location));
+ return vixl::aarch64::Operand(Int64FromLocation(location));
}
}
@@ -234,6 +226,13 @@ inline vixl::aarch64::Operand OperandFromMemOperand(
}
}
+inline bool AddSubCanEncodeAsImmediate(int64_t value) {
+ // If `value` does not fit but `-value` does, VIXL will automatically use
+ // the 'opposite' instruction.
+ return vixl::aarch64::Assembler::IsImmAddSub(value)
+ || vixl::aarch64::Assembler::IsImmAddSub(-value);
+}
+
inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
int64_t value = CodeGenerator::GetInt64ValueOf(constant);
@@ -249,6 +248,20 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction*
return IsUint<8>(value);
}
+ // Code generation for Min/Max:
+ // Cmp left_op, right_op
+ // Csel dst, left_op, right_op, cond
+ if (instr->IsMin() || instr->IsMax()) {
+ if (constant->GetUses().HasExactlyOneElement()) {
+ // If value can be encoded as immediate for the Cmp, then let VIXL handle
+ // the constant generation for the Csel.
+ return AddSubCanEncodeAsImmediate(value);
+ }
+ // These values are encodable as immediates for Cmp and VIXL will use csinc and csinv
+ // with the zr register as right_op, hence no constant generation is required.
+ return constant->IsZeroBitPattern() || constant->IsOne() || constant->IsMinusOne();
+ }
+
// For single uses we let VIXL handle the constant generation since it will
// use registers that are not managed by the register allocator (wip0, wip1).
if (constant->GetUses().HasExactlyOneElement()) {
@@ -275,10 +288,7 @@ inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction*
instr->IsSub())
<< instr->DebugName();
// Uses aliases of ADD/SUB instructions.
- // If `value` does not fit but `-value` does, VIXL will automatically use
- // the 'opposite' instruction.
- return vixl::aarch64::Assembler::IsImmAddSub(value)
- || vixl::aarch64::Assembler::IsImmAddSub(-value);
+ return AddSubCanEncodeAsImmediate(value);
}
}
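The new `AddSubCanEncodeAsImmediate()` centralizes the AArch64 rule that ADD/SUB immediates are 12 bits, optionally shifted left by 12, with VIXL flipping to the opposite instruction when only `-value` fits. A few example values evaluated against that rule (illustrative, assuming VIXL's `IsImmAddSub()` semantics):

  #include "aarch64/assembler-aarch64.h"  // VIXL

  // Mirrors the helper in the hunk above.
  // CanEncode(4095)     -> true   (plain 12-bit immediate)
  // CanEncode(-4095)    -> true   (VIXL flips ADD <-> SUB)
  // CanEncode(0x123000) -> true   (0x123 shifted left by 12)
  // CanEncode(4097)     -> false  (needs 13 bits and is not a shifted 12-bit value)
  bool CanEncode(int64_t value) {
    return vixl::aarch64::Assembler::IsImmAddSub(value) ||
           vixl::aarch64::Assembler::IsImmAddSub(-value);
  }

For Min/Max with the constants 0, 1 and -1, no materialization is needed at all: per the comment in the hunk, the Csel becomes csel/csinc/csinv against the zero register.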
diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc
index 6f11e628ee..09e7cabfa4 100644
--- a/compiler/optimizing/constant_folding.cc
+++ b/compiler/optimizing/constant_folding.cc
@@ -26,13 +26,13 @@ class HConstantFoldingVisitor : public HGraphDelegateVisitor {
: HGraphDelegateVisitor(graph) {}
private:
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
+ void VisitBasicBlock(HBasicBlock* block) override;
- void VisitUnaryOperation(HUnaryOperation* inst) OVERRIDE;
- void VisitBinaryOperation(HBinaryOperation* inst) OVERRIDE;
+ void VisitUnaryOperation(HUnaryOperation* inst) override;
+ void VisitBinaryOperation(HBinaryOperation* inst) override;
- void VisitTypeConversion(HTypeConversion* inst) OVERRIDE;
- void VisitDivZeroCheck(HDivZeroCheck* inst) OVERRIDE;
+ void VisitTypeConversion(HTypeConversion* inst) override;
+ void VisitDivZeroCheck(HDivZeroCheck* inst) override;
DISALLOW_COPY_AND_ASSIGN(HConstantFoldingVisitor);
};
@@ -47,34 +47,35 @@ class InstructionWithAbsorbingInputSimplifier : public HGraphVisitor {
private:
void VisitShift(HBinaryOperation* shift);
- void VisitEqual(HEqual* instruction) OVERRIDE;
- void VisitNotEqual(HNotEqual* instruction) OVERRIDE;
-
- void VisitAbove(HAbove* instruction) OVERRIDE;
- void VisitAboveOrEqual(HAboveOrEqual* instruction) OVERRIDE;
- void VisitBelow(HBelow* instruction) OVERRIDE;
- void VisitBelowOrEqual(HBelowOrEqual* instruction) OVERRIDE;
-
- void VisitAnd(HAnd* instruction) OVERRIDE;
- void VisitCompare(HCompare* instruction) OVERRIDE;
- void VisitMul(HMul* instruction) OVERRIDE;
- void VisitOr(HOr* instruction) OVERRIDE;
- void VisitRem(HRem* instruction) OVERRIDE;
- void VisitShl(HShl* instruction) OVERRIDE;
- void VisitShr(HShr* instruction) OVERRIDE;
- void VisitSub(HSub* instruction) OVERRIDE;
- void VisitUShr(HUShr* instruction) OVERRIDE;
- void VisitXor(HXor* instruction) OVERRIDE;
+ void VisitEqual(HEqual* instruction) override;
+ void VisitNotEqual(HNotEqual* instruction) override;
+
+ void VisitAbove(HAbove* instruction) override;
+ void VisitAboveOrEqual(HAboveOrEqual* instruction) override;
+ void VisitBelow(HBelow* instruction) override;
+ void VisitBelowOrEqual(HBelowOrEqual* instruction) override;
+
+ void VisitAnd(HAnd* instruction) override;
+ void VisitCompare(HCompare* instruction) override;
+ void VisitMul(HMul* instruction) override;
+ void VisitOr(HOr* instruction) override;
+ void VisitRem(HRem* instruction) override;
+ void VisitShl(HShl* instruction) override;
+ void VisitShr(HShr* instruction) override;
+ void VisitSub(HSub* instruction) override;
+ void VisitUShr(HUShr* instruction) override;
+ void VisitXor(HXor* instruction) override;
};
-void HConstantFolding::Run() {
+bool HConstantFolding::Run() {
HConstantFoldingVisitor visitor(graph_);
// Process basic blocks in reverse post-order in the dominator tree,
// so that an instruction turned into a constant, used as input of
// another instruction, may possibly be used to turn that second
// instruction into a constant as well.
visitor.VisitReversePostOrder();
+ return true;
}
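The reverse post-order comment above is the whole point of the traversal; a small worked example of the cascade it enables (hypothetical HIR, for illustration only):

  // i3 = IntConstant(1)
  // i4 = IntConstant(2)
  // i5 = Add(i3, i4)   // visited first in RPO  -> replaced by IntConstant(3)
  // i6 = Mul(i5, i4)   // visited afterwards    -> replaced by IntConstant(6)
  //
  // Visiting i6 before i5 would leave i6 unfolded until another pass,
  // since its operand would not yet be a constant.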
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index 05c6df4a93..72bd95b3cb 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -41,7 +41,7 @@ class HConstantFolding : public HOptimization {
public:
HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kConstantFoldingPassName = "constant_folding";
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index d27104752b..74d9d3a993 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -16,8 +16,6 @@
#include <functional>
-#include "arch/x86/instruction_set_features_x86.h"
-#include "code_generator_x86.h"
#include "constant_folding.h"
#include "dead_code_elimination.h"
#include "driver/compiler_options.h"
@@ -60,9 +58,6 @@ class ConstantFoldingTest : public OptimizingUnitTest {
std::string actual_before = printer_before.str();
EXPECT_EQ(expected_before, actual_before);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions());
HConstantFolding(graph_, "constant_folding").Run();
GraphChecker graph_checker_cf(graph_);
graph_checker_cf.Run();
@@ -75,7 +70,7 @@ class ConstantFoldingTest : public OptimizingUnitTest {
check_after_cf(graph_);
- HDeadCodeElimination(graph_, nullptr /* stats */, "dead_code_elimination").Run();
+ HDeadCodeElimination(graph_, /* stats= */ nullptr, "dead_code_elimination").Run();
GraphChecker graph_checker_dce(graph_);
graph_checker_dce.Run();
ASSERT_TRUE(graph_checker_dce.IsValid());
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
index 4a66cd2265..3a1a9e023d 100644
--- a/compiler/optimizing/constructor_fence_redundancy_elimination.cc
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc
@@ -34,7 +34,7 @@ class CFREVisitor : public HGraphVisitor {
candidate_fence_targets_(scoped_allocator_.Adapter(kArenaAllocCFRE)),
stats_(stats) {}
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ void VisitBasicBlock(HBasicBlock* block) override {
// Visit all instructions in block.
HGraphVisitor::VisitBasicBlock(block);
@@ -43,86 +43,86 @@ class CFREVisitor : public HGraphVisitor {
MergeCandidateFences();
}
- void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE {
+ void VisitConstructorFence(HConstructorFence* constructor_fence) override {
candidate_fences_.push_back(constructor_fence);
for (size_t input_idx = 0; input_idx < constructor_fence->InputCount(); ++input_idx) {
- candidate_fence_targets_.Insert(constructor_fence->InputAt(input_idx));
+ candidate_fence_targets_.insert(constructor_fence->InputAt(input_idx));
}
}
- void VisitBoundType(HBoundType* bound_type) OVERRIDE {
+ void VisitBoundType(HBoundType* bound_type) override {
VisitAlias(bound_type);
}
- void VisitNullCheck(HNullCheck* null_check) OVERRIDE {
+ void VisitNullCheck(HNullCheck* null_check) override {
VisitAlias(null_check);
}
- void VisitSelect(HSelect* select) OVERRIDE {
+ void VisitSelect(HSelect* select) override {
VisitAlias(select);
}
- void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
+ void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
HInstruction* value = instruction->InputAt(1);
VisitSetLocation(instruction, value);
}
- void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+ void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
HInstruction* value = instruction->InputAt(1);
VisitSetLocation(instruction, value);
}
- void VisitArraySet(HArraySet* instruction) OVERRIDE {
+ void VisitArraySet(HArraySet* instruction) override {
HInstruction* value = instruction->InputAt(2);
VisitSetLocation(instruction, value);
}
- void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) {
+ void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) override {
// Pessimize: Merge all fences.
MergeCandidateFences();
}
- void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override {
HandleInvoke(invoke);
}
- void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+ void VisitInvokeVirtual(HInvokeVirtual* invoke) override {
HandleInvoke(invoke);
}
- void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
+ void VisitInvokeInterface(HInvokeInterface* invoke) override {
HandleInvoke(invoke);
}
- void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE {
+ void VisitInvokeUnresolved(HInvokeUnresolved* invoke) override {
HandleInvoke(invoke);
}
- void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE {
+ void VisitInvokePolymorphic(HInvokePolymorphic* invoke) override {
HandleInvoke(invoke);
}
- void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE {
+ void VisitClinitCheck(HClinitCheck* clinit) override {
HandleInvoke(clinit);
}
- void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE {
+ void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
// Conservatively treat it as an invocation.
HandleInvoke(instruction);
}
- void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE {
+ void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
// Conservatively treat it as an invocation.
HandleInvoke(instruction);
}
- void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE {
+ void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
// Conservatively treat it as an invocation.
HandleInvoke(instruction);
}
- void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE {
+ void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
// Conservatively treat it as an invocation.
HandleInvoke(instruction);
}
@@ -208,13 +208,13 @@ class CFREVisitor : public HGraphVisitor {
// there is no benefit to this extra complexity unless we also reordered
// the stores to come later.
candidate_fences_.clear();
- candidate_fence_targets_.Clear();
+ candidate_fence_targets_.clear();
}
// A publishing 'store' is only interesting if the value being stored
// is one of the fence `targets` in `candidate_fences`.
bool IsInterestingPublishTarget(HInstruction* store_input) const {
- return candidate_fence_targets_.Find(store_input) != candidate_fence_targets_.end();
+ return candidate_fence_targets_.find(store_input) != candidate_fence_targets_.end();
}
void MaybeMerge(HConstructorFence* target, HConstructorFence* src) {
@@ -250,13 +250,14 @@ class CFREVisitor : public HGraphVisitor {
DISALLOW_COPY_AND_ASSIGN(CFREVisitor);
};
-void ConstructorFenceRedundancyElimination::Run() {
+bool ConstructorFenceRedundancyElimination::Run() {
CFREVisitor cfre_visitor(graph_, stats_);
// Arbitrarily visit in reverse-post order.
// The exact block visit order does not matter, as the algorithm
// only operates on a single block at a time.
cfre_visitor.VisitReversePostOrder();
+ return true;
}
} // namespace art
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h
index f4b06d5544..014b342258 100644
--- a/compiler/optimizing/constructor_fence_redundancy_elimination.h
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h
@@ -52,7 +52,7 @@ class ConstructorFenceRedundancyElimination : public HOptimization {
const char* name = kCFREPassName)
: HOptimization(graph, name, stats) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kCFREPassName = "constructor_fence_redundancy_elimination";
diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h
index 4a6c91459f..3cbcc9e0c3 100644
--- a/compiler/optimizing/data_type.h
+++ b/compiler/optimizing/data_type.h
@@ -210,6 +210,42 @@ class DataType {
static bool IsTypeConversionImplicit(Type input_type, Type result_type);
static bool IsTypeConversionImplicit(int64_t value, Type result_type);
+ static bool IsZeroExtension(Type input_type, Type result_type) {
+ return IsIntOrLongType(result_type) &&
+ IsUnsignedType(input_type) &&
+ Size(result_type) > Size(input_type);
+ }
+
+ static Type ToSigned(Type type) {
+ switch (type) {
+ case Type::kUint8:
+ return Type::kInt8;
+ case Type::kUint16:
+ return Type::kInt16;
+ case Type::kUint32:
+ return Type::kInt32;
+ case Type::kUint64:
+ return Type::kInt64;
+ default:
+ return type;
+ }
+ }
+
+ static Type ToUnsigned(Type type) {
+ switch (type) {
+ case Type::kInt8:
+ return Type::kUint8;
+ case Type::kInt16:
+ return Type::kUint16;
+ case Type::kInt32:
+ return Type::kUint32;
+ case Type::kInt64:
+ return Type::kUint64;
+ default:
+ return type;
+ }
+ }
+
static const char* PrettyDescriptor(Type type);
private:
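Evaluated against the definitions just added, the helpers behave as follows (a usage sketch; `DataTypeHelperExamples` is a hypothetical wrapper around the public `DataType` API shown above):

  void DataTypeHelperExamples() {
    // All results follow directly from the definitions above.
    CHECK(DataType::IsZeroExtension(DataType::Type::kUint8, DataType::Type::kInt32));    // unsigned, widening
    CHECK(!DataType::IsZeroExtension(DataType::Type::kInt8, DataType::Type::kInt32));    // input is signed
    CHECK(!DataType::IsZeroExtension(DataType::Type::kUint16, DataType::Type::kUint16)); // result is not kInt32/kInt64
    CHECK(DataType::ToSigned(DataType::Type::kUint16) == DataType::Type::kInt16);
    CHECK(DataType::ToUnsigned(DataType::Type::kInt64) == DataType::Type::kUint64);
    CHECK(DataType::ToSigned(DataType::Type::kFloat32) == DataType::Type::kFloat32);     // default case: unchanged
  }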
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index 9fa0f72e80..1dc10948cc 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -508,7 +508,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() {
}
}
-void HDeadCodeElimination::Run() {
+bool HDeadCodeElimination::Run() {
// Do not eliminate dead blocks if the graph has irreducible loops. We could
// support it, but that would require changes in our loop representation to handle
// multiple entry points. We decided it was not worth the complexity.
@@ -526,6 +526,7 @@ void HDeadCodeElimination::Run() {
}
SsaRedundantPhiElimination(graph_).Run();
RemoveDeadInstructions();
+ return true;
}
} // namespace art
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 92a7f562e1..799721acf2 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -32,7 +32,8 @@ class HDeadCodeElimination : public HOptimization {
HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats, const char* name)
: HOptimization(graph, name, stats) {}
- void Run() OVERRIDE;
+ bool Run() override;
+
static constexpr const char* kDeadCodeEliminationPassName = "dead_code_elimination";
private:
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index adb6ce1187..f5cd4dc27a 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -16,8 +16,6 @@
#include "dead_code_elimination.h"
-#include "arch/x86/instruction_set_features_x86.h"
-#include "code_generator_x86.h"
#include "driver/compiler_options.h"
#include "graph_checker.h"
#include "optimizing_unit_test.h"
@@ -45,10 +43,7 @@ void DeadCodeEliminationTest::TestCode(const std::vector<uint16_t>& data,
std::string actual_before = printer_before.str();
ASSERT_EQ(actual_before, expected_before);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
- HDeadCodeElimination(graph, nullptr /* stats */, "dead_code_elimination").Run();
+ HDeadCodeElimination(graph, /* stats= */ nullptr, "dead_code_elimination").Run();
GraphChecker graph_checker(graph);
graph_checker.Run();
ASSERT_TRUE(graph_checker.IsValid());
diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc
index b63914faf7..63a370a47b 100644
--- a/compiler/optimizing/emit_swap_mips_test.cc
+++ b/compiler/optimizing/emit_swap_mips_test.cc
@@ -27,12 +27,13 @@ namespace art {
class EmitSwapMipsTest : public OptimizingUnitTest {
public:
- void SetUp() OVERRIDE {
+ void SetUp() override {
+ instruction_set_ = InstructionSet::kMips;
+ instruction_set_features_ = MipsInstructionSetFeatures::FromCppDefines();
+ OptimizingUnitTest::SetUp();
graph_ = CreateGraph();
- isa_features_ = MipsInstructionSetFeatures::FromCppDefines();
- codegen_ = new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_,
- *isa_features_.get(),
- CompilerOptions());
+ codegen_.reset(
+ new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_, *compiler_options_));
moves_ = new (GetAllocator()) HParallelMove(GetAllocator());
test_helper_.reset(
new AssemblerTestInfrastructure(GetArchitectureString(),
@@ -45,10 +46,12 @@ class EmitSwapMipsTest : public OptimizingUnitTest {
GetAssemblyHeader()));
}
- void TearDown() OVERRIDE {
+ void TearDown() override {
test_helper_.reset();
- isa_features_.reset();
+ codegen_.reset();
+ graph_ = nullptr;
ResetPoolAndAllocator();
+ OptimizingUnitTest::TearDown();
}
// Get the typically used name for this architecture.
@@ -106,10 +109,9 @@ class EmitSwapMipsTest : public OptimizingUnitTest {
protected:
HGraph* graph_;
HParallelMove* moves_;
- mips::CodeGeneratorMIPS* codegen_;
+ std::unique_ptr<mips::CodeGeneratorMIPS> codegen_;
mips::MipsAssembler* assembler_;
std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
- std::unique_ptr<const MipsInstructionSetFeatures> isa_features_;
};
TEST_F(EmitSwapMipsTest, TwoRegisters) {
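The ordering in the `SetUp()`/`TearDown()` hunks above is the substance of this change: the fixture must choose its ISA before the base `SetUp()` builds `compiler_options_`, and must release arena-backed objects before the base `TearDown()` resets the pool. A hedged sketch of the same pattern for a hypothetical fixture (member names mirror the hunk):

  class MyCodegenTest : public OptimizingUnitTest {
   protected:
    void SetUp() override {
      instruction_set_ = InstructionSet::kArm64;  // 1. pick the target first
      instruction_set_features_ = Arm64InstructionSetFeatures::FromCppDefines();
      OptimizingUnitTest::SetUp();  // 2. base builds compiler_options_ from the fields above
    }
    void TearDown() override {
      codegen_.reset();             // 3. drop arena-backed objects before the pool goes away
      OptimizingUnitTest::TearDown();
    }
    std::unique_ptr<CodeGenerator> codegen_;
  };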
diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc
index c88baa8610..01d9603802 100644
--- a/compiler/optimizing/graph_checker.cc
+++ b/compiler/optimizing/graph_checker.cc
@@ -25,6 +25,11 @@
#include "base/bit_vector-inl.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
+#include "handle.h"
+#include "mirror/class.h"
+#include "obj_ptr-inl.h"
+#include "scoped_thread_state_change-inl.h"
+#include "subtype_check.h"
namespace art {
@@ -53,6 +58,30 @@ static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) {
!boundary->IsEntry();
}
+
+size_t GraphChecker::Run(bool pass_change, size_t last_size) {
+ size_t current_size = GetGraph()->GetReversePostOrder().size();
+ if (!pass_change) {
+ // Nothing changed for certain. Do a quick sanity check on that assertion
+ // for anything other than the first call (when last size was still 0).
+ if (last_size != 0) {
+ if (current_size != last_size) {
+ AddError(StringPrintf("Incorrect no-change assertion, "
+ "last graph size %zu vs current graph size %zu",
+ last_size, current_size));
+ }
+ }
+ // TODO: if we would trust the "false" value of the flag completely, we
+ // could skip checking the graph at this point.
+ }
+
+ // VisitReversePostOrder is used instead of VisitInsertionOrder,
+ // as the latter might visit dead blocks removed by the dominator
+ // computation.
+ VisitReversePostOrder();
+ return current_size;
+}
+
void GraphChecker::VisitBasicBlock(HBasicBlock* block) {
current_block_ = block;
@@ -548,30 +577,85 @@ void GraphChecker::VisitReturnVoid(HReturnVoid* ret) {
}
}
-void GraphChecker::VisitCheckCast(HCheckCast* check) {
- VisitInstruction(check);
- HInstruction* input = check->InputAt(1);
- if (!input->IsLoadClass()) {
- AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
+void GraphChecker::CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+ size_t input_pos,
+ bool check_value,
+ uint32_t expected_value,
+ const char* name) {
+ if (!check->InputAt(input_pos)->IsIntConstant()) {
+ AddError(StringPrintf("%s:%d (bitstring) expects a HIntConstant input %zu (%s), not %s:%d.",
check->DebugName(),
check->GetId(),
- input->DebugName(),
- input->GetId()));
+ input_pos,
+ name,
+ check->InputAt(input_pos)->DebugName(),
+ check->InputAt(input_pos)->GetId()));
+ } else if (check_value) {
+ uint32_t actual_value =
+ static_cast<uint32_t>(check->InputAt(input_pos)->AsIntConstant()->GetValue());
+ if (actual_value != expected_value) {
+ AddError(StringPrintf("%s:%d (bitstring) has %s 0x%x, not 0x%x as expected.",
+ check->DebugName(),
+ check->GetId(),
+ name,
+ actual_value,
+ expected_value));
+ }
}
}
-void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
- VisitInstruction(instruction);
- HInstruction* input = instruction->InputAt(1);
- if (!input->IsLoadClass()) {
- AddError(StringPrintf("%s:%d expects a HLoadClass as second input, not %s:%d.",
- instruction->DebugName(),
- instruction->GetId(),
- input->DebugName(),
- input->GetId()));
+void GraphChecker::HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+ VisitInstruction(check);
+ HInstruction* input = check->InputAt(1);
+ if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ if (!input->IsNullConstant()) {
+ AddError(StringPrintf("%s:%d (bitstring) expects a HNullConstant as second input, not %s:%d.",
+ check->DebugName(),
+ check->GetId(),
+ input->DebugName(),
+ input->GetId()));
+ }
+ bool check_values = false;
+ BitString::StorageType expected_path_to_root = 0u;
+ BitString::StorageType expected_mask = 0u;
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> klass = check->GetClass().Get();
+ MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+ SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+ if (state == SubtypeCheckInfo::kAssigned) {
+ expected_path_to_root =
+ SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass);
+ expected_mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass);
+ check_values = true;
+ } else {
+ AddError(StringPrintf("%s:%d (bitstring) references a class with unassigned bitstring.",
+ check->DebugName(),
+ check->GetId()));
+ }
+ }
+ CheckTypeCheckBitstringInput(
+ check, /* input_pos= */ 2, check_values, expected_path_to_root, "path_to_root");
+ CheckTypeCheckBitstringInput(check, /* input_pos= */ 3, check_values, expected_mask, "mask");
+ } else {
+ if (!input->IsLoadClass()) {
+ AddError(StringPrintf("%s:%d (classic) expects a HLoadClass as second input, not %s:%d.",
+ check->DebugName(),
+ check->GetId(),
+ input->DebugName(),
+ input->GetId()));
+ }
}
}
+void GraphChecker::VisitCheckCast(HCheckCast* check) {
+ HandleTypeCheckInstruction(check);
+}
+
+void GraphChecker::VisitInstanceOf(HInstanceOf* instruction) {
+ HandleTypeCheckInstruction(instruction);
+}
+
void GraphChecker::HandleLoop(HBasicBlock* loop_header) {
int id = loop_header->GetBlockId();
HLoopInformation* loop_information = loop_header->GetLoopInformation();
@@ -847,7 +931,7 @@ void GraphChecker::VisitPhi(HPhi* phi) {
// because the BitVector reallocation strategy has very bad worst-case behavior.
ArenaBitVector visited(&allocator,
GetGraph()->GetCurrentInstructionId(),
- /* expandable */ false,
+ /* expandable= */ false,
kArenaAllocGraphChecker);
visited.ClearAllBits();
if (!IsConstantEquivalent(phi, other_phi, &visited)) {
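A hedged sketch of how a pass driver could consume the new `GraphChecker::Run(pass_change, last_size)` protocol added above (the driver loop itself is not part of this diff; `passes` is hypothetical):

  GraphChecker checker(graph);
  size_t last_size = 0;         // 0 = "no previous size observed"
  for (HOptimization* pass : passes) {
    bool pass_change = pass->Run();  // each pass now reports potential changes
    last_size = checker.Run(pass_change, last_size);
    CHECK(checker.IsValid());   // a size change under a no-change claim is an error
  }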
diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h
index 0f0b49d240..d085609197 100644
--- a/compiler/optimizing/graph_checker.h
+++ b/compiler/optimizing/graph_checker.h
@@ -38,39 +38,43 @@ class GraphChecker : public HGraphDelegateVisitor {
seen_ids_.ClearAllBits();
}
- // Check the whole graph (in reverse post-order).
- void Run() {
- // VisitReversePostOrder is used instead of VisitInsertionOrder,
- // as the latter might visit dead blocks removed by the dominator
- // computation.
- VisitReversePostOrder();
- }
-
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE;
-
- void VisitInstruction(HInstruction* instruction) OVERRIDE;
- void VisitPhi(HPhi* phi) OVERRIDE;
-
- void VisitBinaryOperation(HBinaryOperation* op) OVERRIDE;
- void VisitBooleanNot(HBooleanNot* instruction) OVERRIDE;
- void VisitBoundType(HBoundType* instruction) OVERRIDE;
- void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
- void VisitCheckCast(HCheckCast* check) OVERRIDE;
- void VisitCondition(HCondition* op) OVERRIDE;
- void VisitConstant(HConstant* instruction) OVERRIDE;
- void VisitDeoptimize(HDeoptimize* instruction) OVERRIDE;
- void VisitIf(HIf* instruction) OVERRIDE;
- void VisitInstanceOf(HInstanceOf* check) OVERRIDE;
- void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
- void VisitLoadException(HLoadException* load) OVERRIDE;
- void VisitNeg(HNeg* instruction) OVERRIDE;
- void VisitPackedSwitch(HPackedSwitch* instruction) OVERRIDE;
- void VisitReturn(HReturn* ret) OVERRIDE;
- void VisitReturnVoid(HReturnVoid* ret) OVERRIDE;
- void VisitSelect(HSelect* instruction) OVERRIDE;
- void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE;
- void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
-
+ // Check the whole graph. The pass_change parameter indicates whether changes
+ // may have occurred during the just executed pass. The default value is
+ // conservatively "true" (something may have changed). The last_size parameter
+ // and return value pass along the observed graph sizes.
+ size_t Run(bool pass_change = true, size_t last_size = 0);
+
+ void VisitBasicBlock(HBasicBlock* block) override;
+
+ void VisitInstruction(HInstruction* instruction) override;
+ void VisitPhi(HPhi* phi) override;
+
+ void VisitBinaryOperation(HBinaryOperation* op) override;
+ void VisitBooleanNot(HBooleanNot* instruction) override;
+ void VisitBoundType(HBoundType* instruction) override;
+ void VisitBoundsCheck(HBoundsCheck* check) override;
+ void VisitCheckCast(HCheckCast* check) override;
+ void VisitCondition(HCondition* op) override;
+ void VisitConstant(HConstant* instruction) override;
+ void VisitDeoptimize(HDeoptimize* instruction) override;
+ void VisitIf(HIf* instruction) override;
+ void VisitInstanceOf(HInstanceOf* check) override;
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override;
+ void VisitLoadException(HLoadException* load) override;
+ void VisitNeg(HNeg* instruction) override;
+ void VisitPackedSwitch(HPackedSwitch* instruction) override;
+ void VisitReturn(HReturn* ret) override;
+ void VisitReturnVoid(HReturnVoid* ret) override;
+ void VisitSelect(HSelect* instruction) override;
+ void VisitTryBoundary(HTryBoundary* try_boundary) override;
+ void VisitTypeConversion(HTypeConversion* instruction) override;
+
+ void CheckTypeCheckBitstringInput(HTypeCheckInstruction* check,
+ size_t input_pos,
+ bool check_value,
+ uint32_t expected_value,
+ const char* name);
+ void HandleTypeCheckInstruction(HTypeCheckInstruction* instruction);
void HandleLoop(HBasicBlock* loop_header);
void HandleBooleanInput(HInstruction* instruction, size_t input_index);
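
A minimal sketch of how a pass driver might use the new Run() signature, assuming the contract described in the comment above (the driver loop and 'passes' variable below are illustrative, not ART API):

  size_t last_size = 0;
  for (HOptimization* pass : passes) {   // 'passes' is a hypothetical pass list
    bool pass_change = pass->Run();      // optimizations now report changes
    GraphChecker checker(graph);
    // The checker can flag a graph whose size changed even though the pass
    // claimed pass_change == false.
    last_size = checker.Run(pass_change, last_size);
  }
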
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 5ff31cead5..2a7bbcb72f 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -106,8 +106,7 @@ std::ostream& operator<<(std::ostream& os, const StringList& list) {
}
}
-typedef Disassembler* create_disasm_prototype(InstructionSet instruction_set,
- DisassemblerOptions* options);
+using create_disasm_prototype = Disassembler*(InstructionSet, DisassemblerOptions*);
class HGraphVisualizerDisassembler {
public:
HGraphVisualizerDisassembler(InstructionSet instruction_set,
@@ -131,10 +130,10 @@ class HGraphVisualizerDisassembler {
// been generated, so we can read data in literal pools.
disassembler_ = std::unique_ptr<Disassembler>((*create_disassembler)(
instruction_set,
- new DisassemblerOptions(/* absolute_addresses */ false,
+ new DisassemblerOptions(/* absolute_addresses= */ false,
base_address,
end_address,
- /* can_read_literals */ true,
+ /* can_read_literals= */ true,
Is64BitInstructionSet(instruction_set)
? &Thread::DumpThreadOffset<PointerSize::k64>
: &Thread::DumpThreadOffset<PointerSize::k32>)));
@@ -333,7 +332,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
return output_;
}
- void VisitParallelMove(HParallelMove* instruction) OVERRIDE {
+ void VisitParallelMove(HParallelMove* instruction) override {
StartAttributeStream("liveness") << instruction->GetLifetimePosition();
StringList moves;
for (size_t i = 0, e = instruction->NumMoves(); i < e; ++i) {
@@ -346,36 +345,36 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
StartAttributeStream("moves") << moves;
}
- void VisitIntConstant(HIntConstant* instruction) OVERRIDE {
+ void VisitIntConstant(HIntConstant* instruction) override {
StartAttributeStream() << instruction->GetValue();
}
- void VisitLongConstant(HLongConstant* instruction) OVERRIDE {
+ void VisitLongConstant(HLongConstant* instruction) override {
StartAttributeStream() << instruction->GetValue();
}
- void VisitFloatConstant(HFloatConstant* instruction) OVERRIDE {
+ void VisitFloatConstant(HFloatConstant* instruction) override {
StartAttributeStream() << instruction->GetValue();
}
- void VisitDoubleConstant(HDoubleConstant* instruction) OVERRIDE {
+ void VisitDoubleConstant(HDoubleConstant* instruction) override {
StartAttributeStream() << instruction->GetValue();
}
- void VisitPhi(HPhi* phi) OVERRIDE {
+ void VisitPhi(HPhi* phi) override {
StartAttributeStream("reg") << phi->GetRegNumber();
StartAttributeStream("is_catch_phi") << std::boolalpha << phi->IsCatchPhi() << std::noboolalpha;
}
- void VisitMemoryBarrier(HMemoryBarrier* barrier) OVERRIDE {
+ void VisitMemoryBarrier(HMemoryBarrier* barrier) override {
StartAttributeStream("kind") << barrier->GetBarrierKind();
}
- void VisitMonitorOperation(HMonitorOperation* monitor) OVERRIDE {
+ void VisitMonitorOperation(HMonitorOperation* monitor) override {
StartAttributeStream("kind") << (monitor->IsEnter() ? "enter" : "exit");
}
- void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+ void VisitLoadClass(HLoadClass* load_class) override {
StartAttributeStream("load_kind") << load_class->GetLoadKind();
const char* descriptor = load_class->GetDexFile().GetTypeDescriptor(
load_class->GetDexFile().GetTypeId(load_class->GetTypeIndex()));
@@ -386,23 +385,42 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
<< load_class->NeedsAccessCheck() << std::noboolalpha;
}
- void VisitLoadString(HLoadString* load_string) OVERRIDE {
+ void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
+ StartAttributeStream("load_kind") << "RuntimeCall";
+ StartAttributeStream("method_handle_index") << load_method_handle->GetMethodHandleIndex();
+ }
+
+ void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
+ StartAttributeStream("load_kind") << "RuntimeCall";
+ const DexFile& dex_file = load_method_type->GetDexFile();
+ const dex::ProtoId& proto_id = dex_file.GetProtoId(load_method_type->GetProtoIndex());
+ StartAttributeStream("method_type") << dex_file.GetProtoSignature(proto_id);
+ }
+
+ void VisitLoadString(HLoadString* load_string) override {
StartAttributeStream("load_kind") << load_string->GetLoadKind();
}
- void VisitCheckCast(HCheckCast* check_cast) OVERRIDE {
- StartAttributeStream("check_kind") << check_cast->GetTypeCheckKind();
+ void HandleTypeCheckInstruction(HTypeCheckInstruction* check) {
+ StartAttributeStream("check_kind") << check->GetTypeCheckKind();
StartAttributeStream("must_do_null_check") << std::boolalpha
- << check_cast->MustDoNullCheck() << std::noboolalpha;
+ << check->MustDoNullCheck() << std::noboolalpha;
+ if (check->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ StartAttributeStream("path_to_root") << std::hex
+ << "0x" << check->GetBitstringPathToRoot() << std::dec;
+ StartAttributeStream("mask") << std::hex << "0x" << check->GetBitstringMask() << std::dec;
+ }
}
- void VisitInstanceOf(HInstanceOf* instance_of) OVERRIDE {
- StartAttributeStream("check_kind") << instance_of->GetTypeCheckKind();
- StartAttributeStream("must_do_null_check") << std::boolalpha
- << instance_of->MustDoNullCheck() << std::noboolalpha;
+ void VisitCheckCast(HCheckCast* check_cast) override {
+ HandleTypeCheckInstruction(check_cast);
+ }
+
+ void VisitInstanceOf(HInstanceOf* instance_of) override {
+ HandleTypeCheckInstruction(instance_of);
}
- void VisitArrayLength(HArrayLength* array_length) OVERRIDE {
+ void VisitArrayLength(HArrayLength* array_length) override {
StartAttributeStream("is_string_length") << std::boolalpha
<< array_length->IsStringLength() << std::noboolalpha;
if (array_length->IsEmittedAtUseSite()) {
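
For context on the new kBitstringCheck attributes above, here is a self-contained sketch of the bitstring subtype test they describe (assumed semantics, not ART code): each class's path from the root of the hierarchy is encoded in a bitstring, and a subtype test reduces to one mask-and-compare.

  #include <cstdint>
  // Returns whether a class whose encoded path is 'klass_bits' is a subtype of
  // the target class described by ('path_to_root', 'mask').
  bool BitstringIsSubtype(uint32_t klass_bits, uint32_t path_to_root, uint32_t mask) {
    return (klass_bits & mask) == path_to_root;
  }
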
@@ -410,31 +428,31 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
}
}
- void VisitBoundsCheck(HBoundsCheck* bounds_check) OVERRIDE {
+ void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
StartAttributeStream("is_string_char_at") << std::boolalpha
<< bounds_check->IsStringCharAt() << std::noboolalpha;
}
- void VisitArrayGet(HArrayGet* array_get) OVERRIDE {
+ void VisitArrayGet(HArrayGet* array_get) override {
StartAttributeStream("is_string_char_at") << std::boolalpha
<< array_get->IsStringCharAt() << std::noboolalpha;
}
- void VisitArraySet(HArraySet* array_set) OVERRIDE {
+ void VisitArraySet(HArraySet* array_set) override {
StartAttributeStream("value_can_be_null") << std::boolalpha
<< array_set->GetValueCanBeNull() << std::noboolalpha;
StartAttributeStream("needs_type_check") << std::boolalpha
<< array_set->NeedsTypeCheck() << std::noboolalpha;
}
- void VisitCompare(HCompare* compare) OVERRIDE {
+ void VisitCompare(HCompare* compare) override {
ComparisonBias bias = compare->GetBias();
StartAttributeStream("bias") << (bias == ComparisonBias::kGtBias
? "gt"
: (bias == ComparisonBias::kLtBias ? "lt" : "none"));
}
- void VisitInvoke(HInvoke* invoke) OVERRIDE {
+ void VisitInvoke(HInvoke* invoke) override {
StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex();
ArtMethod* method = invoke->GetResolvedMethod();
// We don't print signatures, which conflict with c1visualizer format.
@@ -451,12 +469,12 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
<< std::noboolalpha;
}
- void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE {
+ void VisitInvokeUnresolved(HInvokeUnresolved* invoke) override {
VisitInvoke(invoke);
StartAttributeStream("invoke_type") << invoke->GetInvokeType();
}
- void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override {
VisitInvoke(invoke);
StartAttributeStream("method_load_kind") << invoke->GetMethodLoadKind();
StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
@@ -465,96 +483,104 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
}
}
- void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+ void VisitInvokeVirtual(HInvokeVirtual* invoke) override {
VisitInvoke(invoke);
StartAttributeStream("intrinsic") << invoke->GetIntrinsic();
}
- void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE {
+ void VisitInvokePolymorphic(HInvokePolymorphic* invoke) override {
VisitInvoke(invoke);
StartAttributeStream("invoke_type") << "InvokePolymorphic";
}
- void VisitInstanceFieldGet(HInstanceFieldGet* iget) OVERRIDE {
+ void VisitInstanceFieldGet(HInstanceFieldGet* iget) override {
StartAttributeStream("field_name") <<
iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << iget->GetFieldType();
}
- void VisitInstanceFieldSet(HInstanceFieldSet* iset) OVERRIDE {
+ void VisitInstanceFieldSet(HInstanceFieldSet* iset) override {
StartAttributeStream("field_name") <<
iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << iset->GetFieldType();
}
- void VisitStaticFieldGet(HStaticFieldGet* sget) OVERRIDE {
+ void VisitStaticFieldGet(HStaticFieldGet* sget) override {
StartAttributeStream("field_name") <<
sget->GetFieldInfo().GetDexFile().PrettyField(sget->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << sget->GetFieldType();
}
- void VisitStaticFieldSet(HStaticFieldSet* sset) OVERRIDE {
+ void VisitStaticFieldSet(HStaticFieldSet* sset) override {
StartAttributeStream("field_name") <<
sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(),
/* with type */ false);
StartAttributeStream("field_type") << sset->GetFieldType();
}
- void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE {
+ void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) override {
StartAttributeStream("field_type") << field_access->GetFieldType();
}
- void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* field_access) OVERRIDE {
+ void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* field_access) override {
StartAttributeStream("field_type") << field_access->GetFieldType();
}
- void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* field_access) OVERRIDE {
+ void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* field_access) override {
StartAttributeStream("field_type") << field_access->GetFieldType();
}
- void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* field_access) OVERRIDE {
+ void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* field_access) override {
StartAttributeStream("field_type") << field_access->GetFieldType();
}
- void VisitTryBoundary(HTryBoundary* try_boundary) OVERRIDE {
+ void VisitTryBoundary(HTryBoundary* try_boundary) override {
StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
}
- void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+ void VisitDeoptimize(HDeoptimize* deoptimize) override {
StartAttributeStream("kind") << deoptimize->GetKind();
}
- void VisitVecOperation(HVecOperation* vec_operation) OVERRIDE {
+ void VisitVecOperation(HVecOperation* vec_operation) override {
StartAttributeStream("packed_type") << vec_operation->GetPackedType();
}
- void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) OVERRIDE {
+ void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) override {
StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString();
}
- void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE {
+ void VisitVecHalvingAdd(HVecHalvingAdd* hadd) override {
VisitVecBinaryOperation(hadd);
StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha;
}
- void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE {
+ void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) override {
VisitVecOperation(instruction);
StartAttributeStream("kind") << instruction->GetOpKind();
}
+ void VisitVecDotProd(HVecDotProd* instruction) override {
+ VisitVecOperation(instruction);
+ DataType::Type arg_type = instruction->InputAt(1)->AsVecOperation()->GetPackedType();
+ StartAttributeStream("type") << (instruction->IsZeroExtending() ?
+ DataType::ToUnsigned(arg_type) :
+ DataType::ToSigned(arg_type));
+ }
+
#if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
- void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
+ void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) override {
StartAttributeStream("kind") << instruction->GetOpKind();
}
- void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) OVERRIDE {
+ void VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) override {
StartAttributeStream("kind") << instruction->GetOpKind();
}
- void VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) OVERRIDE {
+ void VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) override {
StartAttributeStream("kind") << instruction->GetInstrKind() << "+" << instruction->GetOpKind();
if (HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())) {
StartAttributeStream("shift") << instruction->GetShiftAmount();
@@ -576,6 +602,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
}
StartAttributeStream() << input_list;
}
+ if (instruction->GetDexPc() != kNoDexPc) {
+ StartAttributeStream("dex_pc") << instruction->GetDexPc();
+ } else {
+ StartAttributeStream("dex_pc") << "n/a";
+ }
instruction->Accept(this);
if (instruction->HasEnvironment()) {
StringList envs;
@@ -641,20 +672,32 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
<< std::boolalpha << loop_info->IsIrreducible() << std::noboolalpha;
}
+ // For the builder and the inliner, we want to add extra information on HInstructions
+ // that have reference types, and also HInstanceOf/HCheckCast.
if ((IsPass(HGraphBuilder::kBuilderPassName)
|| IsPass(HInliner::kInlinerPassName))
- && (instruction->GetType() == DataType::Type::kReference)) {
- ReferenceTypeInfo info = instruction->IsLoadClass()
- ? instruction->AsLoadClass()->GetLoadedClassRTI()
- : instruction->GetReferenceTypeInfo();
+ && (instruction->GetType() == DataType::Type::kReference ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast())) {
+ ReferenceTypeInfo info = (instruction->GetType() == DataType::Type::kReference)
+ ? instruction->IsLoadClass()
+ ? instruction->AsLoadClass()->GetLoadedClassRTI()
+ : instruction->GetReferenceTypeInfo()
+ : instruction->IsInstanceOf()
+ ? instruction->AsInstanceOf()->GetTargetClassRTI()
+ : instruction->AsCheckCast()->GetTargetClassRTI();
ScopedObjectAccess soa(Thread::Current());
if (info.IsValid()) {
StartAttributeStream("klass")
<< mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get());
- StartAttributeStream("can_be_null")
- << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+ if (instruction->GetType() == DataType::Type::kReference) {
+ StartAttributeStream("can_be_null")
+ << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
+ }
StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
- } else if (instruction->IsLoadClass()) {
+ } else if (instruction->IsLoadClass() ||
+ instruction->IsInstanceOf() ||
+ instruction->IsCheckCast()) {
StartAttributeStream("klass") << "unresolved";
} else {
// The NullConstant may be added to the graph during other passes that happen between
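
The nested conditional expression above is compact but dense; for clarity only, it selects the ReferenceTypeInfo as if written:

  // if (instruction->GetType() == DataType::Type::kReference) {
  //   info = instruction->IsLoadClass()
  //       ? instruction->AsLoadClass()->GetLoadedClassRTI()
  //       : instruction->GetReferenceTypeInfo();
  // } else if (instruction->IsInstanceOf()) {
  //   info = instruction->AsInstanceOf()->GetTargetClassRTI();
  // } else {  // HCheckCast
  //   info = instruction->AsCheckCast()->GetTargetClassRTI();
  // }
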
@@ -778,7 +821,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor {
Flush();
}
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ void VisitBasicBlock(HBasicBlock* block) override {
StartTag("block");
PrintProperty("name", "B", block->GetBlockId());
if (block->GetLifetimeStart() != kNoLifetime) {
@@ -881,8 +924,8 @@ void HGraphVisualizer::DumpGraphWithDisassembly() const {
HGraphVisualizerPrinter printer(graph_,
*output_,
"disassembly",
- /* is_after_pass */ true,
- /* graph_in_bad_state */ false,
+ /* is_after_pass= */ true,
+ /* graph_in_bad_state= */ false,
codegen_,
codegen_.GetDisassemblyInformation());
printer.Run();
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index f05159b735..e8460a843f 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -43,7 +43,6 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> {
buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
num_entries_(0u) {
- // ArenaAllocator returns zeroed memory, so no need to set buckets to null.
DCHECK(IsPowerOfTwo(num_buckets_));
std::fill_n(buckets_, num_buckets_, nullptr);
buckets_owned_.SetInitialBits(num_buckets_);
@@ -57,8 +56,6 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> {
buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)),
buckets_owned_(allocator, num_buckets_, false, kArenaAllocGvn),
num_entries_(0u) {
- // ArenaAllocator returns zeroed memory, so entries of buckets_ and
- // buckets_owned_ are initialized to null and false, respectively.
DCHECK(IsPowerOfTwo(num_buckets_));
PopulateFromInternal(other);
}
@@ -348,11 +345,11 @@ class GlobalValueNumberer : public ValueObject {
side_effects_(side_effects),
sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)),
visited_blocks_(
- &allocator_, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {
+ &allocator_, graph->GetBlocks().size(), /* expandable= */ false, kArenaAllocGvn) {
visited_blocks_.ClearAllBits();
}
- void Run();
+ bool Run();
private:
// Per-block GVN. Will also update the ValueSet of the dominated and
@@ -397,7 +394,7 @@ class GlobalValueNumberer : public ValueObject {
DISALLOW_COPY_AND_ASSIGN(GlobalValueNumberer);
};
-void GlobalValueNumberer::Run() {
+bool GlobalValueNumberer::Run() {
DCHECK(side_effects_.HasRun());
sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_);
@@ -406,6 +403,7 @@ void GlobalValueNumberer::Run() {
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
VisitBasicBlock(block);
}
+ return true;
}
void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
@@ -478,7 +476,10 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) {
HInstruction* next = current->GetNext();
// Do not kill the set with the side effects of the instruction just now: if
// the instruction is GVN'ed, we don't need to kill.
- if (current->CanBeMoved()) {
+ //
+ // BoundType is a special case: an instruction that shouldn't be moved but can still be
+ // GVN'ed.
+ if (current->CanBeMoved() || current->IsBoundType()) {
if (current->IsBinaryOperation() && current->AsBinaryOperation()->IsCommutative()) {
// For commutative ops, (x op y) will be treated the same as (y op x)
// after fixed ordering.
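
On the CanBeMoved() || IsBoundType() condition above: GVN never moves instructions; it replaces a later occurrence with an already-dominating equivalent, so an immovable-but-pure instruction such as HBoundType can still participate. A minimal sketch of that replacement step (assumed shape, not a verbatim excerpt of the loop body):

  HInstruction* existing = set->Lookup(current);
  if (existing != nullptr) {
    current->ReplaceWith(existing);                  // reuse the dominating value
    current->GetBlock()->RemoveInstruction(current); // nothing changes position
  } else {
    set->Add(current);
  }
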
@@ -542,12 +543,12 @@ HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
// that is larger, we return it if no perfectly-matching set is found.
// Note that we defer testing WillBeReferencedAgain until all other criteria
// have been satisfied because it might be expensive.
- if (current_set->CanHoldCopyOf(reference_set, /* exact_match */ true)) {
+ if (current_set->CanHoldCopyOf(reference_set, /* exact_match= */ true)) {
if (!WillBeReferencedAgain(current_block)) {
return current_block;
}
} else if (secondary_match == nullptr &&
- current_set->CanHoldCopyOf(reference_set, /* exact_match */ false)) {
+ current_set->CanHoldCopyOf(reference_set, /* exact_match= */ false)) {
if (!WillBeReferencedAgain(current_block)) {
secondary_match = current_block;
}
@@ -557,9 +558,9 @@ HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet(
return secondary_match;
}
-void GVNOptimization::Run() {
+bool GVNOptimization::Run() {
GlobalValueNumberer gvn(graph_, side_effects_);
- gvn.Run();
+ return gvn.Run();
}
} // namespace art
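
As the gvn.h hunk below confirms, HOptimization::Run() now returns bool; the contract implied by these diffs is "true iff the pass may have changed the graph", and GVN conservatively reports true whenever it ran. A call site then reads, for example:

  GVNOptimization gvn(graph, side_effects);
  bool pass_change = gvn.Run();  // can feed GraphChecker::Run(pass_change, last_size)
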
diff --git a/compiler/optimizing/gvn.h b/compiler/optimizing/gvn.h
index 4fdba26ebd..bbf2265e98 100644
--- a/compiler/optimizing/gvn.h
+++ b/compiler/optimizing/gvn.h
@@ -31,7 +31,7 @@ class GVNOptimization : public HOptimization {
const char* pass_name = kGlobalValueNumberingPassName)
: HOptimization(graph, pass_name), side_effects_(side_effects) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kGlobalValueNumberingPassName = "GVN";
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index d270c6a28e..3a10d5831d 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -243,7 +243,7 @@ HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, const char* name)
graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) {
}
-void HInductionVarAnalysis::Run() {
+bool HInductionVarAnalysis::Run() {
// Detects sequence variables (generalized induction variables) during an outer to inner
// traversal of all loops using Gerlek's algorithm. The order is important to enable
// range analysis on outer loop while visiting inner loops.
@@ -253,6 +253,7 @@ void HInductionVarAnalysis::Run() {
VisitLoop(graph_block->GetLoopInformation());
}
}
+ return !induction_.empty();
}
void HInductionVarAnalysis::VisitLoop(HLoopInformation* loop) {
@@ -1073,8 +1074,8 @@ bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr,
&& lower_value >= upper_value;
default:
LOG(FATAL) << "CONDITION UNREACHABLE";
+ UNREACHABLE();
}
- return false; // not certain, may be untaken
}
bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr,
@@ -1098,8 +1099,8 @@ bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr,
return (IsAtLeast(upper_expr, &value) && value >= (min - stride_value));
default:
LOG(FATAL) << "CONDITION UNREACHABLE";
+ UNREACHABLE();
}
- return false; // not certain, may be infinite
}
bool HInductionVarAnalysis::FitsNarrowerControl(InductionInfo* lower_expr,
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index acad77d35f..a48aa90059 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -37,7 +37,7 @@ class HInductionVarAnalysis : public HOptimization {
public:
explicit HInductionVarAnalysis(HGraph* graph, const char* name = kInductionPassName);
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kInductionPassName = "induction_var_analysis";
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 99dec11240..4c78fa8f06 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -78,22 +78,15 @@ static bool IsGEZero(HInstruction* instruction) {
DCHECK(instruction != nullptr);
if (instruction->IsArrayLength()) {
return true;
- } else if (instruction->IsInvokeStaticOrDirect()) {
- switch (instruction->AsInvoke()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- // Instruction MIN(>=0, >=0) is >= 0.
- return IsGEZero(instruction->InputAt(0)) &&
- IsGEZero(instruction->InputAt(1));
- case Intrinsics::kMathAbsInt:
- case Intrinsics::kMathAbsLong:
- // Instruction ABS(>=0) is >= 0.
- // NOTE: ABS(minint) = minint prevents assuming
- // >= 0 without looking at the argument.
- return IsGEZero(instruction->InputAt(0));
- default:
- break;
- }
+ } else if (instruction->IsMin()) {
+ // Instruction MIN(>=0, >=0) is >= 0.
+ return IsGEZero(instruction->InputAt(0)) &&
+ IsGEZero(instruction->InputAt(1));
+ } else if (instruction->IsAbs()) {
+ // Instruction ABS(>=0) is >= 0.
+ // NOTE: ABS(minint) = minint prevents assuming
+ // >= 0 without looking at the argument.
+ return IsGEZero(instruction->InputAt(0));
}
int64_t value = -1;
return IsInt64AndGet(instruction, &value) && value >= 0;
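
A worked trace of the simplified IsGEZero() on a typical bound such as Min(Abs(a), array.length) (hypothetical operands, following the code above):

  IsGEZero(Min(Abs(a), len))
    = IsGEZero(Abs(a)) && IsGEZero(len)   // MIN(>=0, >=0) is >= 0
    = IsGEZero(a)      && true            // ABS(x) >= 0 only if x >= 0, since
                                          // ABS(minint) == minint; 'len' is an
                                          // ArrayLength and hence always >= 0
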
@@ -102,21 +95,14 @@ static bool IsGEZero(HInstruction* instruction) {
/** Hunts "under the hood" for a suitable instruction at the hint. */
static bool IsMaxAtHint(
HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) {
- if (instruction->IsInvokeStaticOrDirect()) {
- switch (instruction->AsInvoke()->GetIntrinsic()) {
- case Intrinsics::kMathMinIntInt:
- case Intrinsics::kMathMinLongLong:
- // For MIN(x, y), return most suitable x or y as maximum.
- return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
- IsMaxAtHint(instruction->InputAt(1), hint, suitable);
- default:
- break;
- }
+ if (instruction->IsMin()) {
+ // For MIN(x, y), return most suitable x or y as maximum.
+ return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
+ IsMaxAtHint(instruction->InputAt(1), hint, suitable);
} else {
*suitable = instruction;
return HuntForDeclaration(instruction) == hint;
}
- return false;
}
/** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */
@@ -230,13 +216,13 @@ bool InductionVarRange::GetInductionRange(HInstruction* context,
chase_hint_ = chase_hint;
bool in_body = context->GetBlock() != loop->GetHeader();
int64_t stride_value = 0;
- *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true));
- *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false), chase_hint);
+ *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min= */ true));
+ *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min= */ false), chase_hint);
*needs_finite_test = NeedsTripCount(info, &stride_value) && IsUnsafeTripCount(trip);
chase_hint_ = nullptr;
// Retry chasing constants for wrap-around (merge sensitive).
if (!min_val->is_known && info->induction_class == HInductionVarAnalysis::kWrapAround) {
- *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true));
+ *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min= */ true));
}
return true;
}
@@ -365,14 +351,16 @@ void InductionVarRange::Replace(HInstruction* instruction,
}
}
-bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const {
- HInductionVarAnalysis::InductionInfo *trip =
- induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
- if (trip != nullptr && !IsUnsafeTripCount(trip)) {
- IsConstant(trip->op_a, kExact, tc);
- return true;
- }
- return false;
+bool InductionVarRange::IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const {
+ bool is_constant_unused = false;
+ return CheckForFiniteAndConstantProps(loop, &is_constant_unused, trip_count);
+}
+
+bool InductionVarRange::HasKnownTripCount(HLoopInformation* loop,
+ /*out*/ int64_t* trip_count) const {
+ bool is_constant = false;
+ CheckForFiniteAndConstantProps(loop, &is_constant, trip_count);
+ return is_constant;
}
bool InductionVarRange::IsUnitStride(HInstruction* context,
@@ -431,6 +419,18 @@ HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop,
// Private class methods.
//
+bool InductionVarRange::CheckForFiniteAndConstantProps(HLoopInformation* loop,
+ /*out*/ bool* is_constant,
+ /*out*/ int64_t* trip_count) const {
+ HInductionVarAnalysis::InductionInfo *trip =
+ induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
+ if (trip != nullptr && !IsUnsafeTripCount(trip)) {
+ *is_constant = IsConstant(trip->op_a, kExact, trip_count);
+ return true;
+ }
+ return false;
+}
+
bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
ConstantRequest request,
/*out*/ int64_t* value) const {
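
Usage sketch for the two public wrappers introduced above (the call sites are assumed):

  int64_t tc = 0;
  if (range.IsFinite(loop, &tc)) {
    // The loop provably terminates; 'tc' is filled in only if it is constant.
  }
  if (range.HasKnownTripCount(loop, &tc)) {
    // The trip count is a known constant and 'tc' holds its value.
  }
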
@@ -445,8 +445,8 @@ bool InductionVarRange::IsConstant(HInductionVarAnalysis::InductionInfo* info,
}
// Try range analysis on the invariant, only accept a proper range
// to avoid arithmetic wrap-around anomalies.
- Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true);
- Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false);
+ Value min_val = GetVal(info, nullptr, /* in_body= */ true, /* is_min= */ true);
+ Value max_val = GetVal(info, nullptr, /* in_body= */ true, /* is_min= */ false);
if (IsConstantValue(min_val) &&
IsConstantValue(max_val) && min_val.b_constant <= max_val.b_constant) {
if ((request == kExact && min_val.b_constant == max_val.b_constant) || request == kAtMost) {
@@ -791,10 +791,10 @@ InductionVarRange::Value InductionVarRange::GetMul(HInductionVarAnalysis::Induct
return MulRangeAndConstant(value, info1, trip, in_body, is_min);
}
// Interval ranges.
- Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
- Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
- Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
- Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
+ Value v1_min = GetVal(info1, trip, in_body, /* is_min= */ true);
+ Value v1_max = GetVal(info1, trip, in_body, /* is_min= */ false);
+ Value v2_min = GetVal(info2, trip, in_body, /* is_min= */ true);
+ Value v2_max = GetVal(info2, trip, in_body, /* is_min= */ false);
// Positive range vs. positive or negative range.
if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) {
if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
@@ -825,10 +825,10 @@ InductionVarRange::Value InductionVarRange::GetDiv(HInductionVarAnalysis::Induct
return DivRangeAndConstant(value, info1, trip, in_body, is_min);
}
// Interval ranges.
- Value v1_min = GetVal(info1, trip, in_body, /* is_min */ true);
- Value v1_max = GetVal(info1, trip, in_body, /* is_min */ false);
- Value v2_min = GetVal(info2, trip, in_body, /* is_min */ true);
- Value v2_max = GetVal(info2, trip, in_body, /* is_min */ false);
+ Value v1_min = GetVal(info1, trip, in_body, /* is_min= */ true);
+ Value v1_max = GetVal(info1, trip, in_body, /* is_min= */ false);
+ Value v2_min = GetVal(info2, trip, in_body, /* is_min= */ true);
+ Value v2_max = GetVal(info2, trip, in_body, /* is_min= */ false);
// Positive range vs. positive or negative range.
if (IsConstantValue(v1_min) && v1_min.b_constant >= 0) {
if (IsConstantValue(v2_min) && v2_min.b_constant >= 0) {
@@ -1019,10 +1019,10 @@ bool InductionVarRange::GenerateRangeOrLastValue(HInstruction* context,
// Code generation for taken test: generate the code when requested or otherwise analyze
// if code generation is feasible when taken test is needed.
if (taken_test != nullptr) {
- return GenerateCode(trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min */ false);
+ return GenerateCode(trip->op_b, nullptr, graph, block, taken_test, in_body, /* is_min= */ false);
} else if (*needs_taken_test) {
if (!GenerateCode(
- trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min */ false)) {
+ trip->op_b, nullptr, nullptr, nullptr, nullptr, in_body, /* is_min= */ false)) {
return false;
}
}
@@ -1030,9 +1030,9 @@ bool InductionVarRange::GenerateRangeOrLastValue(HInstruction* context,
return
// Success on lower if invariant (not set), or code can be generated.
((info->induction_class == HInductionVarAnalysis::kInvariant) ||
- GenerateCode(info, trip, graph, block, lower, in_body, /* is_min */ true)) &&
+ GenerateCode(info, trip, graph, block, lower, in_body, /* is_min= */ true)) &&
// And success on upper.
- GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false);
+ GenerateCode(info, trip, graph, block, upper, in_body, /* is_min= */ false);
}
bool InductionVarRange::GenerateLastValuePolynomial(HInductionVarAnalysis::InductionInfo* info,
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 0b980f596a..906dc6bb7b 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -161,9 +161,15 @@ class InductionVarRange {
}
/**
- * Checks if header logic of a loop terminates. Sets trip-count tc if known.
+ * Checks if header logic of a loop terminates. If the trip count is known, sets 'trip_count'
+ * to its value.
*/
- bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
+ bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* trip_count) const;
+
+ /**
+ * Checks if the trip count of the loop is known and, if so, sets 'trip_count' to its value.
+ */
+ bool HasKnownTripCount(HLoopInformation* loop, /*out*/ int64_t* trip_count) const;
/**
* Checks if the given instruction is a unit stride induction inside the closest enveloping
@@ -194,6 +200,14 @@ class InductionVarRange {
};
/**
+ * Checks if header logic of a loop terminates. If the trip count is known (constant), sets
+ * 'is_constant' to true and 'trip_count' to its value.
+ */
+ bool CheckForFiniteAndConstantProps(HLoopInformation* loop,
+ /*out*/ bool* is_constant,
+ /*out*/ int64_t* trip_count) const;
+
+ /**
* Returns true if exact or upper/lower bound on the given induction
* information is known as a 64-bit constant, which is returned in value.
*/
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index e5bc6ef22c..f6af384af0 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -252,24 +252,24 @@ class InductionVarRangeTest : public OptimizingUnitTest {
Value GetMin(HInductionVarAnalysis::InductionInfo* info,
HInductionVarAnalysis::InductionInfo* trip) {
- return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ true);
+ return range_.GetVal(info, trip, /* in_body= */ true, /* is_min= */ true);
}
Value GetMax(HInductionVarAnalysis::InductionInfo* info,
HInductionVarAnalysis::InductionInfo* trip) {
- return range_.GetVal(info, trip, /* in_body */ true, /* is_min */ false);
+ return range_.GetVal(info, trip, /* in_body= */ true, /* is_min= */ false);
}
Value GetMul(HInductionVarAnalysis::InductionInfo* info1,
HInductionVarAnalysis::InductionInfo* info2,
bool is_min) {
- return range_.GetMul(info1, info2, nullptr, /* in_body */ true, is_min);
+ return range_.GetMul(info1, info2, nullptr, /* in_body= */ true, is_min);
}
Value GetDiv(HInductionVarAnalysis::InductionInfo* info1,
HInductionVarAnalysis::InductionInfo* info2,
bool is_min) {
- return range_.GetDiv(info1, info2, nullptr, /* in_body */ true, is_min);
+ return range_.GetDiv(info1, info2, nullptr, /* in_body= */ true, is_min);
}
Value GetRem(HInductionVarAnalysis::InductionInfo* info1,
@@ -701,7 +701,11 @@ TEST_F(InductionVarRangeTest, MaxValue) {
TEST_F(InductionVarRangeTest, ArrayLengthAndHints) {
// We pass a bogus constant for the class to avoid mocking one.
- HInstruction* new_array = new (GetAllocator()) HNewArray(x_, x_, 0);
+ HInstruction* new_array = new (GetAllocator()) HNewArray(
+ /* cls= */ x_,
+ /* length= */ x_,
+ /* dex_pc= */ 0,
+ /* component_size_shift= */ 0);
entry_block_->AddInstruction(new_array);
HInstruction* array_length = new (GetAllocator()) HArrayLength(new_array, 0);
entry_block_->AddInstruction(array_length);
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 4fc7262265..205077fb49 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -18,15 +18,16 @@
#include "art_method-inl.h"
#include "base/enums.h"
+#include "base/logging.h"
#include "builder.h"
#include "class_linker.h"
+#include "class_root.h"
#include "constant_folding.h"
#include "data_type-inl.h"
#include "dead_code_elimination.h"
#include "dex/inline_method_analyser.h"
#include "dex/verification_results.h"
#include "dex/verified_method.h"
-#include "driver/compiler_driver-inl.h"
#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
#include "instruction_simplifier.h"
@@ -35,8 +36,9 @@
#include "jit/jit_code_cache.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
+#include "mirror/object_array-alloc-inl.h"
+#include "mirror/object_array-inl.h"
#include "nodes.h"
-#include "optimizing_compiler.h"
#include "reference_type_propagation.h"
#include "register_allocator_linear_scan.h"
#include "scoped_thread_state_change-inl.h"
@@ -124,13 +126,18 @@ void HInliner::UpdateInliningBudget() {
}
}
-void HInliner::Run() {
- if (graph_->IsDebuggable()) {
+bool HInliner::Run() {
+ if (codegen_->GetCompilerOptions().GetInlineMaxCodeUnits() == 0) {
+ // Inlining effectively disabled.
+ return false;
+ } else if (graph_->IsDebuggable()) {
// For simplicity, we currently never inline when the graph is debuggable. This avoids
// doing some logic in the runtime to discover if a method could have been inlined.
- return;
+ return false;
}
+ bool didInline = false;
+
// Initialize the number of instructions for the method being compiled. Recursive calls
// to HInliner::Run have already updated the instruction count.
if (outermost_graph_ == graph_) {
@@ -143,14 +150,15 @@ void HInliner::Run() {
// If we're compiling with a core image (which is only used for
// test purposes), honor inlining directives in method names:
- // - if a method's name contains the substring "$inline$", ensure
- // that this method is actually inlined;
// - if a method's name contains the substring "$noinline$", do not
- // inline that method.
- // We limit this to AOT compilation, as the JIT may or may not inline
+ // inline that method;
+ // - if a method's name contains the substring "$inline$", ensure
+ // that this method is actually inlined.
+ // We limit the latter to AOT compilation, as the JIT may or may not inline
// depending on the state of classes at runtime.
- const bool honor_inlining_directives =
- IsCompilingWithCoreImage() && Runtime::Current()->IsAotCompiler();
+ const bool honor_noinline_directives = codegen_->GetCompilerOptions().CompilingWithCoreImage();
+ const bool honor_inline_directives =
+ honor_noinline_directives && Runtime::Current()->IsAotCompiler();
// Keep a copy of all blocks when starting the visit.
ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
@@ -164,25 +172,32 @@ void HInliner::Run() {
HInvoke* call = instruction->AsInvoke();
// As long as the call is not intrinsified, it is worth trying to inline.
if (call != nullptr && call->GetIntrinsic() == Intrinsics::kNone) {
- if (honor_inlining_directives) {
+ if (honor_noinline_directives) {
// Debugging case: directives in method names control or assert on inlining.
std::string callee_name = outer_compilation_unit_.GetDexFile()->PrettyMethod(
- call->GetDexMethodIndex(), /* with_signature */ false);
+ call->GetDexMethodIndex(), /* with_signature= */ false);
// Tests prevent inlining by having $noinline$ in their method names.
if (callee_name.find("$noinline$") == std::string::npos) {
- if (!TryInline(call)) {
+ if (TryInline(call)) {
+ didInline = true;
+ } else if (honor_inline_directives) {
bool should_have_inlined = (callee_name.find("$inline$") != std::string::npos);
CHECK(!should_have_inlined) << "Could not inline " << callee_name;
}
}
} else {
+ DCHECK(!honor_inline_directives);
// Normal case: try to inline.
- TryInline(call);
+ if (TryInline(call)) {
+ didInline = true;
+ }
}
}
instruction = next;
}
}
+
+ return didInline;
}
static bool IsMethodOrDeclaringClassFinal(ArtMethod* method)
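
For reference, the naming convention this logic honors in core-image test builds looks like the following (hypothetical Java test methods, shown as comments):

  // int $noinline$slowPath(int x) { ... }  // never inlined while directives are honored
  // int $inline$fastPath(int x) { ... }    // AOT core-image builds CHECK-fail if this
  //                                        // one is NOT actually inlined
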
@@ -274,7 +289,7 @@ static uint32_t FindMethodIndexIn(ArtMethod* method,
}
}
-static dex::TypeIndex FindClassIndexIn(mirror::Class* cls,
+static dex::TypeIndex FindClassIndexIn(ObjPtr<mirror::Class> cls,
const DexCompilationUnit& compilation_unit)
REQUIRES_SHARED(Locks::mutator_lock_) {
const DexFile& dex_file = *compilation_unit.GetDexFile();
@@ -353,7 +368,7 @@ HInliner::InlineCacheType HInliner::GetInlineCacheType(
}
}
-static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes)
+static ObjPtr<mirror::Class> GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes)
REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(classes->Get(0) != nullptr);
return classes->Get(0);
@@ -367,6 +382,11 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
// No CHA-based devirtualization for AOT compiler (yet).
return nullptr;
}
+ if (Runtime::Current()->IsZygote()) {
+ // No CHA-based devirtualization for Zygote, as it compiles with
+ // offline information.
+ return nullptr;
+ }
if (outermost_graph_->IsCompilingOsr()) {
// We do not support HDeoptimize in OSR methods.
return nullptr;
@@ -392,7 +412,7 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
return single_impl;
}
-static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* method)
+static bool IsMethodUnverified(const CompilerOptions& compiler_options, ArtMethod* method)
REQUIRES_SHARED(Locks::mutator_lock_) {
if (!method->GetDeclaringClass()->IsVerified()) {
if (Runtime::Current()->UseJitCompilation()) {
@@ -401,8 +421,9 @@ static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod*
return true;
}
uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex();
- if (!compiler_driver->IsMethodVerifiedWithoutFailures(
- method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
+ if (!compiler_options.IsMethodVerifiedWithoutFailures(method->GetDexMethodIndex(),
+ class_def_idx,
+ *method->GetDexFile())) {
// Method has soft or hard failures, don't analyze.
return true;
}
@@ -410,11 +431,11 @@ static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod*
return false;
}
-static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* method)
+static bool AlwaysThrows(const CompilerOptions& compiler_options, ArtMethod* method)
REQUIRES_SHARED(Locks::mutator_lock_) {
DCHECK(method != nullptr);
// Skip non-compilable and unverified methods.
- if (!method->IsCompilable() || IsMethodUnverified(compiler_driver, method)) {
+ if (!method->IsCompilable() || IsMethodUnverified(compiler_options, method)) {
return false;
}
// Skip native methods, methods with try blocks, and methods that are too large.
@@ -446,9 +467,10 @@ static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* metho
bool HInliner::TryInline(HInvoke* invoke_instruction) {
if (invoke_instruction->IsInvokeUnresolved() ||
- invoke_instruction->IsInvokePolymorphic()) {
- return false; // Don't bother to move further if we know the method is unresolved or an
- // invoke-polymorphic.
+ invoke_instruction->IsInvokePolymorphic() ||
+ invoke_instruction->IsInvokeCustom()) {
+ return false; // Don't bother to move further if we know the method is unresolved or the
+ // invocation is polymorphic (invoke-{polymorphic,custom}).
}
ScopedObjectAccess soa(Thread::Current());
@@ -487,7 +509,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
bool result = TryInlineAndReplace(invoke_instruction,
actual_method,
ReferenceTypeInfo::CreateInvalid(),
- /* do_rtp */ true,
+ /* do_rtp= */ true,
cha_devirtualize);
if (result) {
// Successfully inlined.
@@ -501,7 +523,7 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) {
MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface);
}
}
- } else if (!cha_devirtualize && AlwaysThrows(compiler_driver_, actual_method)) {
+ } else if (!cha_devirtualize && AlwaysThrows(codegen_->GetCompilerOptions(), actual_method)) {
// Set always throws property for non-inlined method call with single target
// (unless it was obtained through CHA, because that would imply we have
// to add the CHA dependency, which seems not worth it).
@@ -524,7 +546,7 @@ static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder(
Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle(
mirror::ObjectArray<mirror::Class>::Alloc(
self,
- class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
+ GetClassRoot<mirror::ObjectArray<mirror::Class>>(class_linker),
InlineCache::kIndividualCacheSize));
if (inline_cache == nullptr) {
// We got an OOME. Just clear the exception, and don't inline.
@@ -572,9 +594,12 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
StackHandleScope<1> hs(Thread::Current());
Handle<mirror::ObjectArray<mirror::Class>> inline_cache;
- InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler()
- ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache)
- : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache);
+ // The Zygote JIT compiles based on a profile, so we shouldn't use runtime inline caches
+ // for it.
+ InlineCacheType inline_cache_type =
+ (Runtime::Current()->IsAotCompiler() || Runtime::Current()->IsZygote())
+ ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache)
+ : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache);
switch (inline_cache_type) {
case kInlineCacheNoData: {
@@ -662,8 +687,7 @@ HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
StackHandleScope<1>* hs,
/*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
REQUIRES_SHARED(Locks::mutator_lock_) {
- DCHECK(Runtime::Current()->IsAotCompiler());
- const ProfileCompilationInfo* pci = compiler_driver_->GetProfileCompilationInfo();
+ const ProfileCompilationInfo* pci = codegen_->GetCompilerOptions().GetProfileCompilationInfo();
if (pci == nullptr) {
return kInlineCacheNoData;
}
@@ -716,7 +740,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
offline_profile.dex_references.size());
for (size_t i = 0; i < offline_profile.dex_references.size(); i++) {
bool found = false;
- for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) {
+ for (const DexFile* dex_file : codegen_->GetCompilerOptions().GetDexFilesForOatFile()) {
if (offline_profile.dex_references[i].MatchesDex(dex_file)) {
dex_profile_index_to_dex_cache[i] =
caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file);
@@ -764,7 +788,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
HInstruction* receiver,
uint32_t dex_pc) const {
- ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+ ArtField* field = GetClassRoot<mirror::Object>(class_linker)->GetInstanceField(0);
DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
HInstanceFieldGet* result = new (graph_->GetAllocator()) HInstanceFieldGet(
receiver,
@@ -841,9 +865,9 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
if (!TryInlineAndReplace(invoke_instruction,
resolved_method,
- ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true),
- /* do_rtp */ false,
- /* cha_devirtualize */ false)) {
+ ReferenceTypeInfo::Create(monomorphic_type, /* is_exact= */ true),
+ /* do_rtp= */ false,
+ /* cha_devirtualize= */ false)) {
return false;
}
@@ -854,7 +878,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
class_index,
monomorphic_type,
invoke_instruction,
- /* with_deoptimization */ true);
+ /* with_deoptimization= */ true);
// Run type propagation to get the guard typed, and eventually propagate the
// type of the receiver.
@@ -862,7 +886,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
handles_,
- /* is_first_run */ false);
+ /* is_first_run= */ false);
rtp_fixup.Run();
MaybeRecordStat(stats_, MethodCompilationStat::kInlinedMonomorphicCall);
@@ -932,9 +956,9 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver,
klass,
is_referrer,
invoke_instruction->GetDexPc(),
- /* needs_access_check */ false);
+ /* needs_access_check= */ false);
HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind(
- load_class, codegen_, compiler_driver_, caller_compilation_unit_);
+ load_class, codegen_, caller_compilation_unit_);
DCHECK(kind != HLoadClass::LoadKind::kInvalid)
<< "We should always be able to reference a class for inline caches";
// Load kind must be set before inserting the instruction into the graph.
@@ -1010,7 +1034,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
if (!class_index.IsValid() ||
!TryBuildAndInline(invoke_instruction,
method,
- ReferenceTypeInfo::Create(handle, /* is_exact */ true),
+ ReferenceTypeInfo::Create(handle, /* is_exact= */ true),
&return_replacement)) {
all_targets_inlined = false;
} else {
@@ -1062,7 +1086,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction,
outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
handles_,
- /* is_first_run */ false);
+ /* is_first_run= */ false);
rtp_fixup.Run();
return true;
}
@@ -1133,14 +1157,14 @@ void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare,
graph_->UpdateLoopAndTryInformationOfNewBlock(
- then, original_invoke_block, /* replace_if_back_edge */ false);
+ then, original_invoke_block, /* replace_if_back_edge= */ false);
graph_->UpdateLoopAndTryInformationOfNewBlock(
- otherwise, original_invoke_block, /* replace_if_back_edge */ false);
+ otherwise, original_invoke_block, /* replace_if_back_edge= */ false);
// In case the original invoke location was a back edge, we need to update
// the loop to now have the merge block as a back edge.
graph_->UpdateLoopAndTryInformationOfNewBlock(
- merge, original_invoke_block, /* replace_if_back_edge */ true);
+ merge, original_invoke_block, /* replace_if_back_edge= */ true);
}
bool HInliner::TryInlinePolymorphicCallToSameTarget(
@@ -1258,7 +1282,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget(
outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
handles_,
- /* is_first_run */ false);
+ /* is_first_run= */ false);
rtp_fixup.Run();
MaybeRecordStat(stats_, MethodCompilationStat::kInlinedPolymorphicCall);
@@ -1281,9 +1305,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
// If invoke_instruction is devirtualized to a different method, give intrinsics
// another chance before we try to inline it.
- bool wrong_invoke_type = false;
- if (invoke_instruction->GetResolvedMethod() != method &&
- IntrinsicsRecognizer::Recognize(invoke_instruction, method, &wrong_invoke_type)) {
+ if (invoke_instruction->GetResolvedMethod() != method && method->IsIntrinsic()) {
MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized);
if (invoke_instruction->IsInvokeInterface()) {
// We don't intrinsify an invoke-interface directly.
@@ -1296,6 +1318,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
invoke_instruction->GetDexMethodIndex(), // Use interface method's dex method index.
method,
method->GetMethodIndex());
+ DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone);
HInputsRef inputs = invoke_instruction->GetInputs();
for (size_t index = 0; index != inputs.size(); ++index) {
new_invoke->SetArgumentAt(index, inputs[index]);
@@ -1305,14 +1328,11 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
if (invoke_instruction->GetType() == DataType::Type::kReference) {
new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo());
}
- // Run intrinsic recognizer again to set new_invoke's intrinsic.
- IntrinsicsRecognizer::Recognize(new_invoke, method, &wrong_invoke_type);
- DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone);
return_replacement = new_invoke;
// invoke_instruction is replaced with new_invoke.
should_remove_invoke_instruction = true;
} else {
- // invoke_instruction is intrinsified and stays.
+ invoke_instruction->SetResolvedMethod(method);
}
} else if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
if (invoke_instruction->IsInvokeInterface()) {
@@ -1386,7 +1406,7 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction,
outer_compilation_unit_.GetClassLoader(),
outer_compilation_unit_.GetDexCache(),
handles_,
- /* is_first_run */ false).Run();
+ /* is_first_run= */ false).Run();
}
return true;
}
@@ -1403,6 +1423,18 @@ size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const {
return count;
}
+static inline bool MayInline(const CompilerOptions& compiler_options,
+ const DexFile& inlined_from,
+ const DexFile& inlined_into) {
+ // We're not allowed to inline across dex files when the callee's dex file is on the
+ // no-inline-from list.
+ if (!IsSameDexFile(inlined_from, inlined_into) &&
+ ContainsElement(compiler_options.GetNoInlineFromDexFile(), &inlined_from)) {
+ return false;
+ }
+
+ return true;
+}
+
bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
ArtMethod* method,
ReferenceTypeInfo receiver_type,
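
The MayInline() helper above replaces the former CompilerDriver::MayInline(); it enforces dex2oat's no-inline-from list. A quick reading of the two cases:

  // same dex file       -> always allowed (the list is not consulted)
  // different dex file  -> allowed unless 'inlined_from' appears in
  //                        compiler_options.GetNoInlineFromDexFile()
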
@@ -1424,8 +1456,9 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
// Check whether we're allowed to inline. The outermost compilation unit is the relevant
// dex file here (though the transitivity of an inline chain would allow checking the caller).
- if (!compiler_driver_->MayInline(method->GetDexFile(),
- outer_compilation_unit_.GetDexFile())) {
+ if (!MayInline(codegen_->GetCompilerOptions(),
+ *method->GetDexFile(),
+ *outer_compilation_unit_.GetDexFile())) {
if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
LOG_SUCCESS() << "Successfully replaced pattern of invoke "
<< method->PrettyMethod();
@@ -1450,7 +1483,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
+ size_t inline_max_code_units = codegen_->GetCompilerOptions().GetInlineMaxCodeUnits();
if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) {
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem)
<< "Method " << method->PrettyMethod()
@@ -1474,7 +1507,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
return false;
}
- if (IsMethodUnverified(compiler_driver_, method)) {
+ if (IsMethodUnverified(codegen_->GetCompilerOptions(), method)) {
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified)
<< "Method " << method->PrettyMethod()
<< " couldn't be verified, so it cannot be inlined";
@@ -1599,7 +1632,8 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
[](uint16_t index) { return index != DexFile::kDexNoIndex16; }));
// Create HInstanceFieldSet for each IPUT that stores non-zero data.
- HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u);
+ HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction,
+ /* arg_vreg_index= */ 0u);
bool needs_constructor_barrier = false;
for (size_t i = 0; i != number_of_iputs; ++i) {
HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]);
@@ -1617,7 +1651,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction,
}
}
if (needs_constructor_barrier) {
- // See CompilerDriver::RequiresConstructorBarrier for more details.
+ // See DexCompilationUnit::RequiresConstructorBarrier for more details.
DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence";
HConstructorFence* constructor_fence =
@@ -1641,7 +1675,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index,
REQUIRES_SHARED(Locks::mutator_lock_) {
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
ArtField* resolved_field =
- class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
+ class_linker->LookupResolvedField(field_index, referrer, /* is_static= */ false);
DCHECK(resolved_field != nullptr);
HInstanceFieldGet* iget = new (graph_->GetAllocator()) HInstanceFieldGet(
obj,
@@ -1654,7 +1688,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index,
*referrer->GetDexFile(),
// Read barrier generates a runtime call in slow path and we need a valid
// dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
- /* dex_pc */ 0);
+ /* dex_pc= */ 0);
if (iget->GetType() == DataType::Type::kReference) {
// Use the same dex_cache that we used for field lookup as the hint_dex_cache.
Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache());
@@ -1662,7 +1696,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index,
outer_compilation_unit_.GetClassLoader(),
dex_cache,
handles_,
- /* is_first_run */ false);
+ /* is_first_run= */ false);
rtp.Visit(iget);
}
return iget;
@@ -1676,7 +1710,7 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index,
REQUIRES_SHARED(Locks::mutator_lock_) {
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
ArtField* resolved_field =
- class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
+ class_linker->LookupResolvedField(field_index, referrer, /* is_static= */ false);
DCHECK(resolved_field != nullptr);
if (is_final != nullptr) {
// This information is needed only for constructors.
@@ -1695,18 +1729,33 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index,
*referrer->GetDexFile(),
// Read barrier generates a runtime call in slow path and we need a valid
// dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
- /* dex_pc */ 0);
+ /* dex_pc= */ 0);
return iput;
}
template <typename T>
-static inline Handle<T> NewHandleIfDifferent(T* object,
+static inline Handle<T> NewHandleIfDifferent(ObjPtr<T> object,
Handle<T> hint,
VariableSizedHandleScope* handles)
REQUIRES_SHARED(Locks::mutator_lock_) {
return (object != hint.Get()) ? handles->NewHandle(object) : hint;
}
+static bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ if (!Runtime::Current()->IsAotCompiler()) {
+ // JIT can always encode methods in stack maps.
+ return true;
+ }
+ if (IsSameDexFile(caller_dex_file, *callee->GetDexFile())) {
+ return true;
+ }
+ // TODO(ngeoffray): Support more AOT cases for inlining:
+ // - methods in multidex
+ // - methods in boot image for on-device non-PIC compilation.
+ return false;
+}
+
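The new helper above decides whether an inlined callee can be referenced from AOT stack maps. A minimal standalone model of that decision, with plain booleans standing in for Runtime::Current()->IsAotCompiler() and IsSameDexFile() (illustration only, not ART code):

#include <cassert>

static bool CanEncode(bool is_aot, bool same_dex_file) {
  if (!is_aot) {
    return true;  // JIT can always encode methods in stack maps.
  }
  return same_dex_file;  // AOT currently supports only same-dex-file callees.
}

int main() {
  assert(CanEncode(/* is_aot= */ false, /* same_dex_file= */ false));  // JIT: always OK.
  assert(CanEncode(/* is_aot= */ true, /* same_dex_file= */ true));    // AOT, same dex file.
  assert(!CanEncode(/* is_aot= */ true, /* same_dex_file= */ false));  // AOT, cross-dex: bail.
  return 0;
}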
bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
ArtMethod* resolved_method,
ReferenceTypeInfo receiver_type,
@@ -1714,7 +1763,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
HInstruction** return_replacement) {
DCHECK(!(resolved_method->IsStatic() && receiver_type.IsValid()));
ScopedObjectAccess soa(Thread::Current());
- const DexFile::CodeItem* code_item = resolved_method->GetCodeItem();
+ const dex::CodeItem* code_item = resolved_method->GetCodeItem();
const DexFile& callee_dex_file = *resolved_method->GetDexFile();
uint32_t method_index = resolved_method->GetDexMethodIndex();
CodeItemDebugInfoAccessor code_item_accessor(resolved_method->DexInstructionDebugInfo());
@@ -1727,6 +1776,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
caller_compilation_unit_.GetClassLoader(),
handles_);
+ Handle<mirror::Class> compiling_class = handles_->NewHandle(resolved_method->GetDeclaringClass());
DexCompilationUnit dex_compilation_unit(
class_loader,
class_linker,
@@ -1735,8 +1785,9 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
resolved_method->GetDeclaringClass()->GetDexClassDefIndex(),
method_index,
resolved_method->GetAccessFlags(),
- /* verified_method */ nullptr,
- dex_cache);
+ /* verified_method= */ nullptr,
+ dex_cache,
+ compiling_class);
InvokeType invoke_type = invoke_instruction->GetInvokeType();
if (invoke_type == kInterface) {
@@ -1745,16 +1796,25 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
invoke_type = kVirtual;
}
+ bool caller_dead_reference_safe = graph_->IsDeadReferenceSafe();
+ const dex::ClassDef& callee_class = resolved_method->GetClassDef();
+ // MethodContainsRSensitiveAccess is currently slow, but HasDeadReferenceSafeAnnotation()
+ // is rarely true.
+ bool callee_dead_reference_safe =
+ annotations::HasDeadReferenceSafeAnnotation(callee_dex_file, callee_class)
+ && !annotations::MethodContainsRSensitiveAccess(callee_dex_file, callee_class, method_index);
+
const int32_t caller_instruction_counter = graph_->GetCurrentInstructionId();
HGraph* callee_graph = new (graph_->GetAllocator()) HGraph(
graph_->GetAllocator(),
graph_->GetArenaStack(),
callee_dex_file,
method_index,
- compiler_driver_->GetInstructionSet(),
+ codegen_->GetCompilerOptions().GetInstructionSet(),
invoke_type,
+ callee_dead_reference_safe,
graph_->IsDebuggable(),
- /* osr */ false,
+ /* osr= */ false,
caller_instruction_counter);
callee_graph->SetArtMethod(resolved_method);
@@ -1775,7 +1835,6 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
code_item_accessor,
&dex_compilation_unit,
&outer_compilation_unit_,
- compiler_driver_,
codegen_,
inline_stats_,
resolved_method->GetQuickenedInfo(),
@@ -1788,8 +1847,8 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
return false;
}
- if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
- compiler_driver_->GetInstructionSet())) {
+ if (!RegisterAllocator::CanAllocateRegistersFor(
+ *callee_graph, codegen_->GetCompilerOptions().GetInstructionSet())) {
LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRegisterAllocator)
<< "Method " << callee_dex_file.PrettyMethod(method_index)
<< " cannot be inlined because of the register allocator";
@@ -1836,7 +1895,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
outer_compilation_unit_.GetClassLoader(),
dex_compilation_unit.GetDexCache(),
handles_,
- /* is_first_run */ false).Run();
+ /* is_first_run= */ false).Run();
}
RunOptimizations(callee_graph, code_item, dex_compilation_unit);
@@ -1980,23 +2039,26 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
inline_stats_->AddTo(stats_);
}
+ if (caller_dead_reference_safe && !callee_dead_reference_safe) {
+ // Caller was dead reference safe, but is not anymore, since we inlined dead
+ // reference unsafe code. Prior transformations remain valid, since they did not
+ // affect the inlined code.
+ graph_->MarkDeadReferenceUnsafe();
+ }
+
return true;
}
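The block above makes dead-reference safety a one-way property of the caller graph. A sketch of that propagation rule, assuming a single monotonic flag per graph (the real flag lives on HGraph and is cleared via MarkDeadReferenceUnsafe()):

struct GraphFlags {
  bool dead_reference_safe;
};

static void PropagateAfterInlining(GraphFlags& caller, const GraphFlags& callee) {
  if (caller.dead_reference_safe && !callee.dead_reference_safe) {
    // Inlining pulled in dead-reference-unsafe code; the caller loses the
    // property. Earlier transformations stay valid: they never saw this code.
    caller.dead_reference_safe = false;
  }
}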
void HInliner::RunOptimizations(HGraph* callee_graph,
- const DexFile::CodeItem* code_item,
+ const dex::CodeItem* code_item,
const DexCompilationUnit& dex_compilation_unit) {
// Note: if the outermost_graph_ is being compiled OSR, we should not run any
// optimization that could lead to a HDeoptimize. The following optimizations do not.
HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
HConstantFolding fold(callee_graph, "constant_folding$inliner");
- HSharpening sharpening(callee_graph, codegen_, compiler_driver_);
- InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_);
- IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
+ InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
HOptimization* optimizations[] = {
- &intrinsics,
- &sharpening,
&simplify,
&fold,
&dce,
@@ -2031,7 +2093,6 @@ void HInliner::RunOptimizations(HGraph* callee_graph,
codegen_,
outer_compilation_unit_,
dex_compilation_unit,
- compiler_driver_,
handles_,
inline_stats_,
total_number_of_dex_registers_ + accessor.RegistersSize(),
@@ -2065,7 +2126,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod*
// is more specific than the class which declares the method.
if (!resolved_method->IsStatic()) {
if (IsReferenceTypeRefinement(GetClassRTI(resolved_method->GetDeclaringClass()),
- /* declared_can_be_null */ false,
+ /* declared_can_be_null= */ false,
invoke_instruction->InputAt(0u))) {
return true;
}
@@ -2074,7 +2135,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod*
// Iterate over the list of parameter types and test whether any of the
// actual inputs has a more specific reference type than the type declared in
// the signature.
- const DexFile::TypeList* param_list = resolved_method->GetParameterTypeList();
+ const dex::TypeList* param_list = resolved_method->GetParameterTypeList();
for (size_t param_idx = 0,
input_idx = resolved_method->IsStatic() ? 0 : 1,
e = (param_list == nullptr ? 0 : param_list->Size());
@@ -2085,7 +2146,7 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod*
ObjPtr<mirror::Class> param_cls = resolved_method->LookupResolvedClassFromTypeIndex(
param_list->GetTypeItem(param_idx).type_idx_);
if (IsReferenceTypeRefinement(GetClassRTI(param_cls),
- /* declared_can_be_null */ true,
+ /* declared_can_be_null= */ true,
input)) {
return true;
}
@@ -2102,14 +2163,13 @@ bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction,
if (return_replacement->GetType() == DataType::Type::kReference) {
// Test if the return type is a refinement of the declared return type.
if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(),
- /* declared_can_be_null */ true,
+ /* declared_can_be_null= */ true,
return_replacement)) {
return true;
} else if (return_replacement->IsInstanceFieldGet()) {
HInstanceFieldGet* field_get = return_replacement->AsInstanceFieldGet();
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
if (field_get->GetFieldInfo().GetField() ==
- class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0)) {
+ GetClassRoot<mirror::Object>()->GetInstanceField(0)) {
return true;
}
}
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 02465d37ba..15d7349694 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -19,13 +19,12 @@
#include "dex/dex_file_types.h"
#include "dex/invoke_type.h"
-#include "jit/profile_compilation_info.h"
#include "optimization.h"
+#include "profile/profile_compilation_info.h"
namespace art {
class CodeGenerator;
-class CompilerDriver;
class DexCompilationUnit;
class HGraph;
class HInvoke;
@@ -38,7 +37,6 @@ class HInliner : public HOptimization {
CodeGenerator* codegen,
const DexCompilationUnit& outer_compilation_unit,
const DexCompilationUnit& caller_compilation_unit,
- CompilerDriver* compiler_driver,
VariableSizedHandleScope* handles,
OptimizingCompilerStats* stats,
size_t total_number_of_dex_registers,
@@ -51,7 +49,6 @@ class HInliner : public HOptimization {
outer_compilation_unit_(outer_compilation_unit),
caller_compilation_unit_(caller_compilation_unit),
codegen_(codegen),
- compiler_driver_(compiler_driver),
total_number_of_dex_registers_(total_number_of_dex_registers),
total_number_of_instructions_(total_number_of_instructions),
parent_(parent),
@@ -60,7 +57,7 @@ class HInliner : public HOptimization {
handles_(handles),
inline_stats_(nullptr) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kInlinerPassName = "inliner";
@@ -101,7 +98,7 @@ class HInliner : public HOptimization {
// Run simple optimizations on `callee_graph`.
void RunOptimizations(HGraph* callee_graph,
- const DexFile::CodeItem* code_item,
+ const dex::CodeItem* code_item,
const DexCompilationUnit& dex_compilation_unit)
REQUIRES_SHARED(Locks::mutator_lock_);
@@ -280,7 +277,6 @@ class HInliner : public HOptimization {
const DexCompilationUnit& outer_compilation_unit_;
const DexCompilationUnit& caller_compilation_unit_;
CodeGenerator* const codegen_;
- CompilerDriver* const compiler_driver_;
const size_t total_number_of_dex_registers_;
size_t total_number_of_instructions_;
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index c7aef3779d..5e7b57523f 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -19,12 +19,13 @@
#include "art_method-inl.h"
#include "base/arena_bit_vector.h"
#include "base/bit_vector-inl.h"
+#include "base/logging.h"
#include "block_builder.h"
-#include "class_linker.h"
+#include "class_linker-inl.h"
+#include "code_generator.h"
#include "data_type-inl.h"
#include "dex/bytecode_utils.h"
#include "dex/dex_instruction-inl.h"
-#include "driver/compiler_driver-inl.h"
#include "driver/dex_compilation_unit.h"
#include "driver/compiler_options.h"
#include "imtable-inl.h"
@@ -47,7 +48,6 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph,
DataType::Type return_type,
const DexCompilationUnit* dex_compilation_unit,
const DexCompilationUnit* outer_compilation_unit,
- CompilerDriver* compiler_driver,
CodeGenerator* code_generator,
ArrayRef<const uint8_t> interpreter_metadata,
OptimizingCompilerStats* compiler_stats,
@@ -61,7 +61,6 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph,
return_type_(return_type),
block_builder_(block_builder),
ssa_builder_(ssa_builder),
- compiler_driver_(compiler_driver),
code_generator_(code_generator),
dex_compilation_unit_(dex_compilation_unit),
outer_compilation_unit_(outer_compilation_unit),
@@ -73,7 +72,8 @@ HInstructionBuilder::HInstructionBuilder(HGraph* graph,
current_locals_(nullptr),
latest_result_(nullptr),
current_this_parameter_(nullptr),
- loop_headers_(local_allocator->Adapter(kArenaAllocGraphBuilder)) {
+ loop_headers_(local_allocator->Adapter(kArenaAllocGraphBuilder)),
+ class_cache_(std::less<dex::TypeIndex>(), local_allocator->Adapter(kArenaAllocGraphBuilder)) {
loop_headers_.reserve(kDefaultNumberOfLoops);
}
@@ -319,8 +319,8 @@ bool HInstructionBuilder::Build() {
// Find locations where we want to generate extra stackmaps for native debugging.
// This allows us to generate the info only at interesting points (for example,
// at the start of a Java statement) rather than before every dex instruction.
- const bool native_debuggable = compiler_driver_ != nullptr &&
- compiler_driver_->GetCompilerOptions().GetNativeDebuggable();
+ const bool native_debuggable = code_generator_ != nullptr &&
+ code_generator_->GetCompilerOptions().GetNativeDebuggable();
ArenaBitVector* native_debug_info_locations = nullptr;
if (native_debuggable) {
native_debug_info_locations = FindNativeDebugInfoLocations();
@@ -434,7 +434,7 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) {
HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall,
HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
- /* method_load_data */ 0u
+ /* method_load_data= */ 0u
};
InvokeType invoke_type = dex_compilation_unit_->IsStatic() ? kStatic : kDirect;
HInvokeStaticOrDirect* invoke = new (allocator_) HInvokeStaticOrDirect(
@@ -448,14 +448,8 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) {
invoke_type,
target_method,
HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
- HandleInvoke(invoke,
- in_vregs,
- /* args */ nullptr,
- graph_->GetNumberOfVRegs() - in_vregs,
- /* is_range */ true,
- dex_file_->GetMethodShorty(method_idx),
- /* clinit_check */ nullptr,
- /* is_unresolved */ false);
+ RangeInstructionOperands operands(graph_->GetNumberOfVRegs() - in_vregs, in_vregs);
+ HandleInvoke(invoke, operands, dex_file_->GetMethodShorty(method_idx), /* is_unresolved= */ false);
// Add the return instruction.
if (return_type_ == DataType::Type::kVoid) {
@@ -472,22 +466,17 @@ void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) {
}
ArenaBitVector* HInstructionBuilder::FindNativeDebugInfoLocations() {
- // The callback gets called when the line number changes.
- // In other words, it marks the start of new java statement.
- struct Callback {
- static bool Position(void* ctx, const DexFile::PositionInfo& entry) {
- static_cast<ArenaBitVector*>(ctx)->SetBit(entry.address_);
- return false;
- }
- };
ArenaBitVector* locations = ArenaBitVector::Create(local_allocator_,
code_item_accessor_.InsnsSizeInCodeUnits(),
- /* expandable */ false,
+ /* expandable= */ false,
kArenaAllocGraphBuilder);
locations->ClearAllBits();
- dex_file_->DecodeDebugPositionInfo(code_item_accessor_.DebugInfoOffset(),
- Callback::Position,
- locations);
+ // The visitor gets called when the line number changes.
+ // In other words, it marks the start of a new Java statement.
+ code_item_accessor_.DecodeDebugPositionInfo([&](const DexFile::PositionInfo& entry) {
+ locations->SetBit(entry.address_);
+ return false;
+ });
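The C-style callback struct is replaced by a lambda handed to the code item accessor. A self-contained sketch of that visitor shape, using a hypothetical PositionInfo and decode loop in place of CodeItemDebugInfoAccessor::DecodeDebugPositionInfo:

#include <cstdint>
#include <vector>

struct PositionInfo { uint32_t address_; };  // Stand-in for DexFile::PositionInfo.

// Invokes `visitor` per entry; a true return stops decoding early, which is
// why the lambda above always returns false (visit everything).
template <typename Visitor>
static void DecodePositions(const std::vector<PositionInfo>& entries, Visitor&& visitor) {
  for (const PositionInfo& entry : entries) {
    if (visitor(entry)) {
      return;
    }
  }
}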
// Instruction-specific tweaks.
for (const DexInstructionPcPair& inst : code_item_accessor_) {
switch (inst->Opcode()) {
@@ -570,7 +559,7 @@ void HInstructionBuilder::InitializeParameters() {
uint16_t locals_index = graph_->GetNumberOfLocalVRegs();
uint16_t parameter_index = 0;
- const DexFile::MethodId& referrer_method_id =
+ const dex::MethodId& referrer_method_id =
dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
if (!dex_compilation_unit_->IsStatic()) {
// Add the implicit 'this' argument, not expressed in the signature.
@@ -578,7 +567,7 @@ void HInstructionBuilder::InitializeParameters() {
referrer_method_id.class_idx_,
parameter_index++,
DataType::Type::kReference,
- /* is_this */ true);
+ /* is_this= */ true);
AppendInstruction(parameter);
UpdateLocal(locals_index++, parameter);
number_of_parameters--;
@@ -587,15 +576,15 @@ void HInstructionBuilder::InitializeParameters() {
DCHECK(current_this_parameter_ == nullptr);
}
- const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
- const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
+ const dex::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id);
+ const dex::TypeList* arg_types = dex_file_->GetProtoParameters(proto);
for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) {
HParameterValue* parameter = new (allocator_) HParameterValue(
*dex_file_,
arg_types->GetTypeItem(shorty_pos - 1).type_idx_,
parameter_index++,
DataType::FromShorty(shorty[shorty_pos]),
- /* is_this */ false);
+ /* is_this= */ false);
++shorty_pos;
AppendInstruction(parameter);
// Store the parameter value in the local that the dex code will use
@@ -720,20 +709,18 @@ void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse
// Does the method being compiled need any constructor barriers being inserted?
// (Always 'false' for methods that aren't <init>.)
-static bool RequiresConstructorBarrier(const DexCompilationUnit* cu, CompilerDriver* driver) {
+static bool RequiresConstructorBarrier(const DexCompilationUnit* cu) {
// Can be null in unit tests only.
if (UNLIKELY(cu == nullptr)) {
return false;
}
- Thread* self = Thread::Current();
- return cu->IsConstructor()
- && !cu->IsStatic()
- // RequiresConstructorBarrier must only be queried for <init> methods;
- // it's effectively "false" for every other method.
- //
- // See CompilerDriver::RequiresConstructBarrier for more explanation.
- && driver->RequiresConstructorBarrier(self, cu->GetDexFile(), cu->GetClassDefIndex());
+ // Constructor barriers are applicable only for <init> methods.
+ if (LIKELY(!cu->IsConstructor() || cu->IsStatic())) {
+ return false;
+ }
+
+ return cu->RequiresConstructorBarrier();
}
// Returns true if `block` has only one successor which starts at the next
@@ -779,7 +766,7 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction,
// Only <init> (which is a return-void) could possibly have a constructor fence.
// This may insert additional redundant constructor fences from the super constructors.
// TODO: remove redundant constructor fences (b/36656456).
- if (RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)) {
+ if (RequiresConstructorBarrier(dex_compilation_unit_)) {
// Compiling instance constructor.
DCHECK_STREQ("<init>", graph_->GetMethodName());
@@ -793,7 +780,7 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction,
}
AppendInstruction(new (allocator_) HReturnVoid(dex_pc));
} else {
- DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_));
+ DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_));
HInstruction* value = LoadLocal(instruction.VRegA(), type);
AppendInstruction(new (allocator_) HReturn(value, dex_pc));
}
@@ -860,7 +847,7 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in
// make this an invoke-unresolved to handle cross-dex invokes or abstract super methods, both of
// which require runtime handling.
if (invoke_type == kSuper) {
- ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
+ ObjPtr<mirror::Class> compiling_class = dex_compilation_unit_->GetCompilingClass().Get();
if (compiling_class == nullptr) {
// We could not determine the method's class we need to wait until runtime.
DCHECK(Runtime::Current()->IsAotCompiler());
@@ -890,8 +877,8 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in
// The back-end code generator relies on this check in order to ensure that it will not
// attempt to read the dex_cache with a dex_method_index that is not from the correct
// dex_file. If we didn't do this check then the dex_method_index will not be updated in the
- // builder, which means that the code-generator (and compiler driver during sharpening and
- // inliner, maybe) might invoke an incorrect method.
+ // builder, which means that the code-generator (and sharpening and inliner, maybe)
+ // might invoke an incorrect method.
// TODO: The actual method could still be referenced in the current dex file, so we
// could try locating it.
// TODO: Remove the dex_file restriction.
@@ -916,16 +903,13 @@ static bool IsStringConstructor(ArtMethod* method) {
bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
uint32_t dex_pc,
uint32_t method_idx,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index) {
+ const InstructionOperands& operands) {
InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode());
- const char* descriptor = dex_file_->GetMethodShorty(method_idx);
- DataType::Type return_type = DataType::FromShorty(descriptor[0]);
+ const char* shorty = dex_file_->GetMethodShorty(method_idx);
+ DataType::Type return_type = DataType::FromShorty(shorty[0]);
// Remove the return type from the 'proto'.
- size_t number_of_arguments = strlen(descriptor) - 1;
+ size_t number_of_arguments = strlen(shorty) - 1;
if (invoke_type != kStatic) { // instance call
// One extra argument for 'this'.
number_of_arguments++;
@@ -942,14 +926,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
dex_pc,
method_idx,
invoke_type);
- return HandleInvoke(invoke,
- number_of_vreg_arguments,
- args,
- register_index,
- is_range,
- descriptor,
- nullptr, /* clinit_check */
- true /* is_unresolved */);
+ return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ true);
}
// Replace calls to String.<init> with StringFactory.
@@ -968,20 +945,15 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
HInvoke* invoke = new (allocator_) HInvokeStaticOrDirect(
allocator_,
number_of_arguments - 1,
- DataType::Type::kReference /*return_type */,
+ /* return_type= */ DataType::Type::kReference,
dex_pc,
method_idx,
- nullptr /* resolved_method */,
+ /* resolved_method= */ nullptr,
dispatch_info,
invoke_type,
target_method,
HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit);
- return HandleStringInit(invoke,
- number_of_vreg_arguments,
- args,
- register_index,
- is_range,
- descriptor);
+ return HandleStringInit(invoke, operands, shorty);
}
// Potential class initialization check, in the case of a static method call.
@@ -994,8 +966,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
= HInvokeStaticOrDirect::ClinitCheckRequirement::kImplicit;
ScopedObjectAccess soa(Thread::Current());
if (invoke_type == kStatic) {
- clinit_check = ProcessClinitCheckForInvoke(
- dex_pc, resolved_method, &clinit_check_requirement);
+ clinit_check =
+ ProcessClinitCheckForInvoke(dex_pc, resolved_method, &clinit_check_requirement);
} else if (invoke_type == kSuper) {
if (IsSameDexFile(*resolved_method->GetDexFile(), *dex_compilation_unit_->GetDexFile())) {
// Update the method index to the one resolved. Note that this may be a no-op if
@@ -1004,11 +976,8 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
}
}
- HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
- HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall,
- HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
- 0u
- };
+ HInvokeStaticOrDirect::DispatchInfo dispatch_info =
+ HSharpening::SharpenInvokeStaticOrDirect(resolved_method, code_generator_);
MethodReference target_method(resolved_method->GetDexFile(),
resolved_method->GetDexMethodIndex());
invoke = new (allocator_) HInvokeStaticOrDirect(allocator_,
@@ -1041,42 +1010,39 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction,
resolved_method,
ImTable::GetImtIndex(resolved_method));
}
-
- return HandleInvoke(invoke,
- number_of_vreg_arguments,
- args,
- register_index,
- is_range,
- descriptor,
- clinit_check,
- false /* is_unresolved */);
+ return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false, clinit_check);
}
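Several builder entry points above now take a single InstructionOperands view instead of the (number_of_vreg_arguments, args, register_index, is_range) parameter cluster. A compact sketch of the two concrete shapes, mirroring the classes the patch uses (interfaces abbreviated; treat the details as assumptions about the ART tree):

#include <cstddef>
#include <cstdint>

class InstructionOperands {
 public:
  explicit InstructionOperands(size_t num) : num_(num) {}
  virtual ~InstructionOperands() {}
  size_t GetNumberOfOperands() const { return num_; }
  virtual uint32_t GetOperand(size_t index) const = 0;
 private:
  const size_t num_;
};

// invoke-*/range: operands form a contiguous vreg block [first, first + num).
class RangeInstructionOperands : public InstructionOperands {
 public:
  RangeInstructionOperands(uint32_t first, size_t num)
      : InstructionOperands(num), first_(first) {}
  uint32_t GetOperand(size_t index) const override {
    return first_ + static_cast<uint32_t>(index);
  }
 private:
  const uint32_t first_;
};

// invoke-* (35c encoding): up to five explicit vregs decoded by GetVarArgs().
class VarArgsInstructionOperands : public InstructionOperands {
 public:
  VarArgsInstructionOperands(const uint32_t* args, size_t num)
      : InstructionOperands(num), args_(args) {}
  uint32_t GetOperand(size_t index) const override { return args_[index]; }
 private:
  const uint32_t* const args_;
};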
-bool HInstructionBuilder::BuildInvokePolymorphic(const Instruction& instruction ATTRIBUTE_UNUSED,
- uint32_t dex_pc,
+bool HInstructionBuilder::BuildInvokePolymorphic(uint32_t dex_pc,
uint32_t method_idx,
- uint32_t proto_idx,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index) {
- const char* descriptor = dex_file_->GetShorty(proto_idx);
- DCHECK_EQ(1 + ArtMethod::NumArgRegisters(descriptor), number_of_vreg_arguments);
- DataType::Type return_type = DataType::FromShorty(descriptor[0]);
- size_t number_of_arguments = strlen(descriptor);
+ dex::ProtoIndex proto_idx,
+ const InstructionOperands& operands) {
+ const char* shorty = dex_file_->GetShorty(proto_idx);
+ DCHECK_EQ(1 + ArtMethod::NumArgRegisters(shorty), operands.GetNumberOfOperands());
+ DataType::Type return_type = DataType::FromShorty(shorty[0]);
+ size_t number_of_arguments = strlen(shorty);
HInvoke* invoke = new (allocator_) HInvokePolymorphic(allocator_,
number_of_arguments,
return_type,
dex_pc,
method_idx);
- return HandleInvoke(invoke,
- number_of_vreg_arguments,
- args,
- register_index,
- is_range,
- descriptor,
- nullptr /* clinit_check */,
- false /* is_unresolved */);
+ return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false);
+}
+
+
+bool HInstructionBuilder::BuildInvokeCustom(uint32_t dex_pc,
+ uint32_t call_site_idx,
+ const InstructionOperands& operands) {
+ dex::ProtoIndex proto_idx = dex_file_->GetProtoIndexForCallSite(call_site_idx);
+ const char* shorty = dex_file_->GetShorty(proto_idx);
+ DataType::Type return_type = DataType::FromShorty(shorty[0]);
+ size_t number_of_arguments = strlen(shorty) - 1;
+ HInvoke* invoke = new (allocator_) HInvokeCustom(allocator_,
+ number_of_arguments,
+ call_site_idx,
+ return_type,
+ dex_pc);
+ return HandleInvoke(invoke, operands, shorty, /* is_unresolved= */ false);
}
HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, uint32_t dex_pc) {
@@ -1099,6 +1065,10 @@ HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, u
if (load_class->NeedsAccessCheck() || klass->IsFinalizable() || !klass->IsInstantiable()) {
entrypoint = kQuickAllocObjectWithChecks;
}
+ // We will always be able to resolve the string class since it is in the BCP.
+ if (!klass.IsNull() && klass->IsStringClass()) {
+ entrypoint = kQuickAllocStringObject;
+ }
// Consider classes we haven't resolved as potentially finalizable.
bool finalizable = (klass == nullptr) || klass->IsFinalizable();
@@ -1167,30 +1137,219 @@ void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* alloc
MethodCompilationStat::kConstructorFenceGeneratedNew);
}
+static bool IsInBootImage(ObjPtr<mirror::Class> cls, const CompilerOptions& compiler_options)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ if (compiler_options.IsBootImage()) {
+ std::string temp;
+ const char* descriptor = cls->GetDescriptor(&temp);
+ return compiler_options.IsImageClass(descriptor);
+ } else {
+ return Runtime::Current()->GetHeap()->FindSpaceFromObject(cls, false)->IsImageSpace();
+ }
+}
+
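Which question IsInBootImage asks depends on the compilation mode. A one-line restatement of the branch, with booleans standing in for the compiler-options and heap queries (assumption-level sketch):

static bool InBootImage(bool compiling_boot_image,
                        bool listed_as_image_class,    // CompilerOptions::IsImageClass().
                        bool object_in_image_space) {  // Heap-space lookup otherwise.
  return compiling_boot_image ? listed_as_image_class : object_in_image_space;
}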
static bool IsSubClass(ObjPtr<mirror::Class> to_test, ObjPtr<mirror::Class> super_class)
REQUIRES_SHARED(Locks::mutator_lock_) {
return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class);
}
+static bool HasTrivialClinit(ObjPtr<mirror::Class> klass, PointerSize pointer_size)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ // Check if the class has encoded fields that trigger bytecode execution.
+ // (Encoded fields are just a different representation of <clinit>.)
+ if (klass->NumStaticFields() != 0u) {
+ DCHECK(klass->GetClassDef() != nullptr);
+ EncodedStaticFieldValueIterator it(klass->GetDexFile(), *klass->GetClassDef());
+ for (; it.HasNext(); it.Next()) {
+ switch (it.GetValueType()) {
+ case EncodedArrayValueIterator::ValueType::kBoolean:
+ case EncodedArrayValueIterator::ValueType::kByte:
+ case EncodedArrayValueIterator::ValueType::kShort:
+ case EncodedArrayValueIterator::ValueType::kChar:
+ case EncodedArrayValueIterator::ValueType::kInt:
+ case EncodedArrayValueIterator::ValueType::kLong:
+ case EncodedArrayValueIterator::ValueType::kFloat:
+ case EncodedArrayValueIterator::ValueType::kDouble:
+ case EncodedArrayValueIterator::ValueType::kNull:
+ case EncodedArrayValueIterator::ValueType::kString:
+ // Primitive, null or j.l.String initialization is permitted.
+ break;
+ case EncodedArrayValueIterator::ValueType::kType:
+ // Type initialization can load classes and execute bytecode through a class loader
+ // which can execute arbitrary bytecode. We do not optimize for known class loaders;
+ // kType is rarely used (if ever).
+ return false;
+ default:
+ // Other types in the encoded static field list are rejected by the DexFileVerifier.
+ LOG(FATAL) << "Unexpected type " << it.GetValueType();
+ UNREACHABLE();
+ }
+ }
+ }
+ // Check if the class has <clinit> that executes arbitrary code.
+ // Initialization of static fields of the class itself with constants is allowed.
+ ArtMethod* clinit = klass->FindClassInitializer(pointer_size);
+ if (clinit != nullptr) {
+ const DexFile& dex_file = *clinit->GetDexFile();
+ CodeItemInstructionAccessor accessor(dex_file, clinit->GetCodeItem());
+ for (DexInstructionPcPair it : accessor) {
+ switch (it->Opcode()) {
+ case Instruction::CONST_4:
+ case Instruction::CONST_16:
+ case Instruction::CONST:
+ case Instruction::CONST_HIGH16:
+ case Instruction::CONST_WIDE_16:
+ case Instruction::CONST_WIDE_32:
+ case Instruction::CONST_WIDE:
+ case Instruction::CONST_WIDE_HIGH16:
+ case Instruction::CONST_STRING:
+ case Instruction::CONST_STRING_JUMBO:
+ // Primitive, null or j.l.String initialization is permitted.
+ break;
+ case Instruction::RETURN_VOID:
+ case Instruction::RETURN_VOID_NO_BARRIER:
+ break;
+ case Instruction::SPUT:
+ case Instruction::SPUT_WIDE:
+ case Instruction::SPUT_OBJECT:
+ case Instruction::SPUT_BOOLEAN:
+ case Instruction::SPUT_BYTE:
+ case Instruction::SPUT_CHAR:
+ case Instruction::SPUT_SHORT:
+ // Only initialization of a static field of the same class is permitted.
+ if (dex_file.GetFieldId(it->VRegB_21c()).class_idx_ != klass->GetDexTypeIndex()) {
+ return false;
+ }
+ break;
+ case Instruction::NEW_ARRAY:
+ // Only primitive arrays are permitted.
+ if (Primitive::GetType(dex_file.GetTypeDescriptor(dex_file.GetTypeId(
+ dex::TypeIndex(it->VRegC_22c())))[1]) == Primitive::kPrimNot) {
+ return false;
+ }
+ break;
+ case Instruction::APUT:
+ case Instruction::APUT_WIDE:
+ case Instruction::APUT_BOOLEAN:
+ case Instruction::APUT_BYTE:
+ case Instruction::APUT_CHAR:
+ case Instruction::APUT_SHORT:
+ case Instruction::FILL_ARRAY_DATA:
+ case Instruction::NOP:
+ // Allow initialization of primitive arrays (only constants can be stored).
+ // Note: We expect NOPs used for fill-array-data-payload but accept all NOPs
+ // (even unreferenced switch payloads if they make it through the verifier).
+ break;
+ default:
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static bool HasTrivialInitialization(ObjPtr<mirror::Class> cls,
+ const CompilerOptions& compiler_options)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ Runtime* runtime = Runtime::Current();
+ PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
+
+ // Check the superclass chain.
+ for (ObjPtr<mirror::Class> klass = cls; klass != nullptr; klass = klass->GetSuperClass()) {
+ if (klass->IsInitialized() && IsInBootImage(klass, compiler_options)) {
+ break; // `klass` and its superclasses are already initialized in the boot image.
+ }
+ if (!HasTrivialClinit(klass, pointer_size)) {
+ return false;
+ }
+ }
+
+ // Also check interfaces with default methods as they need to be initialized as well.
+ ObjPtr<mirror::IfTable> iftable = cls->GetIfTable();
+ DCHECK(iftable != nullptr);
+ for (int32_t i = 0, count = iftable->Count(); i != count; ++i) {
+ ObjPtr<mirror::Class> iface = iftable->GetInterface(i);
+ if (!iface->HasDefaultMethods()) {
+ continue; // Initializing `cls` does not initialize this interface.
+ }
+ if (iface->IsInitialized() && IsInBootImage(iface, compiler_options)) {
+ continue; // This interface is already initialized in the boot image.
+ }
+ if (!HasTrivialClinit(iface, pointer_size)) {
+ return false;
+ }
+ }
+ return true;
+}
+
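HasTrivialInitialization walks the superclass chain (and default-method interfaces) and accepts only trivial <clinit>s below the first boot-image-initialized class. A toy model of the chain walk (interfaces omitted for brevity):

struct Cls {
  const Cls* super;
  bool boot_image_initialized;
  bool trivial_clinit;
};

static bool HasTrivialInit(const Cls* cls) {
  for (const Cls* k = cls; k != nullptr; k = k->super) {
    if (k->boot_image_initialized) {
      break;  // `k` and everything above it is already initialized.
    }
    if (!k->trivial_clinit) {
      return false;  // Arbitrary code could run during initialization.
    }
  }
  return true;
}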
bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const {
if (cls == nullptr) {
return false;
}
- // `CanAssumeClassIsLoaded` will return true if we're JITting, or will
- // check whether the class is in an image for the AOT compilation.
- if (cls->IsInitialized() &&
- compiler_driver_->CanAssumeClassIsLoaded(cls.Get())) {
- return true;
- }
-
- if (IsSubClass(GetOutermostCompilingClass(), cls.Get())) {
+ // Check if the class will be initialized at runtime.
+ if (cls->IsInitialized()) {
+ Runtime* runtime = Runtime::Current();
+ if (!runtime->IsAotCompiler()) {
+ DCHECK(runtime->UseJitCompilation());
+ // For JIT, the class cannot revert to an uninitialized state.
+ return true;
+ }
+ // Assume loaded only if klass is in the boot image. App classes cannot be assumed
+ // loaded because we don't even know what class loader will be used to load them.
+ if (IsInBootImage(cls.Get(), code_generator_->GetCompilerOptions())) {
+ return true;
+ }
+ }
+
+ // We can avoid the class initialization check for `cls` in static methods and constructors
+ // in the very same class; invoking a static method involves a class initialization check
+ // and so does the instance allocation that must be executed before invoking a constructor.
+ // Other instance methods of the same class can run on an escaped instance
+ // of an erroneous class. Even a superclass may need to be checked as the subclass
+ // can be completely initialized while the superclass is initializing and the subclass
+ // remains initialized when the superclass initializer throws afterwards. b/62478025
+ // Note: The HClinitCheck+HInvokeStaticOrDirect merging can still apply.
+ auto is_static_method_or_constructor_of_cls = [cls](const DexCompilationUnit& compilation_unit)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ return (compilation_unit.GetAccessFlags() & (kAccStatic | kAccConstructor)) != 0u &&
+ compilation_unit.GetCompilingClass().Get() == cls.Get();
+ };
+ if (is_static_method_or_constructor_of_cls(*outer_compilation_unit_) ||
+ // Check also the innermost method. Though excessive copies of ClinitCheck can be
+ // eliminated by GVN, that happens only after the decision whether to inline the
+ // graph or not and that may depend on the presence of the ClinitCheck.
+ // TODO: We should walk over the entire inlined method chain, but we don't pass that
+ // information to the builder.
+ is_static_method_or_constructor_of_cls(*dex_compilation_unit_)) {
return true;
}
- // TODO: We should walk over the inlined methods, but we don't pass
- // that information to the builder.
- if (IsSubClass(GetCompilingClass(), cls.Get())) {
+ // Otherwise, we may be able to avoid the check if `cls` is a superclass of a method being
+ // compiled here (anywhere in the inlining chain) as the `cls` must have started initializing
+ // before calling any `cls` or subclass methods. Static methods require a clinit check and
+ // instance methods require an instance which cannot be created before doing a clinit check.
+ // When a subclass of `cls` starts initializing, it starts initializing its superclass
+ // chain up to `cls` without running any bytecode, i.e. without any opportunity for circular
+ // initialization weirdness.
+ //
+ // If the initialization of `cls` is trivial (`cls` and its superclasses and superinterfaces
+ // with default methods initialize only their own static fields using constant values), it must
+ // complete, either successfully or by throwing and marking `cls` erroneous, without allocating
+ // any instances of `cls` or subclasses (or any other class) and without calling any methods.
+ // If it completes by throwing, no instances of `cls` shall be created and no subclass method
+ // bytecode shall execute (see above), therefore the instruction we're building shall be
+ // unreachable. By reaching the instruction, we know that `cls` was initialized successfully.
+ //
+ // TODO: We should walk over the entire inlined method chain, but we don't pass that
+ // information to the builder. (We could also check if we're guaranteed a non-null instance
+ // of `cls` at this location but that's outside the scope of the instruction builder.)
+ bool is_subclass = IsSubClass(outer_compilation_unit_->GetCompilingClass().Get(), cls.Get());
+ if (dex_compilation_unit_ != outer_compilation_unit_) {
+ is_subclass = is_subclass ||
+ IsSubClass(dex_compilation_unit_->GetCompilingClass().Get(), cls.Get());
+ }
+ if (is_subclass && HasTrivialInitialization(cls.Get(), code_generator_->GetCompilerOptions())) {
return true;
}
@@ -1198,9 +1357,9 @@ bool HInstructionBuilder::IsInitialized(Handle<mirror::Class> cls) const {
}
HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke(
- uint32_t dex_pc,
- ArtMethod* resolved_method,
- HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
+ uint32_t dex_pc,
+ ArtMethod* resolved_method,
+ HInvokeStaticOrDirect::ClinitCheckRequirement* clinit_check_requirement) {
Handle<mirror::Class> klass = handles_->NewHandle(resolved_method->GetDeclaringClass());
HClinitCheck* clinit_check = nullptr;
@@ -1211,7 +1370,7 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke(
klass->GetDexFile(),
klass,
dex_pc,
- /* needs_access_check */ false);
+ /* needs_access_check= */ false);
if (cls != nullptr) {
*clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit;
clinit_check = new (allocator_) HClinitCheck(cls, dex_pc);
@@ -1222,26 +1381,22 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke(
}
bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke,
- uint32_t number_of_vreg_arguments,
- uint32_t* args,
- uint32_t register_index,
- bool is_range,
- const char* descriptor,
+ const InstructionOperands& operands,
+ const char* shorty,
size_t start_index,
size_t* argument_index) {
- uint32_t descriptor_index = 1; // Skip the return type.
-
+ uint32_t shorty_index = 1; // Skip the return type.
+ const size_t number_of_operands = operands.GetNumberOfOperands();
for (size_t i = start_index;
// Make sure we don't go over the expected arguments or over the number of
// dex registers given. If the instruction was seen as dead by the verifier,
// it hasn't been properly checked.
- (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments());
+ (i < number_of_operands) && (*argument_index < invoke->GetNumberOfArguments());
i++, (*argument_index)++) {
- DataType::Type type = DataType::FromShorty(descriptor[descriptor_index++]);
+ DataType::Type type = DataType::FromShorty(shorty[shorty_index++]);
bool is_wide = (type == DataType::Type::kInt64) || (type == DataType::Type::kFloat64);
- if (!is_range
- && is_wide
- && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) {
+ if (is_wide && ((i + 1 == number_of_operands) ||
+ (operands.GetOperand(i) + 1 != operands.GetOperand(i + 1)))) {
// Longs and doubles should be in pairs, that is, sequential registers. The verifier should
// reject any class where this is violated. However, the verifier only does these checks
// on non trivially dead instructions, so we just bailout the compilation.
@@ -1252,7 +1407,7 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke,
MethodCompilationStat::kNotCompiledMalformedOpcode);
return false;
}
- HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type);
+ HInstruction* arg = LoadLocal(operands.GetOperand(i), type);
invoke->SetArgumentAt(*argument_index, arg);
if (is_wide) {
i++;
@@ -1279,19 +1434,16 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke,
}
bool HInstructionBuilder::HandleInvoke(HInvoke* invoke,
- uint32_t number_of_vreg_arguments,
- uint32_t* args,
- uint32_t register_index,
- bool is_range,
- const char* descriptor,
- HClinitCheck* clinit_check,
- bool is_unresolved) {
+ const InstructionOperands& operands,
+ const char* shorty,
+ bool is_unresolved,
+ HClinitCheck* clinit_check) {
DCHECK(!invoke->IsInvokeStaticOrDirect() || !invoke->AsInvokeStaticOrDirect()->IsStringInit());
size_t start_index = 0;
size_t argument_index = 0;
if (invoke->GetInvokeType() != InvokeType::kStatic) { // Instance call.
- uint32_t obj_reg = is_range ? register_index : args[0];
+ uint32_t obj_reg = operands.GetOperand(0);
HInstruction* arg = is_unresolved
? LoadLocal(obj_reg, DataType::Type::kReference)
: LoadNullCheckedLocal(obj_reg, invoke->GetDexPc());
@@ -1300,14 +1452,7 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke,
argument_index = 1;
}
- if (!SetupInvokeArguments(invoke,
- number_of_vreg_arguments,
- args,
- register_index,
- is_range,
- descriptor,
- start_index,
- &argument_index)) {
+ if (!SetupInvokeArguments(invoke, operands, shorty, start_index, &argument_index)) {
return false;
}
@@ -1327,24 +1472,14 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke,
}
bool HInstructionBuilder::HandleStringInit(HInvoke* invoke,
- uint32_t number_of_vreg_arguments,
- uint32_t* args,
- uint32_t register_index,
- bool is_range,
- const char* descriptor) {
+ const InstructionOperands& operands,
+ const char* shorty) {
DCHECK(invoke->IsInvokeStaticOrDirect());
DCHECK(invoke->AsInvokeStaticOrDirect()->IsStringInit());
size_t start_index = 1;
size_t argument_index = 0;
- if (!SetupInvokeArguments(invoke,
- number_of_vreg_arguments,
- args,
- register_index,
- is_range,
- descriptor,
- start_index,
- &argument_index)) {
+ if (!SetupInvokeArguments(invoke, operands, shorty, start_index, &argument_index)) {
return false;
}
@@ -1352,31 +1487,35 @@ bool HInstructionBuilder::HandleStringInit(HInvoke* invoke,
// This is a StringFactory call, not an actual String constructor. Its result
// replaces the empty String pre-allocated by NewInstance.
- uint32_t orig_this_reg = is_range ? register_index : args[0];
+ uint32_t orig_this_reg = operands.GetOperand(0);
HInstruction* arg_this = LoadLocal(orig_this_reg, DataType::Type::kReference);
// Replacing the NewInstance might render it redundant. Keep a list of these
- // to be visited once it is clear whether it is has remaining uses.
+ // to be visited once it is clear whether it has remaining uses.
if (arg_this->IsNewInstance()) {
ssa_builder_->AddUninitializedString(arg_this->AsNewInstance());
} else {
DCHECK(arg_this->IsPhi());
- // NewInstance is not the direct input of the StringFactory call. It might
- // be redundant but optimizing this case is not worth the effort.
+ // We can get a phi as input of a String.<init> if there is a loop between the
+ // allocation and the String.<init> call. As we don't know which other phis might alias
+ // with `arg_this`, we keep a record of those invocations so we can later replace
+ // the allocation with the invocation.
+ // Add the actual 'this' input so the analysis knows what the allocation instruction is.
+ // The input will be removed during the analysis.
+ invoke->AddInput(arg_this);
+ ssa_builder_->AddUninitializedStringPhi(invoke);
}
-
// Walk over all vregs and replace any occurrence of `arg_this` with `invoke`.
for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) {
if ((*current_locals_)[vreg] == arg_this) {
(*current_locals_)[vreg] = invoke;
}
}
-
return true;
}
static DataType::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) {
- const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index);
+ const dex::FieldId& field_id = dex_file.GetFieldId(field_index);
const char* type = dex_file.GetFieldTypeDescriptor(field_id);
return DataType::FromShorty(type[0]);
}
@@ -1400,7 +1539,7 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio
}
ScopedObjectAccess soa(Thread::Current());
- ArtField* resolved_field = ResolveField(field_index, /* is_static */ false, is_put);
+ ArtField* resolved_field = ResolveField(field_index, /* is_static= */ false, is_put);
// Generate an explicit null check on the reference, unless the field access
// is unresolved. In that case, we rely on the runtime to perform various
@@ -1463,41 +1602,6 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio
return true;
}
-static ObjPtr<mirror::Class> GetClassFrom(CompilerDriver* driver,
- const DexCompilationUnit& compilation_unit) {
- ScopedObjectAccess soa(Thread::Current());
- Handle<mirror::ClassLoader> class_loader = compilation_unit.GetClassLoader();
- Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache();
-
- return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit);
-}
-
-ObjPtr<mirror::Class> HInstructionBuilder::GetOutermostCompilingClass() const {
- return GetClassFrom(compiler_driver_, *outer_compilation_unit_);
-}
-
-ObjPtr<mirror::Class> HInstructionBuilder::GetCompilingClass() const {
- return GetClassFrom(compiler_driver_, *dex_compilation_unit_);
-}
-
-bool HInstructionBuilder::IsOutermostCompilingClass(dex::TypeIndex type_index) const {
- ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<2> hs(soa.Self());
- Handle<mirror::DexCache> dex_cache = dex_compilation_unit_->GetDexCache();
- Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
- Handle<mirror::Class> cls(hs.NewHandle(compiler_driver_->ResolveClass(
- soa, dex_cache, class_loader, type_index, dex_compilation_unit_)));
- Handle<mirror::Class> outer_class(hs.NewHandle(GetOutermostCompilingClass()));
-
- // GetOutermostCompilingClass returns null when the class is unresolved
- // (e.g. if it derives from an unresolved class). This is bogus knowing that
- // we are compiling it.
- // When this happens we cannot establish a direct relation between the current
- // class and the outer class, so we return false.
- // (Note that this is only used for optimizing invokes and field accesses)
- return (cls != nullptr) && (outer_class.Get() == cls.Get());
-}
-
void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction,
uint32_t dex_pc,
bool is_put,
@@ -1517,18 +1621,17 @@ void HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& in
ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static, bool is_put) {
ScopedObjectAccess soa(Thread::Current());
- StackHandleScope<2> hs(soa.Self());
ClassLinker* class_linker = dex_compilation_unit_->GetClassLinker();
Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
- Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass()));
ArtField* resolved_field = class_linker->ResolveField(field_idx,
dex_compilation_unit_->GetDexCache(),
class_loader,
is_static);
+ DCHECK_EQ(resolved_field == nullptr, soa.Self()->IsExceptionPending());
if (UNLIKELY(resolved_field == nullptr)) {
- // Clean up any exception left by type resolution.
+ // Clean up any exception left by field resolution.
soa.Self()->ClearException();
return nullptr;
}
@@ -1540,6 +1643,7 @@ ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static,
}
// Check access.
+ Handle<mirror::Class> compiling_class = dex_compilation_unit_->GetCompilingClass();
if (compiling_class == nullptr) {
if (!resolved_field->IsPublic()) {
return nullptr;
@@ -1569,7 +1673,7 @@ void HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
uint16_t field_index = instruction.VRegB_21c();
ScopedObjectAccess soa(Thread::Current());
- ArtField* resolved_field = ResolveField(field_index, /* is_static */ true, is_put);
+ ArtField* resolved_field = ResolveField(field_index, /* is_static= */ true, is_put);
if (resolved_field == nullptr) {
MaybeRecordStat(compilation_stats_,
@@ -1586,7 +1690,7 @@ void HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction,
klass->GetDexFile(),
klass,
dex_pc,
- /* needs_access_check */ false);
+ /* needs_access_check= */ false);
if (constant == nullptr) {
// The class cannot be referenced from this compiled code. Generate
@@ -1697,17 +1801,27 @@ void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction,
graph_->SetHasBoundsChecks(true);
}
+HNewArray* HInstructionBuilder::BuildNewArray(uint32_t dex_pc,
+ dex::TypeIndex type_index,
+ HInstruction* length) {
+ HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
+
+ const char* descriptor = dex_file_->GetTypeDescriptor(dex_file_->GetTypeId(type_index));
+ DCHECK_EQ(descriptor[0], '[');
+ size_t component_type_shift = Primitive::ComponentSizeShift(Primitive::GetType(descriptor[1]));
+
+ HNewArray* new_array = new (allocator_) HNewArray(cls, length, dex_pc, component_type_shift);
+ AppendInstruction(new_array);
+ return new_array;
+}
+
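BuildNewArray derives the component size shift from the second character of the array descriptor. A standalone check of that arithmetic (element sizes per the dex primitive types; the reference case assumes ART's 4-byte compressed heap references):

#include <cassert>
#include <cstddef>

static size_t ComponentSizeShift(char component) {
  switch (component) {
    case 'Z': case 'B': return 0;  // boolean[]/byte[]: 1-byte elements.
    case 'C': case 'S': return 1;  // char[]/short[]: 2-byte elements.
    case 'I': case 'F': return 2;  // int[]/float[]: 4-byte elements.
    case 'J': case 'D': return 3;  // long[]/double[]: 8-byte elements.
    default:            return 2;  // 'L'/'[': 4-byte references (assumed).
  }
}

int main() {
  assert(ComponentSizeShift('I') == 2);  // "[I": int[].
  assert(ComponentSizeShift('J') == 3);  // "[J": long[].
  return 0;
}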
HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
dex::TypeIndex type_index,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index) {
- HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc);
- HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
- HNewArray* const object = new (allocator_) HNewArray(cls, length, dex_pc);
- AppendInstruction(object);
+ const InstructionOperands& operands) {
+ const size_t number_of_operands = operands.GetNumberOfOperands();
+ HInstruction* length = graph_->GetIntConstant(number_of_operands, dex_pc);
+ HNewArray* new_array = BuildNewArray(dex_pc, type_index, length);
const char* descriptor = dex_file_->StringByTypeIdx(type_index);
DCHECK_EQ(descriptor[0], '[') << descriptor;
char primitive = descriptor[1];
@@ -1717,16 +1831,16 @@ HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc,
bool is_reference_array = (primitive == 'L') || (primitive == '[');
DataType::Type type = is_reference_array ? DataType::Type::kReference : DataType::Type::kInt32;
- for (size_t i = 0; i < number_of_vreg_arguments; ++i) {
- HInstruction* value = LoadLocal(is_range ? register_index + i : args[i], type);
+ for (size_t i = 0; i < number_of_operands; ++i) {
+ HInstruction* value = LoadLocal(operands.GetOperand(i), type);
HInstruction* index = graph_->GetIntConstant(i, dex_pc);
- HArraySet* aset = new (allocator_) HArraySet(object, index, value, type, dex_pc);
+ HArraySet* aset = new (allocator_) HArraySet(new_array, index, value, type, dex_pc);
ssa_builder_->MaybeAddAmbiguousArraySet(aset);
AppendInstruction(aset);
}
- latest_result_ = object;
+ latest_result_ = new_array;
- return object;
+ return new_array;
}
template <typename T>
@@ -1815,35 +1929,11 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object,
}
}
-static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls)
- REQUIRES_SHARED(Locks::mutator_lock_) {
- if (cls == nullptr) {
- return TypeCheckKind::kUnresolvedCheck;
- } else if (cls->IsInterface()) {
- return TypeCheckKind::kInterfaceCheck;
- } else if (cls->IsArrayClass()) {
- if (cls->GetComponentType()->IsObjectClass()) {
- return TypeCheckKind::kArrayObjectCheck;
- } else if (cls->CannotBeAssignedFromOtherTypes()) {
- return TypeCheckKind::kExactCheck;
- } else {
- return TypeCheckKind::kArrayCheck;
- }
- } else if (cls->IsFinal()) {
- return TypeCheckKind::kExactCheck;
- } else if (cls->IsAbstract()) {
- return TypeCheckKind::kAbstractClassCheck;
- } else {
- return TypeCheckKind::kClassHierarchyCheck;
- }
-}
-
void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) {
HLoadString* load_string =
new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc);
HSharpening::ProcessLoadString(load_string,
code_generator_,
- compiler_driver_,
*dex_compilation_unit_,
handles_);
AppendInstruction(load_string);
@@ -1852,22 +1942,8 @@ void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_
HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) {
ScopedObjectAccess soa(Thread::Current());
const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
- Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader();
- Handle<mirror::Class> klass = handles_->NewHandle(compiler_driver_->ResolveClass(
- soa, dex_compilation_unit_->GetDexCache(), class_loader, type_index, dex_compilation_unit_));
-
- bool needs_access_check = true;
- if (klass != nullptr) {
- if (klass->IsPublic()) {
- needs_access_check = false;
- } else {
- ObjPtr<mirror::Class> compiling_class = GetCompilingClass();
- if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) {
- needs_access_check = false;
- }
- }
- }
-
+ Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+ bool needs_access_check = LoadClassNeedsAccessCheck(klass);
return BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
}
@@ -1888,18 +1964,19 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index,
}
// Note: `klass` must be from `handles_`.
+ bool is_referrers_class =
+ (klass != nullptr) && (outer_compilation_unit_->GetCompilingClass().Get() == klass.Get());
HLoadClass* load_class = new (allocator_) HLoadClass(
graph_->GetCurrentMethod(),
type_index,
*actual_dex_file,
klass,
- klass != nullptr && (klass.Get() == GetOutermostCompilingClass()),
+ is_referrers_class,
dex_pc,
needs_access_check);
HLoadClass::LoadKind load_kind = HSharpening::ComputeLoadClassKind(load_class,
code_generator_,
- compiler_driver_,
*dex_compilation_unit_);
if (load_kind == HLoadClass::LoadKind::kInvalid) {
@@ -1912,35 +1989,109 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index,
return load_class;
}
+Handle<mirror::Class> HInstructionBuilder::ResolveClass(ScopedObjectAccess& soa,
+ dex::TypeIndex type_index) {
+ auto it = class_cache_.find(type_index);
+ if (it != class_cache_.end()) {
+ return it->second;
+ }
+
+ ObjPtr<mirror::Class> klass = dex_compilation_unit_->GetClassLinker()->ResolveType(
+ type_index, dex_compilation_unit_->GetDexCache(), dex_compilation_unit_->GetClassLoader());
+ DCHECK_EQ(klass == nullptr, soa.Self()->IsExceptionPending());
+ soa.Self()->ClearException(); // Clean up the exception left by type resolution if any.
+
+ Handle<mirror::Class> h_klass = handles_->NewHandle(klass);
+ class_cache_.Put(type_index, h_klass);
+ return h_klass;
+}
+
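ResolveClass memoizes per type index, caching even null results so repeated references to an unresolvable type stay cheap. A sketch of the pattern with toy types (the real cache maps dex::TypeIndex to Handle<mirror::Class>):

#include <cstdint>
#include <map>

struct Class {};
using TypeIndex = uint16_t;
using Resolver = Class* (*)(TypeIndex);

static Class* ResolveCached(TypeIndex index,
                            std::map<TypeIndex, Class*>& cache,
                            Resolver resolve) {
  auto it = cache.find(index);
  if (it != cache.end()) {
    return it->second;  // Hit, possibly a cached failure (nullptr).
  }
  Class* klass = resolve(index);  // May fail; exception cleared by the caller.
  cache.emplace(index, klass);
  return klass;
}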
+bool HInstructionBuilder::LoadClassNeedsAccessCheck(Handle<mirror::Class> klass) {
+ if (klass == nullptr) {
+ return true;
+ } else if (klass->IsPublic()) {
+ return false;
+ } else {
+ ObjPtr<mirror::Class> compiling_class = dex_compilation_unit_->GetCompilingClass().Get();
+ return compiling_class == nullptr || !compiling_class->CanAccess(klass.Get());
+ }
+}
+
+void HInstructionBuilder::BuildLoadMethodHandle(uint16_t method_handle_index, uint32_t dex_pc) {
+ const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+ HLoadMethodHandle* load_method_handle = new (allocator_) HLoadMethodHandle(
+ graph_->GetCurrentMethod(), method_handle_index, dex_file, dex_pc);
+ AppendInstruction(load_method_handle);
+}
+
+void HInstructionBuilder::BuildLoadMethodType(dex::ProtoIndex proto_index, uint32_t dex_pc) {
+ const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+ HLoadMethodType* load_method_type =
+ new (allocator_) HLoadMethodType(graph_->GetCurrentMethod(), proto_index, dex_file, dex_pc);
+ AppendInstruction(load_method_type);
+}
+
void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction,
uint8_t destination,
uint8_t reference,
dex::TypeIndex type_index,
uint32_t dex_pc) {
HInstruction* object = LoadLocal(reference, DataType::Type::kReference);
- HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
ScopedObjectAccess soa(Thread::Current());
- TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass());
+ const DexFile& dex_file = *dex_compilation_unit_->GetDexFile();
+ Handle<mirror::Class> klass = ResolveClass(soa, type_index);
+ bool needs_access_check = LoadClassNeedsAccessCheck(klass);
+ TypeCheckKind check_kind = HSharpening::ComputeTypeCheckKind(
+ klass.Get(), code_generator_, needs_access_check);
+
+ HInstruction* class_or_null = nullptr;
+ HIntConstant* bitstring_path_to_root = nullptr;
+ HIntConstant* bitstring_mask = nullptr;
+ if (check_kind == TypeCheckKind::kBitstringCheck) {
+ // TODO: Allow using the bitstring check also if we need an access check.
+ DCHECK(!needs_access_check);
+ class_or_null = graph_->GetNullConstant(dex_pc);
+ MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+ uint32_t path_to_root =
+ SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootForTarget(klass.Get());
+ uint32_t mask = SubtypeCheck<ObjPtr<mirror::Class>>::GetEncodedPathToRootMask(klass.Get());
+ bitstring_path_to_root = graph_->GetIntConstant(static_cast<int32_t>(path_to_root), dex_pc);
+ bitstring_mask = graph_->GetIntConstant(static_cast<int32_t>(mask), dex_pc);
+ } else {
+ class_or_null = BuildLoadClass(type_index, dex_file, klass, dex_pc, needs_access_check);
+ }
+ DCHECK(class_or_null != nullptr);
+
if (instruction.Opcode() == Instruction::INSTANCE_OF) {
- AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc));
+ AppendInstruction(new (allocator_) HInstanceOf(object,
+ class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator_,
+ bitstring_path_to_root,
+ bitstring_mask));
UpdateLocal(destination, current_block_->GetLastInstruction());
} else {
DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST);
// We emit a CheckCast followed by a BoundType. CheckCast is a statement
      // which may throw. If it succeeds, BoundType sets the new type of `object`
// for all subsequent uses.
- AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc));
+ AppendInstruction(
+ new (allocator_) HCheckCast(object,
+ class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator_,
+ bitstring_path_to_root,
+ bitstring_mask));
AppendInstruction(new (allocator_) HBoundType(object, dex_pc));
UpdateLocal(reference, current_block_->GetLastInstruction());
}
}
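// Illustrative sketch (not part of the patch): the masked compare that the
// kBitstringCheck path above lowers to. The encoding is deliberately
// simplified; the real bitstring is computed by SubtypeCheck<> under
// subtype_check_lock_, as in BuildTypeCheck above. All names are made up.
#include <cstdint>

struct ClassSketch {
  // Encoded path from the root of the hierarchy to this class.
  uint32_t bitstring;
};

// `obj_class` is a subtype of the target iff the target's encoded path is a
// prefix of the object's path; `target_mask` selects exactly those bits.
inline bool BitstringIsSubtypeOf(const ClassSketch& obj_class,
                                 uint32_t target_path_to_root,
                                 uint32_t target_mask) {
  return (obj_class.bitstring & target_mask) == target_path_to_root;
}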
-bool HInstructionBuilder::NeedsAccessCheck(dex::TypeIndex type_index, bool* finalizable) const {
- return !compiler_driver_->CanAccessInstantiableTypeWithoutChecks(
- LookupReferrerClass(), LookupResolvedType(type_index, *dex_compilation_unit_), finalizable);
-}
-
bool HInstructionBuilder::CanDecodeQuickenedInfo() const {
return !quicken_info_.IsNull();
}
@@ -2116,11 +2267,10 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
} else {
method_idx = instruction.VRegB_35c();
}
- uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
uint32_t args[5];
- instruction.GetVarArgs(args);
- if (!BuildInvoke(instruction, dex_pc, method_idx,
- number_of_vreg_arguments, false, args, -1)) {
+ uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args);
+ VarArgsInstructionOperands operands(args, number_of_vreg_arguments);
+ if (!BuildInvoke(instruction, dex_pc, method_idx, operands)) {
return false;
}
break;
@@ -2143,10 +2293,8 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
} else {
method_idx = instruction.VRegB_3rc();
}
- uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
- uint32_t register_index = instruction.VRegC();
- if (!BuildInvoke(instruction, dex_pc, method_idx,
- number_of_vreg_arguments, true, nullptr, register_index)) {
+ RangeInstructionOperands operands(instruction.VRegC(), instruction.VRegA_3rc());
+ if (!BuildInvoke(instruction, dex_pc, method_idx, operands)) {
return false;
}
break;
@@ -2154,33 +2302,32 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
case Instruction::INVOKE_POLYMORPHIC: {
uint16_t method_idx = instruction.VRegB_45cc();
- uint16_t proto_idx = instruction.VRegH_45cc();
- uint32_t number_of_vreg_arguments = instruction.VRegA_45cc();
+ dex::ProtoIndex proto_idx(instruction.VRegH_45cc());
uint32_t args[5];
- instruction.GetVarArgs(args);
- return BuildInvokePolymorphic(instruction,
- dex_pc,
- method_idx,
- proto_idx,
- number_of_vreg_arguments,
- false,
- args,
- -1);
+ uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args);
+ VarArgsInstructionOperands operands(args, number_of_vreg_arguments);
+ return BuildInvokePolymorphic(dex_pc, method_idx, proto_idx, operands);
}
case Instruction::INVOKE_POLYMORPHIC_RANGE: {
uint16_t method_idx = instruction.VRegB_4rcc();
- uint16_t proto_idx = instruction.VRegH_4rcc();
- uint32_t number_of_vreg_arguments = instruction.VRegA_4rcc();
- uint32_t register_index = instruction.VRegC_4rcc();
- return BuildInvokePolymorphic(instruction,
- dex_pc,
- method_idx,
- proto_idx,
- number_of_vreg_arguments,
- true,
- nullptr,
- register_index);
+ dex::ProtoIndex proto_idx(instruction.VRegH_4rcc());
+ RangeInstructionOperands operands(instruction.VRegC_4rcc(), instruction.VRegA_4rcc());
+ return BuildInvokePolymorphic(dex_pc, method_idx, proto_idx, operands);
+ }
+
+ case Instruction::INVOKE_CUSTOM: {
+ uint16_t call_site_idx = instruction.VRegB_35c();
+ uint32_t args[5];
+ uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args);
+ VarArgsInstructionOperands operands(args, number_of_vreg_arguments);
+ return BuildInvokeCustom(dex_pc, call_site_idx, operands);
+ }
+
+ case Instruction::INVOKE_CUSTOM_RANGE: {
+ uint16_t call_site_idx = instruction.VRegB_3rc();
+ RangeInstructionOperands operands(instruction.VRegC_3rc(), instruction.VRegA_3rc());
+ return BuildInvokeCustom(dex_pc, call_site_idx, operands);
}
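// Illustrative sketch (not part of the patch): the uniform operand view that
// RangeInstructionOperands / VarArgsInstructionOperands give the Build*()
// helpers above, replacing the old (count, is_range, args, register_index)
// parameter lists. The class bodies below are simplified guesses.
#include <cstddef>
#include <cstdint>

class OperandsSketch {
 public:
  virtual ~OperandsSketch() {}
  virtual size_t GetNumberOfOperands() const = 0;
  virtual uint32_t GetOperand(size_t i) const = 0;  // i-th argument vreg.
};

// invoke-*/range (3rc/4rcc): operands are vC, vC+1, ..., vC+vA-1.
class RangeOperandsSketch : public OperandsSketch {
 public:
  RangeOperandsSketch(uint32_t first, size_t count) : first_(first), count_(count) {}
  size_t GetNumberOfOperands() const override { return count_; }
  uint32_t GetOperand(size_t i) const override { return first_ + static_cast<uint32_t>(i); }

 private:
  const uint32_t first_;
  const size_t count_;
};

// invoke-* (35c/45cc): up to five explicit registers from GetVarArgs().
class VarArgsOperandsSketch : public OperandsSketch {
 public:
  VarArgsOperandsSketch(const uint32_t (&args)[5], size_t count) : count_(count) {
    for (size_t i = 0; i < 5; ++i) args_[i] = args[i];
  }
  size_t GetNumberOfOperands() const override { return count_; }
  uint32_t GetOperand(size_t i) const override { return args_[i]; }

 private:
  uint32_t args_[5];
  const size_t count_;
};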
case Instruction::NEG_INT: {
@@ -2718,40 +2865,27 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
case Instruction::NEW_ARRAY: {
dex::TypeIndex type_index(instruction.VRegC_22c());
HInstruction* length = LoadLocal(instruction.VRegB_22c(), DataType::Type::kInt32);
- HLoadClass* cls = BuildLoadClass(type_index, dex_pc);
+ HNewArray* new_array = BuildNewArray(dex_pc, type_index, length);
- HNewArray* new_array = new (allocator_) HNewArray(cls, length, dex_pc);
- AppendInstruction(new_array);
UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction());
BuildConstructorFenceForAllocation(new_array);
break;
}
case Instruction::FILLED_NEW_ARRAY: {
- uint32_t number_of_vreg_arguments = instruction.VRegA_35c();
dex::TypeIndex type_index(instruction.VRegB_35c());
uint32_t args[5];
- instruction.GetVarArgs(args);
- HNewArray* new_array = BuildFilledNewArray(dex_pc,
- type_index,
- number_of_vreg_arguments,
- /* is_range */ false,
- args,
- /* register_index */ 0);
+ uint32_t number_of_vreg_arguments = instruction.GetVarArgs(args);
+ VarArgsInstructionOperands operands(args, number_of_vreg_arguments);
+ HNewArray* new_array = BuildFilledNewArray(dex_pc, type_index, operands);
BuildConstructorFenceForAllocation(new_array);
break;
}
case Instruction::FILLED_NEW_ARRAY_RANGE: {
- uint32_t number_of_vreg_arguments = instruction.VRegA_3rc();
dex::TypeIndex type_index(instruction.VRegB_3rc());
- uint32_t register_index = instruction.VRegC_3rc();
- HNewArray* new_array = BuildFilledNewArray(dex_pc,
- type_index,
- number_of_vreg_arguments,
- /* is_range */ true,
- /* args*/ nullptr,
- register_index);
+ RangeInstructionOperands operands(instruction.VRegC_3rc(), instruction.VRegA_3rc());
+ HNewArray* new_array = BuildFilledNewArray(dex_pc, type_index, operands);
BuildConstructorFenceForAllocation(new_array);
break;
}
@@ -2812,7 +2946,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
case Instruction::IGET_CHAR_QUICK:
case Instruction::IGET_SHORT:
case Instruction::IGET_SHORT_QUICK: {
- if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ false, quicken_index)) {
+ if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put= */ false, quicken_index)) {
return false;
}
break;
@@ -2832,7 +2966,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
case Instruction::IPUT_CHAR_QUICK:
case Instruction::IPUT_SHORT:
case Instruction::IPUT_SHORT_QUICK: {
- if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ true, quicken_index)) {
+ if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put= */ true, quicken_index)) {
return false;
}
break;
@@ -2845,7 +2979,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
case Instruction::SGET_BYTE:
case Instruction::SGET_CHAR:
case Instruction::SGET_SHORT: {
- BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ false);
+ BuildStaticFieldAccess(instruction, dex_pc, /* is_put= */ false);
break;
}
@@ -2856,7 +2990,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
case Instruction::SPUT_BYTE:
case Instruction::SPUT_CHAR:
case Instruction::SPUT_SHORT: {
- BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ true);
+ BuildStaticFieldAccess(instruction, dex_pc, /* is_put= */ true);
break;
}
@@ -2906,6 +3040,20 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
break;
}
+ case Instruction::CONST_METHOD_HANDLE: {
+ uint16_t method_handle_idx = instruction.VRegB_21c();
+ BuildLoadMethodHandle(method_handle_idx, dex_pc);
+ UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+ break;
+ }
+
+ case Instruction::CONST_METHOD_TYPE: {
+ dex::ProtoIndex proto_idx(instruction.VRegB_21c());
+ BuildLoadMethodType(proto_idx, dex_pc);
+ UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction());
+ break;
+ }
+
case Instruction::MOVE_EXCEPTION: {
AppendInstruction(new (allocator_) HLoadException(dex_pc));
UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction());
@@ -2959,7 +3107,21 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
break;
}
- default:
+ case Instruction::UNUSED_3E:
+ case Instruction::UNUSED_3F:
+ case Instruction::UNUSED_40:
+ case Instruction::UNUSED_41:
+ case Instruction::UNUSED_42:
+ case Instruction::UNUSED_43:
+ case Instruction::UNUSED_79:
+ case Instruction::UNUSED_7A:
+ case Instruction::UNUSED_F3:
+ case Instruction::UNUSED_F4:
+ case Instruction::UNUSED_F5:
+ case Instruction::UNUSED_F6:
+ case Instruction::UNUSED_F7:
+ case Instruction::UNUSED_F8:
+ case Instruction::UNUSED_F9: {
VLOG(compiler) << "Did not compile "
<< dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
<< " because of unhandled instruction "
@@ -2967,6 +3129,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction,
MaybeRecordStat(compilation_stats_,
MethodCompilationStat::kNotCompiledUnhandledInstruction);
return false;
+ }
}
return true;
} // NOLINT(readability/fn_size)
@@ -2980,7 +3143,7 @@ ObjPtr<mirror::Class> HInstructionBuilder::LookupResolvedType(
ObjPtr<mirror::Class> HInstructionBuilder::LookupReferrerClass() const {
// TODO: Cache the result in a Handle<mirror::Class>.
- const DexFile::MethodId& method_id =
+ const dex::MethodId& method_id =
dex_compilation_unit_->GetDexFile()->GetMethodId(dex_compilation_unit_->GetDexMethodIndex());
return LookupResolvedType(method_id.class_idx_, *dex_compilation_unit_);
}
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index 4428c53277..d701445946 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -34,16 +34,18 @@ class ArenaBitVector;
class ArtField;
class ArtMethod;
class CodeGenerator;
-class CompilerDriver;
class DexCompilationUnit;
class HBasicBlockBuilder;
class Instruction;
+class InstructionOperands;
class OptimizingCompilerStats;
+class ScopedObjectAccess;
class SsaBuilder;
class VariableSizedHandleScope;
namespace mirror {
class Class;
+class MethodType;
} // namespace mirror
class HInstructionBuilder : public ValueObject {
@@ -56,7 +58,6 @@ class HInstructionBuilder : public ValueObject {
DataType::Type return_type,
const DexCompilationUnit* dex_compilation_unit,
const DexCompilationUnit* outer_compilation_unit,
- CompilerDriver* compiler_driver,
CodeGenerator* code_generator,
ArrayRef<const uint8_t> interpreter_metadata,
OptimizingCompilerStats* compiler_stats,
@@ -95,11 +96,6 @@ class HInstructionBuilder : public ValueObject {
void InitializeParameters();
- // Returns whether the current method needs access check for the type.
- // Output parameter finalizable is set to whether the type is finalizable.
- bool NeedsAccessCheck(dex::TypeIndex type_index, /*out*/bool* finalizable) const
- REQUIRES_SHARED(Locks::mutator_lock_);
-
template<typename T>
void Unop_12x(const Instruction& instruction, DataType::Type type, uint32_t dex_pc);
@@ -166,29 +162,28 @@ class HInstructionBuilder : public ValueObject {
bool BuildInvoke(const Instruction& instruction,
uint32_t dex_pc,
uint32_t method_idx,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index);
+ const InstructionOperands& operands);
// Builds an invocation node for invoke-polymorphic and returns whether the
// instruction is supported.
- bool BuildInvokePolymorphic(const Instruction& instruction,
- uint32_t dex_pc,
+ bool BuildInvokePolymorphic(uint32_t dex_pc,
uint32_t method_idx,
- uint32_t proto_idx,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index);
+ dex::ProtoIndex proto_idx,
+ const InstructionOperands& operands);
+
+ // Builds an invocation node for invoke-custom and returns whether the
+ // instruction is supported.
+ bool BuildInvokeCustom(uint32_t dex_pc,
+ uint32_t call_site_idx,
+ const InstructionOperands& operands);
+
+ // Builds a new array node.
+ HNewArray* BuildNewArray(uint32_t dex_pc, dex::TypeIndex type_index, HInstruction* length);
// Builds a new array node and the instructions that fill it.
HNewArray* BuildFilledNewArray(uint32_t dex_pc,
dex::TypeIndex type_index,
- uint32_t number_of_vreg_arguments,
- bool is_range,
- uint32_t* args,
- uint32_t register_index);
+ const InstructionOperands& operands);
void BuildFillArrayData(const Instruction& instruction, uint32_t dex_pc);
@@ -232,43 +227,37 @@ class HInstructionBuilder : public ValueObject {
bool needs_access_check)
REQUIRES_SHARED(Locks::mutator_lock_);
- // Returns the outer-most compiling method's class.
- ObjPtr<mirror::Class> GetOutermostCompilingClass() const;
+ Handle<mirror::Class> ResolveClass(ScopedObjectAccess& soa, dex::TypeIndex type_index)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ bool LoadClassNeedsAccessCheck(Handle<mirror::Class> klass)
+ REQUIRES_SHARED(Locks::mutator_lock_);
- // Returns the class whose method is being compiled.
- ObjPtr<mirror::Class> GetCompilingClass() const;
+ // Builds a `HLoadMethodHandle` loading the given `method_handle_index`.
+ void BuildLoadMethodHandle(uint16_t method_handle_idx, uint32_t dex_pc);
- // Returns whether `type_index` points to the outer-most compiling method's class.
- bool IsOutermostCompilingClass(dex::TypeIndex type_index) const;
+ // Builds a `HLoadMethodType` loading the given `proto_index`.
+ void BuildLoadMethodType(dex::ProtoIndex proto_index, uint32_t dex_pc);
void PotentiallySimplifyFakeString(uint16_t original_dex_register,
uint32_t dex_pc,
HInvoke* invoke);
bool SetupInvokeArguments(HInvoke* invoke,
- uint32_t number_of_vreg_arguments,
- uint32_t* args,
- uint32_t register_index,
- bool is_range,
- const char* descriptor,
+ const InstructionOperands& operands,
+ const char* shorty,
size_t start_index,
size_t* argument_index);
bool HandleInvoke(HInvoke* invoke,
- uint32_t number_of_vreg_arguments,
- uint32_t* args,
- uint32_t register_index,
- bool is_range,
- const char* descriptor,
- HClinitCheck* clinit_check,
- bool is_unresolved);
+ const InstructionOperands& operands,
+ const char* shorty,
+ bool is_unresolved,
+ HClinitCheck* clinit_check = nullptr);
bool HandleStringInit(HInvoke* invoke,
- uint32_t number_of_vreg_arguments,
- uint32_t* args,
- uint32_t register_index,
- bool is_range,
- const char* descriptor);
+ const InstructionOperands& operands,
+ const char* shorty);
void HandleStringInitResult(HInvokeStaticOrDirect* invoke);
HClinitCheck* ProcessClinitCheckForInvoke(
@@ -316,8 +305,6 @@ class HInstructionBuilder : public ValueObject {
HBasicBlockBuilder* const block_builder_;
SsaBuilder* const ssa_builder_;
- CompilerDriver* const compiler_driver_;
-
CodeGenerator* const code_generator_;
// The compilation unit of the current method being compiled. Note that
@@ -347,6 +334,10 @@ class HInstructionBuilder : public ValueObject {
ScopedArenaVector<HBasicBlock*> loop_headers_;
+ // Cached resolved types for the current compilation unit's DexFile.
+ // Handle<>s reference entries in the `handles_`.
+ ScopedArenaSafeMap<dex::TypeIndex, Handle<mirror::Class>> class_cache_;
+
static constexpr int kDefaultNumberOfLoops = 2;
DISALLOW_COPY_AND_ASSIGN(HInstructionBuilder);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index a42a85dc1d..a433d7ef73 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -18,6 +18,7 @@
#include "art_method-inl.h"
#include "class_linker-inl.h"
+#include "class_root.h"
#include "data_type-inl.h"
#include "escape.h"
#include "intrinsics.h"
@@ -35,14 +36,12 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
public:
InstructionSimplifierVisitor(HGraph* graph,
CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
OptimizingCompilerStats* stats)
: HGraphDelegateVisitor(graph),
codegen_(codegen),
- compiler_driver_(compiler_driver),
stats_(stats) {}
- void Run();
+ bool Run();
private:
void RecordSimplification() {
@@ -67,44 +66,44 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
bool TryCombineVecMultiplyAccumulate(HVecMul* mul);
void VisitShift(HBinaryOperation* shift);
-
- void VisitEqual(HEqual* equal) OVERRIDE;
- void VisitNotEqual(HNotEqual* equal) OVERRIDE;
- void VisitBooleanNot(HBooleanNot* bool_not) OVERRIDE;
- void VisitInstanceFieldSet(HInstanceFieldSet* equal) OVERRIDE;
- void VisitStaticFieldSet(HStaticFieldSet* equal) OVERRIDE;
- void VisitArraySet(HArraySet* equal) OVERRIDE;
- void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
- void VisitNullCheck(HNullCheck* instruction) OVERRIDE;
- void VisitArrayLength(HArrayLength* instruction) OVERRIDE;
- void VisitCheckCast(HCheckCast* instruction) OVERRIDE;
- void VisitAdd(HAdd* instruction) OVERRIDE;
- void VisitAnd(HAnd* instruction) OVERRIDE;
- void VisitCondition(HCondition* instruction) OVERRIDE;
- void VisitGreaterThan(HGreaterThan* condition) OVERRIDE;
- void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) OVERRIDE;
- void VisitLessThan(HLessThan* condition) OVERRIDE;
- void VisitLessThanOrEqual(HLessThanOrEqual* condition) OVERRIDE;
- void VisitBelow(HBelow* condition) OVERRIDE;
- void VisitBelowOrEqual(HBelowOrEqual* condition) OVERRIDE;
- void VisitAbove(HAbove* condition) OVERRIDE;
- void VisitAboveOrEqual(HAboveOrEqual* condition) OVERRIDE;
- void VisitDiv(HDiv* instruction) OVERRIDE;
- void VisitMul(HMul* instruction) OVERRIDE;
- void VisitNeg(HNeg* instruction) OVERRIDE;
- void VisitNot(HNot* instruction) OVERRIDE;
- void VisitOr(HOr* instruction) OVERRIDE;
- void VisitShl(HShl* instruction) OVERRIDE;
- void VisitShr(HShr* instruction) OVERRIDE;
- void VisitSub(HSub* instruction) OVERRIDE;
- void VisitUShr(HUShr* instruction) OVERRIDE;
- void VisitXor(HXor* instruction) OVERRIDE;
- void VisitSelect(HSelect* select) OVERRIDE;
- void VisitIf(HIf* instruction) OVERRIDE;
- void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE;
- void VisitInvoke(HInvoke* invoke) OVERRIDE;
- void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
- void VisitVecMul(HVecMul* instruction) OVERRIDE;
+ void VisitEqual(HEqual* equal) override;
+ void VisitNotEqual(HNotEqual* equal) override;
+ void VisitBooleanNot(HBooleanNot* bool_not) override;
+ void VisitInstanceFieldSet(HInstanceFieldSet* equal) override;
+ void VisitStaticFieldSet(HStaticFieldSet* equal) override;
+ void VisitArraySet(HArraySet* equal) override;
+ void VisitTypeConversion(HTypeConversion* instruction) override;
+ void VisitNullCheck(HNullCheck* instruction) override;
+ void VisitArrayLength(HArrayLength* instruction) override;
+ void VisitCheckCast(HCheckCast* instruction) override;
+ void VisitAbs(HAbs* instruction) override;
+ void VisitAdd(HAdd* instruction) override;
+ void VisitAnd(HAnd* instruction) override;
+ void VisitCondition(HCondition* instruction) override;
+ void VisitGreaterThan(HGreaterThan* condition) override;
+ void VisitGreaterThanOrEqual(HGreaterThanOrEqual* condition) override;
+ void VisitLessThan(HLessThan* condition) override;
+ void VisitLessThanOrEqual(HLessThanOrEqual* condition) override;
+ void VisitBelow(HBelow* condition) override;
+ void VisitBelowOrEqual(HBelowOrEqual* condition) override;
+ void VisitAbove(HAbove* condition) override;
+ void VisitAboveOrEqual(HAboveOrEqual* condition) override;
+ void VisitDiv(HDiv* instruction) override;
+ void VisitMul(HMul* instruction) override;
+ void VisitNeg(HNeg* instruction) override;
+ void VisitNot(HNot* instruction) override;
+ void VisitOr(HOr* instruction) override;
+ void VisitShl(HShl* instruction) override;
+ void VisitShr(HShr* instruction) override;
+ void VisitSub(HSub* instruction) override;
+ void VisitUShr(HUShr* instruction) override;
+ void VisitXor(HXor* instruction) override;
+ void VisitSelect(HSelect* select) override;
+ void VisitIf(HIf* instruction) override;
+ void VisitInstanceOf(HInstanceOf* instruction) override;
+ void VisitInvoke(HInvoke* invoke) override;
+ void VisitDeoptimize(HDeoptimize* deoptimize) override;
+ void VisitVecMul(HVecMul* instruction) override;
bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const;
@@ -116,13 +115,16 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
void SimplifyFP2Int(HInvoke* invoke);
void SimplifyStringCharAt(HInvoke* invoke);
void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
+ void SimplifyStringIndexOf(HInvoke* invoke);
void SimplifyNPEOnArgN(HInvoke* invoke, size_t);
void SimplifyReturnThis(HInvoke* invoke);
void SimplifyAllocationIntrinsic(HInvoke* invoke);
void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
+ void SimplifyMin(HInvoke* invoke, DataType::Type type);
+ void SimplifyMax(HInvoke* invoke, DataType::Type type);
+ void SimplifyAbs(HInvoke* invoke, DataType::Type type);
CodeGenerator* codegen_;
- CompilerDriver* compiler_driver_;
OptimizingCompilerStats* stats_;
bool simplification_occurred_ = false;
int simplifications_at_current_position_ = 0;
@@ -133,17 +135,18 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
static constexpr int kMaxSamePositionSimplifications = 50;
};
-void InstructionSimplifier::Run() {
+bool InstructionSimplifier::Run() {
if (kTestInstructionClonerExhaustively) {
CloneAndReplaceInstructionVisitor visitor(graph_);
visitor.VisitReversePostOrder();
}
- InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_);
- visitor.Run();
+ InstructionSimplifierVisitor visitor(graph_, codegen_, stats_);
+ return visitor.Run();
}
-void InstructionSimplifierVisitor::Run() {
+bool InstructionSimplifierVisitor::Run() {
+ bool didSimplify = false;
// Iterate in reverse post order to open up more simplifications to users
// of instructions that got simplified.
for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
@@ -153,10 +156,14 @@ void InstructionSimplifierVisitor::Run() {
do {
simplification_occurred_ = false;
VisitBasicBlock(block);
+ if (simplification_occurred_) {
+ didSimplify = true;
+ }
} while (simplification_occurred_ &&
(simplifications_at_current_position_ < kMaxSamePositionSimplifications));
simplifications_at_current_position_ = 0;
}
+ return didSimplify;
}
namespace {
@@ -365,7 +372,7 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) {
// (as defined by shift semantics). This ensures other
// optimizations do not need to special case for such situations.
DCHECK_EQ(shift_amount->GetType(), DataType::Type::kInt32);
- instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1);
+ instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index= */ 1);
RecordSimplification();
return;
}
@@ -576,7 +583,9 @@ bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInst
// Returns whether doing a type test between the class of `object` against `klass` has
// a statically known outcome. The result of the test is stored in `outcome`.
-static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bool* outcome) {
+static bool TypeCheckHasKnownOutcome(ReferenceTypeInfo class_rti,
+ HInstruction* object,
+ /*out*/bool* outcome) {
DCHECK(!object->IsNullConstant()) << "Null constants should be special cased";
ReferenceTypeInfo obj_rti = object->GetReferenceTypeInfo();
ScopedObjectAccess soa(Thread::Current());
@@ -586,7 +595,6 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo
return false;
}
- ReferenceTypeInfo class_rti = klass->GetLoadedClassRTI();
if (!class_rti.IsValid()) {
// Happens when the loaded class is unresolved.
return false;
@@ -611,8 +619,8 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo
void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
HInstruction* object = check_cast->InputAt(0);
- HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
- if (load_class->NeedsAccessCheck()) {
+ if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+ check_cast->GetTargetClass()->NeedsAccessCheck()) {
// If we need to perform an access check we cannot remove the instruction.
return;
}
@@ -627,18 +635,21 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
return;
}
- // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
- // the return value check with the `outcome` check, b/27651442 .
+ // Historical note: The `outcome` was initialized to please Valgrind - the compiler can reorder
+ // the return value check with the `outcome` check, b/27651442.
bool outcome = false;
- if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+ if (TypeCheckHasKnownOutcome(check_cast->GetTargetClassRTI(), object, &outcome)) {
if (outcome) {
check_cast->GetBlock()->RemoveInstruction(check_cast);
MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast);
- if (!load_class->HasUses()) {
- // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
- // However, here we know that it cannot because the checkcast was successfull, hence
- // the class was already loaded.
- load_class->GetBlock()->RemoveInstruction(load_class);
+ if (check_cast->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+ HLoadClass* load_class = check_cast->GetTargetClass();
+ if (!load_class->HasUses()) {
+ // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+        // However, here we know that it cannot because the checkcast was successful, hence
+ // the class was already loaded.
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
}
} else {
// Don't do anything for exceptional cases for now. Ideally we should remove
@@ -649,8 +660,8 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) {
void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
HInstruction* object = instruction->InputAt(0);
- HLoadClass* load_class = instruction->InputAt(1)->AsLoadClass();
- if (load_class->NeedsAccessCheck()) {
+ if (instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck &&
+ instruction->GetTargetClass()->NeedsAccessCheck()) {
// If we need to perform an access check we cannot remove the instruction.
return;
}
@@ -670,10 +681,10 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
return;
}
- // Note: The `outcome` is initialized to please valgrind - the compiler can reorder
- // the return value check with the `outcome` check, b/27651442 .
+ // Historical note: The `outcome` was initialized to please Valgrind - the compiler can reorder
+ // the return value check with the `outcome` check, b/27651442.
bool outcome = false;
- if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) {
+ if (TypeCheckHasKnownOutcome(instruction->GetTargetClassRTI(), object, &outcome)) {
MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf);
if (outcome && can_be_null) {
// Type test will succeed, we just need a null test.
@@ -686,11 +697,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) {
}
RecordSimplification();
instruction->GetBlock()->RemoveInstruction(instruction);
- if (outcome && !load_class->HasUses()) {
- // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
- // However, here we know that it cannot because the instanceof check was successfull, hence
- // the class was already loaded.
- load_class->GetBlock()->RemoveInstruction(load_class);
+ if (outcome && instruction->GetTypeCheckKind() != TypeCheckKind::kBitstringCheck) {
+ HLoadClass* load_class = instruction->GetTargetClass();
+ if (!load_class->HasUses()) {
+ // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw.
+        // However, here we know that it cannot because the instanceof check was successful, hence
+ // the class was already loaded.
+ load_class->GetBlock()->RemoveInstruction(load_class);
+ }
}
}
}
@@ -735,8 +749,8 @@ static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* allocator, HInstr
return new (allocator) HBelowOrEqual(rhs, lhs);
default:
LOG(FATAL) << "Unknown ConditionType " << cond->GetKind();
+ UNREACHABLE();
}
- return nullptr;
}
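// Illustrative sketch (not part of the patch): why the dead `return nullptr;`
// above can go away. UNREACHABLE() wraps __builtin_unreachable() on
// GCC/Clang, so after the noreturn LOG(FATAL) the compiler knows the default
// case never falls out of the switch and stops warning about a missing return.
#include <cstdlib>

int ClassifySketch(int kind) {
  switch (kind) {
    case 0: return 10;
    case 1: return 20;
    default:
      std::abort();              // stands in for LOG(FATAL)
      __builtin_unreachable();   // stands in for UNREACHABLE()
  }
  // No trailing return needed: every path either returns or never returns.
}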
static bool CmpHasBoolType(HInstruction* input, HInstruction* cmp) {
@@ -849,35 +863,29 @@ void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) {
static HInstruction* NewIntegralAbs(ArenaAllocator* allocator,
HInstruction* x,
HInstruction* cursor) {
- DataType::Type type = x->GetType();
- DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
- // Construct a fake intrinsic with as much context as is needed to allocate one.
- // The intrinsic will always be lowered into code later anyway.
- // TODO: b/65164101 : moving towards a real HAbs node makes more sense.
- HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
- HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress,
- HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
- 0u
- };
- HInvokeStaticOrDirect* invoke = new (allocator) HInvokeStaticOrDirect(
- allocator,
- 1,
- type,
- x->GetDexPc(),
- /*method_idx*/ -1,
- /*resolved_method*/ nullptr,
- dispatch_info,
- kStatic,
- MethodReference(nullptr, dex::kDexNoIndex),
- HInvokeStaticOrDirect::ClinitCheckRequirement::kNone);
- invoke->SetArgumentAt(0, x);
- invoke->SetIntrinsic(type == DataType::Type::kInt32 ? Intrinsics::kMathAbsInt
- : Intrinsics::kMathAbsLong,
- kNoEnvironmentOrCache,
- kNoSideEffects,
- kNoThrow);
- cursor->GetBlock()->InsertInstructionBefore(invoke, cursor);
- return invoke;
+ DataType::Type type = DataType::Kind(x->GetType());
+ DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
+ HAbs* abs = new (allocator) HAbs(type, x, cursor->GetDexPc());
+ cursor->GetBlock()->InsertInstructionBefore(abs, cursor);
+ return abs;
+}
+
+// Constructs a new MIN/MAX(x, y) node in the HIR.
+static HInstruction* NewIntegralMinMax(ArenaAllocator* allocator,
+ HInstruction* x,
+ HInstruction* y,
+ HInstruction* cursor,
+ bool is_min) {
+ DataType::Type type = DataType::Kind(x->GetType());
+ DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
+ HBinaryOperation* minmax = nullptr;
+ if (is_min) {
+ minmax = new (allocator) HMin(type, x, y, cursor->GetDexPc());
+ } else {
+ minmax = new (allocator) HMax(type, x, y, cursor->GetDexPc());
+ }
+ cursor->GetBlock()->InsertInstructionBefore(minmax, cursor);
+ return minmax;
}
// Returns true if operands a and b consist of widening type conversions
@@ -899,6 +907,30 @@ static bool AreLowerPrecisionArgs(DataType::Type to_type, HInstruction* a, HInst
to_type == DataType::Type::kInt64);
}
+// Returns an acceptable substitution for "a" on the select
+// construct "a <cmp> b ? c : .." during MIN/MAX recognition.
+static HInstruction* AllowInMinMax(IfCondition cmp,
+ HInstruction* a,
+ HInstruction* b,
+ HInstruction* c) {
+ int64_t value = 0;
+ if (IsInt64AndGet(b, /*out*/ &value) &&
+ (((cmp == kCondLT || cmp == kCondLE) && c->IsMax()) ||
+ ((cmp == kCondGT || cmp == kCondGE) && c->IsMin()))) {
+ HConstant* other = c->AsBinaryOperation()->GetConstantRight();
+ if (other != nullptr && a == c->AsBinaryOperation()->GetLeastConstantLeft()) {
+ int64_t other_value = Int64FromConstant(other);
+ bool is_max = (cmp == kCondLT || cmp == kCondLE);
+ // Allow the max for a < 100 ? max(a, -100) : ..
+ // or the min for a > -100 ? min(a, 100) : ..
+ if (is_max ? (value >= other_value) : (value <= other_value)) {
+ return c;
+ }
+ }
+ }
+ return nullptr;
+}
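// Illustrative sketch (not part of the patch): the nested-select clamp that
// AllowInMinMax() unlocks, written over plain ints. In the HIR the same
// recognition collapses two HSelects into HMin(HMax(a, -100), 100).
int ClampWithSelectsSketch(int a) {
  // Inner select, already recognized as MAX(a, -100).
  int inner = (a > -100) ? a : -100;
  // Outer select: a < 100 ? max(a, -100) : 100. AllowInMinMax lets the max
  // expression stand in for `a` (safe because value >= other_value, i.e.
  // 100 >= -100), exposing the pattern a' < b ? a' : b == MIN(a', 100).
  int outer = (a < 100) ? inner : 100;
  return outer;  // == min(max(a, -100), 100), a saturating clamp.
}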
+
void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
HInstruction* replace_with = nullptr;
HInstruction* condition = select->GetCondition();
@@ -942,23 +974,35 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
DataType::Type t_type = true_value->GetType();
DataType::Type f_type = false_value->GetType();
// Here we have a <cmp> b ? true_value : false_value.
- // Test if both values are same-typed int or long.
- if (t_type == f_type &&
- (t_type == DataType::Type::kInt32 || t_type == DataType::Type::kInt64)) {
- // Try to replace typical integral ABS constructs.
- if (true_value->IsNeg()) {
- HInstruction* negated = true_value->InputAt(0);
- if ((cmp == kCondLT || cmp == kCondLE) &&
- (a == negated && a == false_value && IsInt64Value(b, 0))) {
- // Found a < 0 ? -a : a which can be replaced by ABS(a).
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), false_value, select);
- }
- } else if (false_value->IsNeg()) {
- HInstruction* negated = false_value->InputAt(0);
- if ((cmp == kCondGT || cmp == kCondGE) &&
- (a == true_value && a == negated && IsInt64Value(b, 0))) {
- // Found a > 0 ? a : -a which can be replaced by ABS(a).
- replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select);
+ // Test if both values are compatible integral types (resulting MIN/MAX/ABS
+ // type will be int or long, like the condition). Replacements are general,
+ // but assume conditions prefer constants on the right.
+ if (DataType::IsIntegralType(t_type) && DataType::Kind(t_type) == DataType::Kind(f_type)) {
+ // Allow a < 100 ? max(a, -100) : ..
+ // or a > -100 ? min(a, 100) : ..
+ // to use min/max instead of a to detect nested min/max expressions.
+ HInstruction* new_a = AllowInMinMax(cmp, a, b, true_value);
+ if (new_a != nullptr) {
+ a = new_a;
+ }
+ // Try to replace typical integral MIN/MAX/ABS constructs.
+ if ((cmp == kCondLT || cmp == kCondLE || cmp == kCondGT || cmp == kCondGE) &&
+ ((a == true_value && b == false_value) ||
+ (b == true_value && a == false_value))) {
+ // Found a < b ? a : b (MIN) or a < b ? b : a (MAX)
+ // or a > b ? a : b (MAX) or a > b ? b : a (MIN).
+ bool is_min = (cmp == kCondLT || cmp == kCondLE) == (a == true_value);
+ replace_with = NewIntegralMinMax(GetGraph()->GetAllocator(), a, b, select, is_min);
+ } else if (((cmp == kCondLT || cmp == kCondLE) && true_value->IsNeg()) ||
+ ((cmp == kCondGT || cmp == kCondGE) && false_value->IsNeg())) {
+ bool negLeft = (cmp == kCondLT || cmp == kCondLE);
+ HInstruction* the_negated = negLeft ? true_value->InputAt(0) : false_value->InputAt(0);
+ HInstruction* not_negated = negLeft ? false_value : true_value;
+ if (a == the_negated && a == not_negated && IsInt64Value(b, 0)) {
+ // Found a < 0 ? -a : a
+ // or a > 0 ? a : -a
+ // which can be replaced by ABS(a).
+ replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), a, select);
}
} else if (true_value->IsSub() && false_value->IsSub()) {
HInstruction* true_sub1 = true_value->InputAt(0);
@@ -970,8 +1014,8 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) {
((cmp == kCondLT || cmp == kCondLE) &&
(a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) &&
AreLowerPrecisionArgs(t_type, a, b)) {
- // Found a > b ? a - b : b - a or
- // a < b ? b - a : a - b
+ // Found a > b ? a - b : b - a
+ // or a < b ? b - a : a - b
// which can be replaced by ABS(a - b) for lower precision operands a, b.
replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select);
}
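// Illustrative sketch (not part of the patch): why AreLowerPrecisionArgs()
// must guard the ABS(a - b) rewrite above. With full-width operands the
// subtraction can wrap, and the select and the abs then disagree.
#include <cstdint>
#include <cstdio>

// Two's-complement wrapping ops, mirroring the HIR's integer semantics.
static int32_t WrapSub(int32_t x, int32_t y) {
  return static_cast<int32_t>(static_cast<uint32_t>(x) - static_cast<uint32_t>(y));
}
static int32_t WrapAbs(int32_t x) { return x < 0 ? WrapSub(0, x) : x; }

int main() {
  int32_t a = -2;
  int32_t b = INT32_MAX;
  int32_t select = (a > b) ? WrapSub(a, b) : WrapSub(b, a);  // -2147483647
  int32_t abs_diff = WrapAbs(WrapSub(a, b));                 //  2147483647
  std::printf("%d vs %d\n", select, abs_diff);  // Rewrite would be wrong here.
  // Widened from int16_t, |a - b| always fits in 32 bits and the two agree.
  int16_t c = -2;
  int16_t d = INT16_MAX;
  std::printf("%d vs %d\n", (c > d) ? (c - d) : (d - c), WrapAbs(c - d));
  return 0;
}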
@@ -1137,8 +1181,7 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct
HInstruction* input = instruction->GetInput();
DataType::Type input_type = input->GetType();
DataType::Type result_type = instruction->GetResultType();
- if (DataType::IsTypeConversionImplicit(input_type, result_type)) {
- // Remove the implicit conversion; this includes conversion to the same type.
+ if (instruction->IsImplicitConversion()) {
instruction->ReplaceWith(input);
instruction->GetBlock()->RemoveInstruction(instruction);
RecordSimplification();
@@ -1230,6 +1273,17 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct
}
}
+void InstructionSimplifierVisitor::VisitAbs(HAbs* instruction) {
+ HInstruction* input = instruction->GetInput();
+ if (DataType::IsZeroExtension(input->GetType(), instruction->GetResultType())) {
+    // Zero extension from narrow to wide can never set the sign bit in the wider
+ // operand, making the subsequent Abs redundant (e.g., abs(b & 0xff) for byte b).
+ instruction->ReplaceWith(input);
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ RecordSimplification();
+ }
+}
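// Illustrative sketch (not part of the patch): the VisitAbs() rule above.
// A zero extension fills the new high bits with zeros, so the widened value
// is never negative and Abs is the identity on it.
#include <cstdint>
#include <cstdlib>

int32_t AbsOfZeroExtendedSketch(int8_t b) {
  // `b & 0xff` zero-extends the byte into [0, 255].
  int32_t widened = static_cast<int32_t>(b) & 0xff;
  // std::abs(widened) == widened for every input, which is why the
  // simplifier replaces the HAbs with its zero-extending input.
  return std::abs(widened);
}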
+
void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
HConstant* input_cst = instruction->GetConstantRight();
HInstruction* input_other = instruction->GetLeastConstantLeft();
@@ -1262,7 +1316,7 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) {
}
HNeg* neg = left_is_neg ? left->AsNeg() : right->AsNeg();
- if ((left_is_neg ^ right_is_neg) && neg->HasOnlyOneNonEnvironmentUse()) {
+ if (left_is_neg != right_is_neg && neg->HasOnlyOneNonEnvironmentUse()) {
// Replace code looking like
// NEG tmp, b
// ADD dst, a, tmp
@@ -1507,8 +1561,7 @@ static bool RecognizeAndSimplifyClassCheck(HCondition* condition) {
{
ScopedObjectAccess soa(Thread::Current());
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+ ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0);
DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
if (field_get->GetFieldInfo().GetField() != field) {
return false;
@@ -2092,22 +2145,6 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) {
ReferenceTypeInfo argument_rti = argument->GetReferenceTypeInfo();
if (argument_rti.IsValid() && argument_rti.IsStringClass()) {
optimizations.SetArgumentIsString();
- } else if (kUseReadBarrier) {
- DCHECK(instruction->GetResolvedMethod() != nullptr);
- DCHECK(instruction->GetResolvedMethod()->GetDeclaringClass()->IsStringClass() ||
- // Object.equals() can be devirtualized to String.equals().
- instruction->GetResolvedMethod()->GetDeclaringClass()->IsObjectClass());
- Runtime* runtime = Runtime::Current();
- // For AOT, we always assume that the boot image shall contain the String.class and
- // we do not need a read barrier for boot image classes as they are non-moveable.
- // For JIT, check if we actually have a boot image; if we do, the String.class
- // should also be non-moveable.
- if (runtime->IsAotCompiler() || runtime->GetHeap()->HasBootImageSpace()) {
- DCHECK(runtime->IsAotCompiler() ||
- !runtime->GetHeap()->IsMovableObject(
- instruction->GetResolvedMethod()->GetDeclaringClass()));
- optimizations.SetNoReadBarrierForStringClass();
- }
}
}
}
@@ -2214,7 +2251,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
PointerSize image_size = class_linker->GetImagePointerSize();
HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
- mirror::Class* system = invoke->GetResolvedMethod()->GetDeclaringClass();
+ ObjPtr<mirror::Class> system = invoke->GetResolvedMethod()->GetDeclaringClass();
ArtMethod* method = nullptr;
switch (source_component_type) {
case DataType::Type::kBool:
@@ -2252,7 +2289,7 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction)
// the invoke, as we would need to look it up in the current dex file, and it
// is unlikely that it exists. The most usual situation for such typed
// arraycopy methods is a direct pointer to the boot image.
- HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_);
+ invoke->SetDispatchInfo(HSharpening::SharpenInvokeStaticOrDirect(method, codegen_));
}
}
}
@@ -2324,17 +2361,17 @@ void InstructionSimplifierVisitor::SimplifyStringCharAt(HInvoke* invoke) {
ArenaAllocator* allocator = GetGraph()->GetAllocator();
// We treat String as an array to allow DCE and BCE to seamlessly work on strings,
// so create the HArrayLength, HBoundsCheck and HArrayGet.
- HArrayLength* length = new (allocator) HArrayLength(str, dex_pc, /* is_string_length */ true);
+ HArrayLength* length = new (allocator) HArrayLength(str, dex_pc, /* is_string_length= */ true);
invoke->GetBlock()->InsertInstructionBefore(length, invoke);
HBoundsCheck* bounds_check = new (allocator) HBoundsCheck(
- index, length, dex_pc, /* is_string_char_at */ true);
+ index, length, dex_pc, /* is_string_char_at= */ true);
invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke);
HArrayGet* array_get = new (allocator) HArrayGet(str,
bounds_check,
DataType::Type::kUint16,
SideEffects::None(), // Strings are immutable.
dex_pc,
- /* is_string_char_at */ true);
+ /* is_string_char_at= */ true);
invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get);
bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment());
GetGraph()->SetHasBoundsChecks(true);
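// Illustrative sketch (not part of the patch): the lowering emitted by
// SimplifyStringCharAt() above, written over a uint16_t buffer. The three
// steps correspond to HArrayLength, HBoundsCheck, and HArrayGet; modeling
// the string as an array is what lets BCE/DCE remove redundant checks later.
#include <cstdint>
#include <stdexcept>
#include <vector>

uint16_t CharAtSketch(const std::vector<uint16_t>& str, int32_t index) {
  int32_t length = static_cast<int32_t>(str.size());    // HArrayLength
  if (index < 0 || index >= length) {                    // HBoundsCheck
    throw std::out_of_range("string index out of range");
  }
  return str[static_cast<size_t>(index)];                // HArrayGet (kUint16)
}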
@@ -2346,7 +2383,7 @@ void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke
// We treat String as an array to allow DCE and BCE to seamlessly work on strings,
// so create the HArrayLength.
HArrayLength* length =
- new (GetGraph()->GetAllocator()) HArrayLength(str, dex_pc, /* is_string_length */ true);
+ new (GetGraph()->GetAllocator()) HArrayLength(str, dex_pc, /* is_string_length= */ true);
HInstruction* replacement;
if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) {
// For String.isEmpty(), create the `HEqual` representing the `length == 0`.
@@ -2361,6 +2398,43 @@ void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke
invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement);
}
+void InstructionSimplifierVisitor::SimplifyStringIndexOf(HInvoke* invoke) {
+ DCHECK(invoke->GetIntrinsic() == Intrinsics::kStringIndexOf ||
+ invoke->GetIntrinsic() == Intrinsics::kStringIndexOfAfter);
+ if (invoke->InputAt(0)->IsLoadString()) {
+ HLoadString* load_string = invoke->InputAt(0)->AsLoadString();
+ const DexFile& dex_file = load_string->GetDexFile();
+ uint32_t utf16_length;
+ const char* data =
+ dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), &utf16_length);
+ if (utf16_length == 0) {
+ invoke->ReplaceWith(GetGraph()->GetIntConstant(-1));
+ invoke->GetBlock()->RemoveInstruction(invoke);
+ RecordSimplification();
+ return;
+ }
+ if (utf16_length == 1 && invoke->GetIntrinsic() == Intrinsics::kStringIndexOf) {
+ // Simplify to HSelect(HEquals(., load_string.charAt(0)), 0, -1).
+ // If the sought character is supplementary, this gives the correct result, i.e. -1.
+ uint32_t c = GetUtf16FromUtf8(&data);
+ DCHECK_EQ(GetTrailingUtf16Char(c), 0u);
+ DCHECK_EQ(GetLeadingUtf16Char(c), c);
+ uint32_t dex_pc = invoke->GetDexPc();
+ ArenaAllocator* allocator = GetGraph()->GetAllocator();
+ HEqual* equal =
+ new (allocator) HEqual(invoke->InputAt(1), GetGraph()->GetIntConstant(c), dex_pc);
+ invoke->GetBlock()->InsertInstructionBefore(equal, invoke);
+ HSelect* result = new (allocator) HSelect(equal,
+ GetGraph()->GetIntConstant(0),
+ GetGraph()->GetIntConstant(-1),
+ dex_pc);
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, result);
+ RecordSimplification();
+ return;
+ }
+ }
+}
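// Illustrative sketch (not part of the patch): the two constant folds in
// SimplifyStringIndexOf() above, as plain C++. `c` is the single UTF-16
// code unit of the constant receiver. A supplementary code point passed as
// `ch` is > 0xFFFF and can never equal one code unit, so -1 is correct.
#include <cstdint>

int32_t IndexOfOnEmptySketch(int32_t /*ch*/) {
  return -1;  // "".indexOf(ch) is always -1.
}

int32_t IndexOfOnSingleCharSketch(uint16_t c, int32_t ch) {
  return (ch == c) ? 0 : -1;  // HSelect(HEqual(ch, c), 0, -1)
}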
+
// This method should only be used on intrinsics whose sole way of throwing an
// exception is raising an NPE when the nth argument is null. If that argument
// is provably non-null, we can clear the flag.
@@ -2430,6 +2504,27 @@ void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke,
invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier);
}
+void InstructionSimplifierVisitor::SimplifyMin(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HMin* min = new (GetGraph()->GetAllocator())
+ HMin(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, min);
+}
+
+void InstructionSimplifierVisitor::SimplifyMax(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HMax* max = new (GetGraph()->GetAllocator())
+ HMax(type, invoke->InputAt(0), invoke->InputAt(1), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, max);
+}
+
+void InstructionSimplifierVisitor::SimplifyAbs(HInvoke* invoke, DataType::Type type) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ HAbs* abs = new (GetGraph()->GetAllocator())
+ HAbs(type, invoke->InputAt(0), invoke->GetDexPc());
+ invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, abs);
+}
+
void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
switch (instruction->GetIntrinsic()) {
case Intrinsics::kStringEquals:
@@ -2439,28 +2534,28 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
SimplifySystemArrayCopy(instruction);
break;
case Intrinsics::kIntegerRotateRight:
- SimplifyRotate(instruction, /* is_left */ false, DataType::Type::kInt32);
+ SimplifyRotate(instruction, /* is_left= */ false, DataType::Type::kInt32);
break;
case Intrinsics::kLongRotateRight:
- SimplifyRotate(instruction, /* is_left */ false, DataType::Type::kInt64);
+ SimplifyRotate(instruction, /* is_left= */ false, DataType::Type::kInt64);
break;
case Intrinsics::kIntegerRotateLeft:
- SimplifyRotate(instruction, /* is_left */ true, DataType::Type::kInt32);
+ SimplifyRotate(instruction, /* is_left= */ true, DataType::Type::kInt32);
break;
case Intrinsics::kLongRotateLeft:
- SimplifyRotate(instruction, /* is_left */ true, DataType::Type::kInt64);
+ SimplifyRotate(instruction, /* is_left= */ true, DataType::Type::kInt64);
break;
case Intrinsics::kIntegerCompare:
- SimplifyCompare(instruction, /* is_signum */ false, DataType::Type::kInt32);
+ SimplifyCompare(instruction, /* is_signum= */ false, DataType::Type::kInt32);
break;
case Intrinsics::kLongCompare:
- SimplifyCompare(instruction, /* is_signum */ false, DataType::Type::kInt64);
+ SimplifyCompare(instruction, /* is_signum= */ false, DataType::Type::kInt64);
break;
case Intrinsics::kIntegerSignum:
- SimplifyCompare(instruction, /* is_signum */ true, DataType::Type::kInt32);
+ SimplifyCompare(instruction, /* is_signum= */ true, DataType::Type::kInt32);
break;
case Intrinsics::kLongSignum:
- SimplifyCompare(instruction, /* is_signum */ true, DataType::Type::kInt64);
+ SimplifyCompare(instruction, /* is_signum= */ true, DataType::Type::kInt64);
break;
case Intrinsics::kFloatIsNaN:
case Intrinsics::kDoubleIsNaN:
@@ -2477,6 +2572,10 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
case Intrinsics::kStringLength:
SimplifyStringIsEmptyOrLength(instruction);
break;
+ case Intrinsics::kStringIndexOf:
+ case Intrinsics::kStringIndexOfAfter:
+ SimplifyStringIndexOf(instruction);
+ break;
case Intrinsics::kStringStringIndexOf:
case Intrinsics::kStringStringIndexOfAfter:
SimplifyNPEOnArgN(instruction, 1); // 0th has own NullCheck
@@ -2513,6 +2612,42 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) {
case Intrinsics::kVarHandleStoreStoreFence:
SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore);
break;
+ case Intrinsics::kMathMinIntInt:
+ SimplifyMin(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathMinLongLong:
+ SimplifyMin(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathMinFloatFloat:
+ SimplifyMin(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathMinDoubleDouble:
+ SimplifyMin(instruction, DataType::Type::kFloat64);
+ break;
+ case Intrinsics::kMathMaxIntInt:
+ SimplifyMax(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathMaxLongLong:
+ SimplifyMax(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathMaxFloatFloat:
+ SimplifyMax(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathMaxDoubleDouble:
+ SimplifyMax(instruction, DataType::Type::kFloat64);
+ break;
+ case Intrinsics::kMathAbsInt:
+ SimplifyAbs(instruction, DataType::Type::kInt32);
+ break;
+ case Intrinsics::kMathAbsLong:
+ SimplifyAbs(instruction, DataType::Type::kInt64);
+ break;
+ case Intrinsics::kMathAbsFloat:
+ SimplifyAbs(instruction, DataType::Type::kFloat32);
+ break;
+ case Intrinsics::kMathAbsDouble:
+ SimplifyAbs(instruction, DataType::Type::kFloat64);
+ break;
default:
break;
}
@@ -2553,10 +2688,10 @@ bool InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation(
HConstant* const2;
HBinaryOperation* y;
- if (instruction->InstructionTypeEquals(left) && right->IsConstant()) {
+ if (instruction->GetKind() == left->GetKind() && right->IsConstant()) {
const2 = right->AsConstant();
y = left->AsBinaryOperation();
- } else if (left->IsConstant() && instruction->InstructionTypeEquals(right)) {
+ } else if (left->IsConstant() && instruction->GetKind() == right->GetKind()) {
const2 = left->AsConstant();
y = right->AsBinaryOperation();
} else {
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index 5e2045580b..982a24a6f0 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -24,7 +24,6 @@
namespace art {
class CodeGenerator;
-class CompilerDriver;
/**
* Implements optimizations specific to each instruction.
@@ -40,20 +39,17 @@ class InstructionSimplifier : public HOptimization {
public:
InstructionSimplifier(HGraph* graph,
CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
OptimizingCompilerStats* stats = nullptr,
const char* name = kInstructionSimplifierPassName)
: HOptimization(graph, name, stats),
- codegen_(codegen),
- compiler_driver_(compiler_driver) {}
+ codegen_(codegen) {}
static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
- void Run() OVERRIDE;
+ bool Run() override;
private:
CodeGenerator* codegen_;
- CompilerDriver* compiler_driver_;
DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
};
diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc
index 92081e30b1..01e9cff6d8 100644
--- a/compiler/optimizing/instruction_simplifier_arm.cc
+++ b/compiler/optimizing/instruction_simplifier_arm.cc
@@ -43,11 +43,11 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor {
bool TryMergeIntoUsersShifterOperand(HInstruction* instruction);
bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge);
bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
- return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
+ return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ false);
}
bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
- return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
+ return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ true);
}
/**
@@ -56,7 +56,7 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor {
* (2) Since statements can be removed in a "forward" fashion,
* the visitor should test if each statement is still there.
*/
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ void VisitBasicBlock(HBasicBlock* block) override {
// TODO: fragile iteration, provide more robust iterators?
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
@@ -66,15 +66,15 @@ class InstructionSimplifierArmVisitor : public HGraphVisitor {
}
}
- void VisitAnd(HAnd* instruction) OVERRIDE;
- void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
- void VisitArraySet(HArraySet* instruction) OVERRIDE;
- void VisitMul(HMul* instruction) OVERRIDE;
- void VisitOr(HOr* instruction) OVERRIDE;
- void VisitShl(HShl* instruction) OVERRIDE;
- void VisitShr(HShr* instruction) OVERRIDE;
- void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
- void VisitUShr(HUShr* instruction) OVERRIDE;
+ void VisitAnd(HAnd* instruction) override;
+ void VisitArrayGet(HArrayGet* instruction) override;
+ void VisitArraySet(HArraySet* instruction) override;
+ void VisitMul(HMul* instruction) override;
+ void VisitOr(HOr* instruction) override;
+ void VisitShl(HShl* instruction) override;
+ void VisitShr(HShr* instruction) override;
+ void VisitTypeConversion(HTypeConversion* instruction) override;
+ void VisitUShr(HUShr* instruction) override;
OptimizingCompilerStats* stats_;
};
@@ -202,6 +202,11 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) {
return;
}
+ // TODO: Support intermediate address for object arrays on arm.
+ if (type == DataType::Type::kReference) {
+ return;
+ }
+
if (type == DataType::Type::kInt64
|| type == DataType::Type::kFloat32
|| type == DataType::Type::kFloat64) {
@@ -283,9 +288,10 @@ void InstructionSimplifierArmVisitor::VisitUShr(HUShr* instruction) {
}
}
-void InstructionSimplifierArm::Run() {
+bool InstructionSimplifierArm::Run() {
InstructionSimplifierArmVisitor visitor(graph_, stats_);
visitor.VisitReversePostOrder();
+ return true;
}
} // namespace arm
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 2f6572931f..fca9341d59 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -30,7 +30,7 @@ class InstructionSimplifierArm : public HOptimization {
static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
- void Run() OVERRIDE;
+ bool Run() override;
};
} // namespace arm
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index 1c44e5ac49..e23decbd71 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -45,11 +45,11 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
HInstruction* bitfield_op,
bool do_merge);
bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
- return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false);
+ return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ false);
}
bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) {
DCHECK(CanMergeIntoShifterOperand(use, bitfield_op));
- return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true);
+ return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge= */ true);
}
/**
@@ -58,7 +58,7 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
* (2) Since statements can be removed in a "forward" fashion,
* the visitor should test if each statement is still there.
*/
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ void VisitBasicBlock(HBasicBlock* block) override {
// TODO: fragile iteration, provide more robust iterators?
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
@@ -69,18 +69,18 @@ class InstructionSimplifierArm64Visitor : public HGraphVisitor {
}
// HInstruction visitors, sorted alphabetically.
- void VisitAnd(HAnd* instruction) OVERRIDE;
- void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
- void VisitArraySet(HArraySet* instruction) OVERRIDE;
- void VisitMul(HMul* instruction) OVERRIDE;
- void VisitOr(HOr* instruction) OVERRIDE;
- void VisitShl(HShl* instruction) OVERRIDE;
- void VisitShr(HShr* instruction) OVERRIDE;
- void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE;
- void VisitUShr(HUShr* instruction) OVERRIDE;
- void VisitXor(HXor* instruction) OVERRIDE;
- void VisitVecLoad(HVecLoad* instruction) OVERRIDE;
- void VisitVecStore(HVecStore* instruction) OVERRIDE;
+ void VisitAnd(HAnd* instruction) override;
+ void VisitArrayGet(HArrayGet* instruction) override;
+ void VisitArraySet(HArraySet* instruction) override;
+ void VisitMul(HMul* instruction) override;
+ void VisitOr(HOr* instruction) override;
+ void VisitShl(HShl* instruction) override;
+ void VisitShr(HShr* instruction) override;
+ void VisitTypeConversion(HTypeConversion* instruction) override;
+ void VisitUShr(HUShr* instruction) override;
+ void VisitXor(HXor* instruction) override;
+ void VisitVecLoad(HVecLoad* instruction) override;
+ void VisitVecStore(HVecStore* instruction) override;
OptimizingCompilerStats* stats_;
};
@@ -278,9 +278,10 @@ void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) {
}
}
-void InstructionSimplifierArm64::Run() {
+bool InstructionSimplifierArm64::Run() {
InstructionSimplifierArm64Visitor visitor(graph_, stats_);
visitor.VisitReversePostOrder();
+ return true;
}
} // namespace arm64
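
The VisitBasicBlock comment above warns that instructions can be removed in a forward fashion while iterating, which is why each statement is re-checked (and why the new x86 visitors below guard with IsInBlock()). The same advance-before-mutate pattern on a std::list, as a sketch with stand-in types rather than ART's HInstructionIterator:

#include <cstdio>
#include <list>

int main() {
  std::list<int> instructions = {1, 2, 3, 4};
  for (auto it = instructions.begin(); it != instructions.end();) {
    auto current = it++;       // Advance before mutating, so erasing *current
    if (*current % 2 == 0) {   // cannot invalidate the loop iterator.
      instructions.erase(current);
    }
  }
  for (int value : instructions) {
    std::printf("%d\n", value);  // Prints 1 and 3.
  }
  return 0;
}
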
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index d180a8dc46..8d93c01ebf 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -30,7 +30,7 @@ class InstructionSimplifierArm64 : public HOptimization {
static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64";
- void Run() OVERRIDE;
+ bool Run() override;
};
} // namespace arm64
diff --git a/compiler/optimizing/instruction_simplifier_mips.cc b/compiler/optimizing/instruction_simplifier_mips.cc
index fa97401a0c..5d0c63b76b 100644
--- a/compiler/optimizing/instruction_simplifier_mips.cc
+++ b/compiler/optimizing/instruction_simplifier_mips.cc
@@ -39,8 +39,8 @@ class InstructionSimplifierMipsVisitor : public HGraphVisitor {
bool TryExtractArrayAccessIndex(HInstruction* access,
HInstruction* index,
DataType::Type packed_type);
- void VisitArrayGet(HArrayGet* instruction) OVERRIDE;
- void VisitArraySet(HArraySet* instruction) OVERRIDE;
+ void VisitArrayGet(HArrayGet* instruction) override;
+ void VisitArraySet(HArraySet* instruction) override;
OptimizingCompilerStats* stats_;
CodeGeneratorMIPS* codegen_;
@@ -131,9 +131,10 @@ void InstructionSimplifierMipsVisitor::VisitArraySet(HArraySet* instruction) {
}
}
-void InstructionSimplifierMips::Run() {
+bool InstructionSimplifierMips::Run() {
InstructionSimplifierMipsVisitor visitor(graph_, codegen_, stats_);
visitor.VisitReversePostOrder();
+ return true;
}
} // namespace mips
diff --git a/compiler/optimizing/instruction_simplifier_mips.h b/compiler/optimizing/instruction_simplifier_mips.h
index 6cb8affe85..b431334811 100644
--- a/compiler/optimizing/instruction_simplifier_mips.h
+++ b/compiler/optimizing/instruction_simplifier_mips.h
@@ -35,7 +35,7 @@ class InstructionSimplifierMips : public HOptimization {
static constexpr const char* kInstructionSimplifierMipsPassName = "instruction_simplifier_mips";
- void Run() OVERRIDE;
+ bool Run() override;
private:
CodeGeneratorMIPS* codegen_;
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index ccdcb3532d..0f30f662cd 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -245,11 +245,11 @@ bool TryExtractArrayAccessAddress(HInstruction* access,
return false;
}
if (kEmitCompilerReadBarrier &&
+ !kUseBakerReadBarrier &&
access->IsArrayGet() &&
access->GetType() == DataType::Type::kReference) {
- // For object arrays, the read barrier instrumentation requires
+ // For object arrays, the non-Baker read barrier instrumentation requires
// the original array pointer.
- // TODO: This can be relaxed for Baker CC.
return false;
}
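
TryExtractArrayAccessAddress factors the invariant part of an array access, base plus data offset, into an HIntermediateAddress that several accesses can share; the hunk above narrows the bail-out so that only non-Baker read-barrier reference loads, which still need the original array pointer, are excluded. A plain-C++ illustration of the factored arithmetic, with the data offset as a stand-in value:

#include <cstdint>
#include <cstdio>

int main() {
  int32_t array[8] = {10, 20, 30, 40, 50, 60, 70, 80};
  uintptr_t base = reinterpret_cast<uintptr_t>(array);
  uintptr_t data_offset = 0;  // Stand-in; real arrays carry a header before the data.
  // Before the rewrite, each access recomputes base + data_offset + i * size.
  // HIntermediateAddress materializes base + data_offset once and reuses it.
  uintptr_t intermediate = base + data_offset;
  for (uintptr_t i = 0; i != 4; ++i) {
    int32_t value = *reinterpret_cast<int32_t*>(intermediate + i * sizeof(int32_t));
    std::printf("%d\n", value);
  }
  return 0;
}
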
diff --git a/compiler/optimizing/instruction_simplifier_x86.cc b/compiler/optimizing/instruction_simplifier_x86.cc
new file mode 100644
index 0000000000..2d8f94a85b
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86.cc
@@ -0,0 +1,88 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_x86.h"
+#include "instruction_simplifier_x86_shared.h"
+#include "code_generator_x86.h"
+
+namespace art {
+
+namespace x86 {
+
+class InstructionSimplifierX86Visitor : public HGraphVisitor {
+ public:
+ InstructionSimplifierX86Visitor(HGraph* graph,
+ CodeGenerator* codegen,
+ OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph),
+ codegen_(down_cast<CodeGeneratorX86*>(codegen)),
+ stats_(stats) {}
+
+ void RecordSimplification() {
+ MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch);
+ }
+
+ bool HasAVX2() {
+ return codegen_->GetInstructionSetFeatures().HasAVX2();
+ }
+
+ void VisitBasicBlock(HBasicBlock* block) override {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (instruction->IsInBlock()) {
+ instruction->Accept(this);
+ }
+ }
+ }
+
+ void VisitAnd(HAnd* instruction) override;
+ void VisitXor(HXor* instruction) override;
+
+ private:
+ CodeGeneratorX86* codegen_;
+ OptimizingCompilerStats* stats_;
+};
+
+void InstructionSimplifierX86Visitor::VisitAnd(HAnd* instruction) {
+ if (TryCombineAndNot(instruction)) {
+ RecordSimplification();
+ } else if (instruction->GetResultType() == DataType::Type::kInt32) {
+ if (TryGenerateResetLeastSetBit(instruction)) {
+ RecordSimplification();
+ }
+ }
+}
+
+void InstructionSimplifierX86Visitor::VisitXor(HXor* instruction) {
+ if (instruction->GetResultType() == DataType::Type::kInt32) {
+ if (TryGenerateMaskUptoLeastSetBit(instruction)) {
+ RecordSimplification();
+ }
+ }
+}
+
+bool InstructionSimplifierX86::Run() {
+ InstructionSimplifierX86Visitor visitor(graph_, codegen_, stats_);
+ if (visitor.HasAVX2()) {
+ visitor.VisitReversePostOrder();
+ return true;
+ }
+ return false;
+}
+
+} // namespace x86
+} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_x86.h b/compiler/optimizing/instruction_simplifier_x86.h
new file mode 100644
index 0000000000..6f10006db2
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86.h
@@ -0,0 +1,44 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+namespace x86 {
+
+class InstructionSimplifierX86 : public HOptimization {
+ public:
+ InstructionSimplifierX86(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kInstructionSimplifierX86PassName, stats),
+ codegen_(codegen) {}
+
+ static constexpr const char* kInstructionSimplifierX86PassName = "instruction_simplifier_x86";
+
+ bool Run() override;
+
+ private:
+ CodeGenerator* codegen_;
+};
+
+} // namespace x86
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_H_
diff --git a/compiler/optimizing/instruction_simplifier_x86_64.cc b/compiler/optimizing/instruction_simplifier_x86_64.cc
new file mode 100644
index 0000000000..56c6b414d7
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86_64.cc
@@ -0,0 +1,82 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_x86_64.h"
+#include "instruction_simplifier_x86_shared.h"
+#include "code_generator_x86_64.h"
+
+namespace art {
+
+namespace x86_64 {
+
+class InstructionSimplifierX86_64Visitor : public HGraphVisitor {
+ public:
+ InstructionSimplifierX86_64Visitor(HGraph* graph,
+ CodeGenerator* codegen,
+ OptimizingCompilerStats* stats)
+ : HGraphVisitor(graph),
+ codegen_(down_cast<CodeGeneratorX86_64*>(codegen)),
+ stats_(stats) {}
+
+ void RecordSimplification() {
+ MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch);
+ }
+
+ bool HasAVX2() {
+ return codegen_->GetInstructionSetFeatures().HasAVX2();
+ }
+
+ void VisitBasicBlock(HBasicBlock* block) override {
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+ if (instruction->IsInBlock()) {
+ instruction->Accept(this);
+ }
+ }
+ }
+
+ void VisitAnd(HAnd* instruction) override;
+ void VisitXor(HXor* instruction) override;
+
+ private:
+ CodeGeneratorX86_64* codegen_;
+ OptimizingCompilerStats* stats_;
+};
+
+void InstructionSimplifierX86_64Visitor::VisitAnd(HAnd* instruction) {
+ if (TryCombineAndNot(instruction)) {
+ RecordSimplification();
+ } else if (TryGenerateResetLeastSetBit(instruction)) {
+ RecordSimplification();
+ }
+}
+
+void InstructionSimplifierX86_64Visitor::VisitXor(HXor* instruction) {
+ if (TryGenerateMaskUptoLeastSetBit(instruction)) {
+ RecordSimplification();
+ }
+}
+
+bool InstructionSimplifierX86_64::Run() {
+ InstructionSimplifierX86_64Visitor visitor(graph_, codegen_, stats_);
+ if (visitor.HasAVX2()) {
+ visitor.VisitReversePostOrder();
+ return true;
+ }
+ return false;
+}
+
+} // namespace x86_64
+} // namespace art
diff --git a/compiler/optimizing/instruction_simplifier_x86_64.h b/compiler/optimizing/instruction_simplifier_x86_64.h
new file mode 100644
index 0000000000..6cae24d11a
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86_64.h
@@ -0,0 +1,48 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+class CodeGenerator;
+
+namespace x86_64 {
+
+class InstructionSimplifierX86_64 : public HOptimization {
+ public:
+ InstructionSimplifierX86_64(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
+ : HOptimization(graph, kInstructionSimplifierX86_64PassName, stats),
+ codegen_(codegen) {}
+
+ static constexpr const char* kInstructionSimplifierX86_64PassName =
+ "instruction_simplifier_x86_64";
+
+ bool Run() override;
+
+ private:
+ CodeGenerator* codegen_;
+};
+
+} // namespace x86_64
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_64_H_
diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.cc b/compiler/optimizing/instruction_simplifier_x86_shared.cc
new file mode 100644
index 0000000000..2805abb2bb
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86_shared.cc
@@ -0,0 +1,137 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "instruction_simplifier_x86_shared.h"
+#include "nodes_x86.h"
+
+namespace art {
+
+bool TryCombineAndNot(HAnd* instruction) {
+ DataType::Type type = instruction->GetType();
+ if (!DataType::IsIntOrLongType(type)) {
+ return false;
+ }
+ // Replace code looking like
+ // Not tmp, y
+ // And dst, x, tmp
+ // with
+ // AndNot dst, x, y
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* right = instruction->GetRight();
+ // Perform the simplification only when either left or right
+ // is Not. When both are Not, the instruction should be simplified with
+ // De Morgan's laws.
+ if (left->IsNot() ^ right->IsNot()) {
+ bool left_is_not = left->IsNot();
+ HInstruction* other_ins = (left_is_not ? right : left);
+ HNot* not_ins = (left_is_not ? left : right)->AsNot();
+ // Only do the simplification if the Not instruction has only one use
+ // and can thus be safely removed.
+ if (not_ins->HasOnlyOneNonEnvironmentUse()) {
+ ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator();
+ HX86AndNot* and_not = new (arena) HX86AndNot(type,
+ not_ins->GetInput(),
+ other_ins,
+ instruction->GetDexPc());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, and_not);
+ DCHECK(!not_ins->HasUses());
+ not_ins->GetBlock()->RemoveInstruction(not_ins);
+ return true;
+ }
+ }
+ return false;
+}
+
+bool TryGenerateResetLeastSetBit(HAnd* instruction) {
+ DataType::Type type = instruction->GetType();
+ if (!DataType::IsIntOrLongType(type)) {
+ return false;
+ }
+ // Replace code looking like
+ // Add tmp, x, -1 or Sub tmp, x, 1
+ // And dest, x, tmp
+ // with
+ // MaskOrResetLeastSetBit dest, x
+ HInstruction* candidate = nullptr;
+ HInstruction* other = nullptr;
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* right = instruction->GetRight();
+ if (AreLeastSetBitInputs(left, right)) {
+ candidate = left;
+ other = right;
+ } else if (AreLeastSetBitInputs(right, left)) {
+ candidate = right;
+ other = left;
+ }
+ if (candidate != nullptr && candidate->HasOnlyOneNonEnvironmentUse()) {
+ ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator();
+ HX86MaskOrResetLeastSetBit* lsb = new (arena) HX86MaskOrResetLeastSetBit(
+ type, HInstruction::kAnd, other, instruction->GetDexPc());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, lsb);
+ DCHECK(!candidate->HasUses());
+ candidate->GetBlock()->RemoveInstruction(candidate);
+ return true;
+ }
+ return false;
+}
+
+bool TryGenerateMaskUptoLeastSetBit(HXor* instruction) {
+ DataType::Type type = instruction->GetType();
+ if (!DataType::IsIntOrLongType(type)) {
+ return false;
+ }
+ // Replace code looking like
+ // Add tmp, x, -1 or Sub tmp, x, 1
+ // Xor dest, x, tmp
+ // with
+ // MaskOrResetLeastSetBit dest, x
+ HInstruction* left = instruction->GetLeft();
+ HInstruction* right = instruction->GetRight();
+ HInstruction* other = nullptr;
+ HInstruction* candidate = nullptr;
+ if (AreLeastSetBitInputs(left, right)) {
+ candidate = left;
+ other = right;
+ } else if (AreLeastSetBitInputs(right, left)) {
+ candidate = right;
+ other = left;
+ }
+ if (candidate != nullptr && candidate->HasOnlyOneNonEnvironmentUse()) {
+ ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetAllocator();
+ HX86MaskOrResetLeastSetBit* lsb = new (arena) HX86MaskOrResetLeastSetBit(
+ type, HInstruction::kXor, other, instruction->GetDexPc());
+ instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, lsb);
+ DCHECK(!candidate->HasUses());
+ candidate->GetBlock()->RemoveInstruction(candidate);
+ return true;
+ }
+ return false;
+}
+
+bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other) {
+ if (to_test->IsAdd()) {
+ HAdd* add = to_test->AsAdd();
+ HConstant* cst = add->GetConstantRight();
+ return cst != nullptr && cst->IsMinusOne() && other == add->GetLeastConstantLeft();
+ }
+ if (to_test->IsSub()) {
+ HSub* sub = to_test->AsSub();
+ HConstant* cst = sub->GetConstantRight();
+ return cst != nullptr && cst->IsOne() && other == sub->GetLeastConstantLeft();
+ }
+ return false;
+}
+
+} // namespace art
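
Each helper above matches a two-instruction pattern with a single non-environment use and rewrites it to one x86 node (HX86AndNot or HX86MaskOrResetLeastSetBit). The bit identities being exploited, checked in portable C++ with no intrinsics:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0b101100u;
  uint32_t y = 0b100110u;
  assert((x & ~y) == 0b001000u);        // HX86AndNot: And(x, Not(y)).
  assert((x & (x - 1u)) == 0b101000u);  // Reset least set bit: And(x, Sub(x, 1)).
  assert((x ^ (x - 1u)) == 0b000111u);  // Mask up to least set bit: Xor(x, Sub(x, 1)).
  // AreLeastSetBitInputs also accepts Add(x, -1) in place of Sub(x, 1).
  return 0;
}
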
diff --git a/compiler/optimizing/instruction_simplifier_x86_shared.h b/compiler/optimizing/instruction_simplifier_x86_shared.h
new file mode 100644
index 0000000000..7f94d7ea4c
--- /dev/null
+++ b/compiler/optimizing/instruction_simplifier_x86_shared.h
@@ -0,0 +1,29 @@
+/* Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
+#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
+
+#include "nodes.h"
+
+namespace art {
+bool TryCombineAndNot(HAnd* instruction);
+bool TryGenerateResetLeastSetBit(HAnd* instruction);
+bool TryGenerateMaskUptoLeastSetBit(HXor* instruction);
+bool AreLeastSetBitInputs(HInstruction* to_test, HInstruction* other);
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_X86_SHARED_H_
diff --git a/compiler/optimizing/intrinsic_objects.cc b/compiler/optimizing/intrinsic_objects.cc
new file mode 100644
index 0000000000..c345624a7a
--- /dev/null
+++ b/compiler/optimizing/intrinsic_objects.cc
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsic_objects.h"
+
+#include "art_field-inl.h"
+#include "base/logging.h"
+#include "class_root.h"
+#include "handle.h"
+#include "obj_ptr-inl.h"
+#include "mirror/object_array-alloc-inl.h"
+#include "mirror/object_array-inl.h"
+
+namespace art {
+
+static ObjPtr<mirror::ObjectArray<mirror::Object>> LookupIntegerCache(Thread* self,
+ ClassLinker* class_linker)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ ObjPtr<mirror::Class> integer_cache_class = class_linker->LookupClass(
+ self, "Ljava/lang/Integer$IntegerCache;", /* class_loader= */ nullptr);
+ if (integer_cache_class == nullptr || !integer_cache_class->IsInitialized()) {
+ return nullptr;
+ }
+ ArtField* cache_field =
+ integer_cache_class->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;");
+ CHECK(cache_field != nullptr);
+ ObjPtr<mirror::ObjectArray<mirror::Object>> integer_cache =
+ ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(
+ cache_field->GetObject(integer_cache_class));
+ CHECK(integer_cache != nullptr);
+ return integer_cache;
+}
+
+ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::AllocateBootImageLiveObjects(
+ Thread* self,
+ ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_) {
+ // The objects used for the Integer.valueOf() intrinsic must remain live even if references
+ // to them are removed using reflection. Image roots are not accessible through reflection,
+ // so the array we construct here shall keep them alive.
+ StackHandleScope<1> hs(self);
+ Handle<mirror::ObjectArray<mirror::Object>> integer_cache =
+ hs.NewHandle(LookupIntegerCache(self, class_linker));
+ size_t live_objects_size =
+ (integer_cache != nullptr) ? (/* cache */ 1u + integer_cache->GetLength()) : 0u;
+ ObjPtr<mirror::ObjectArray<mirror::Object>> live_objects =
+ mirror::ObjectArray<mirror::Object>::Alloc(
+ self, GetClassRoot<mirror::ObjectArray<mirror::Object>>(class_linker), live_objects_size);
+ int32_t index = 0;
+ if (integer_cache != nullptr) {
+ live_objects->Set(index++, integer_cache.Get());
+ for (int32_t i = 0, length = integer_cache->GetLength(); i != length; ++i) {
+ live_objects->Set(index++, integer_cache->Get(i));
+ }
+ }
+ CHECK_EQ(index, live_objects->GetLength());
+
+ if (kIsDebugBuild && integer_cache != nullptr) {
+ CHECK_EQ(integer_cache.Get(), GetIntegerValueOfCache(live_objects));
+ for (int32_t i = 0, len = integer_cache->GetLength(); i != len; ++i) {
+ CHECK_EQ(integer_cache->GetWithoutChecks(i), GetIntegerValueOfObject(live_objects, i));
+ }
+ }
+ return live_objects;
+}
+
+ObjPtr<mirror::ObjectArray<mirror::Object>> IntrinsicObjects::GetIntegerValueOfCache(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) {
+ DCHECK(boot_image_live_objects != nullptr);
+ if (boot_image_live_objects->GetLength() == 0u) {
+ return nullptr; // No intrinsic objects.
+ }
+ // No need for read barrier for boot image object or for verifying the value that was just stored.
+ ObjPtr<mirror::Object> result =
+ boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(0);
+ DCHECK(result != nullptr);
+ DCHECK(result->IsObjectArray());
+ DCHECK(result->GetClass()->DescriptorEquals("[Ljava/lang/Integer;"));
+ return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(result);
+}
+
+ObjPtr<mirror::Object> IntrinsicObjects::GetIntegerValueOfObject(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
+ uint32_t index) {
+ DCHECK(boot_image_live_objects != nullptr);
+ DCHECK_NE(boot_image_live_objects->GetLength(), 0);
+ DCHECK_LT(index,
+ static_cast<uint32_t>(GetIntegerValueOfCache(boot_image_live_objects)->GetLength()));
+
+ // No need for read barrier for boot image object or for verifying the value that was just stored.
+ ObjPtr<mirror::Object> result =
+ boot_image_live_objects->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(
+ /* skip the IntegerCache.cache */ 1u + index);
+ DCHECK(result != nullptr);
+ DCHECK(result->GetClass()->DescriptorEquals("Ljava/lang/Integer;"));
+ return result;
+}
+
+MemberOffset IntrinsicObjects::GetIntegerValueOfArrayDataOffset(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects) {
+ DCHECK_NE(boot_image_live_objects->GetLength(), 0);
+ MemberOffset result = mirror::ObjectArray<mirror::Object>::OffsetOfElement(1u);
+ DCHECK_EQ(GetIntegerValueOfObject(boot_image_live_objects, 0u),
+ (boot_image_live_objects
+ ->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(result)));
+ return result;
+}
+
+} // namespace art
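
AllocateBootImageLiveObjects lays the array out as [cache, Integer(low), ..., Integer(high)], which is why GetIntegerValueOfObject reads slot 1u + index and GetIntegerValueOfArrayDataOffset returns the offset of element 1. A small model of that layout, using plain containers in place of the mirror types:

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  const int kLow = -128;
  const int kHigh = 127;
  std::vector<int> integer_cache;           // Stands in for Integer[] IntegerCache.cache.
  for (int v = kLow; v <= kHigh; ++v) {
    integer_cache.push_back(v);
  }
  std::vector<const void*> live_objects;    // Stands in for the Object[] image root.
  live_objects.push_back(&integer_cache);   // Slot 0: the cache array itself.
  for (const int& boxed : integer_cache) {
    live_objects.push_back(&boxed);         // Slots 1..n: the cached Integer objects.
  }
  assert(live_objects.size() == 1u + integer_cache.size());
  size_t index = 5;                         // GetIntegerValueOfObject(live_objects, 5):
  assert(live_objects[1u + index] == &integer_cache[index]);
  return 0;
}
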
diff --git a/compiler/optimizing/intrinsic_objects.h b/compiler/optimizing/intrinsic_objects.h
new file mode 100644
index 0000000000..863017be38
--- /dev/null
+++ b/compiler/optimizing/intrinsic_objects.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_
+
+#include "base/bit_field.h"
+#include "base/bit_utils.h"
+#include "base/mutex.h"
+
+namespace art {
+
+class ClassLinker;
+template <class MirrorType> class ObjPtr;
+class MemberOffset;
+class Thread;
+
+namespace mirror {
+class Object;
+template <class T> class ObjectArray;
+} // namespace mirror
+
+class IntrinsicObjects {
+ public:
+ enum class PatchType {
+ kIntegerValueOfObject,
+ kIntegerValueOfArray,
+
+ kLast = kIntegerValueOfArray
+ };
+
+ static uint32_t EncodePatch(PatchType patch_type, uint32_t index = 0u) {
+ DCHECK(patch_type == PatchType::kIntegerValueOfObject || index == 0u);
+ return PatchTypeField::Encode(static_cast<uint32_t>(patch_type)) | IndexField::Encode(index);
+ }
+
+ static PatchType DecodePatchType(uint32_t intrinsic_data) {
+ return static_cast<PatchType>(PatchTypeField::Decode(intrinsic_data));
+ }
+
+ static uint32_t DecodePatchIndex(uint32_t intrinsic_data) {
+ return IndexField::Decode(intrinsic_data);
+ }
+
+ static ObjPtr<mirror::ObjectArray<mirror::Object>> AllocateBootImageLiveObjects(
+ Thread* self,
+ ClassLinker* class_linker) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ // Functions for retrieving data for Integer.valueOf().
+ static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerValueOfCache(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+ static ObjPtr<mirror::Object> GetIntegerValueOfObject(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
+ uint32_t index) REQUIRES_SHARED(Locks::mutator_lock_);
+ static MemberOffset GetIntegerValueOfArrayDataOffset(
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects)
+ REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+ static constexpr size_t kPatchTypeBits =
+ MinimumBitsToStore(static_cast<uint32_t>(PatchType::kLast));
+ static constexpr size_t kIndexBits = BitSizeOf<uint32_t>() - kPatchTypeBits;
+ using PatchTypeField = BitField<uint32_t, 0u, kPatchTypeBits>;
+ using IndexField = BitField<uint32_t, kPatchTypeBits, kIndexBits>;
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_INTRINSIC_OBJECTS_H_
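
EncodePatch packs the patch type into the low bits of a uint32_t and the index into the remaining bits via art::BitField; with kLast == kIntegerValueOfArray, MinimumBitsToStore yields a single type bit. An equivalent sketch using explicit shifts (this mirrors the BitField layout above but is not ART code):

#include <cassert>
#include <cstdint>

enum class PatchType : uint32_t { kIntegerValueOfObject = 0, kIntegerValueOfArray = 1 };

constexpr uint32_t kPatchTypeBits = 1u;  // MinimumBitsToStore(kLast == 1) == 1.

constexpr uint32_t EncodePatch(PatchType type, uint32_t index = 0u) {
  // Only kIntegerValueOfObject patches carry a nonzero index.
  return static_cast<uint32_t>(type) | (index << kPatchTypeBits);
}

constexpr PatchType DecodePatchType(uint32_t data) {
  return static_cast<PatchType>(data & ((1u << kPatchTypeBits) - 1u));
}

constexpr uint32_t DecodePatchIndex(uint32_t data) {
  return data >> kPatchTypeBits;
}

int main() {
  uint32_t data = EncodePatch(PatchType::kIntegerValueOfObject, 42u);
  assert(DecodePatchType(data) == PatchType::kIntegerValueOfObject);
  assert(DecodePatchIndex(data) == 42u);
  return 0;
}
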
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index f8dc316e45..d9401050df 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -20,309 +20,355 @@
#include "art_method-inl.h"
#include "base/utils.h"
#include "class_linker.h"
+#include "class_root.h"
#include "dex/invoke_type.h"
-#include "driver/compiler_driver.h"
#include "driver/compiler_options.h"
-#include "mirror/dex_cache-inl.h"
+#include "gc/space/image_space.h"
+#include "image-inl.h"
+#include "intrinsic_objects.h"
#include "nodes.h"
+#include "obj_ptr-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
namespace art {
-// Check that intrinsic enum values fit within space set aside in ArtMethod modifier flags.
-#define CHECK_INTRINSICS_ENUM_VALUES(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- static_assert( \
- static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \
- "Instrinsics enumeration space overflow.");
-#include "intrinsics_list.h"
- INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES)
-#undef INTRINSICS_LIST
-#undef CHECK_INTRINSICS_ENUM_VALUES
-
-// Function that returns whether an intrinsic is static/direct or virtual.
-static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) {
- switch (i) {
+std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
+ switch (intrinsic) {
case Intrinsics::kNone:
- return kInterface; // Non-sensical for intrinsic.
+ os << "None";
+ break;
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
case Intrinsics::k ## Name: \
- return IsStatic;
+ os << # Name; \
+ break;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
+#undef STATIC_INTRINSICS_LIST
+#undef VIRTUAL_INTRINSICS_LIST
#undef OPTIMIZING_INTRINSICS
}
- return kInterface;
+ return os;
}
-// Function that returns whether an intrinsic needs an environment or not.
-static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCache(Intrinsics i) {
- switch (i) {
- case Intrinsics::kNone:
- return kNeedsEnvironmentOrCache; // Non-sensical for intrinsic.
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- case Intrinsics::k ## Name: \
- return NeedsEnvironmentOrCache;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
-#undef OPTIMIZING_INTRINSICS
- }
- return kNeedsEnvironmentOrCache;
+static const char kIntegerCacheDescriptor[] = "Ljava/lang/Integer$IntegerCache;";
+static const char kIntegerDescriptor[] = "Ljava/lang/Integer;";
+static const char kIntegerArrayDescriptor[] = "[Ljava/lang/Integer;";
+static const char kLowFieldName[] = "low";
+static const char kHighFieldName[] = "high";
+static const char kValueFieldName[] = "value";
+
+static ObjPtr<mirror::ObjectArray<mirror::Object>> GetBootImageLiveObjects()
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ const std::vector<gc::space::ImageSpace*>& boot_image_spaces = heap->GetBootImageSpaces();
+ DCHECK(!boot_image_spaces.empty());
+ const ImageHeader& main_header = boot_image_spaces[0]->GetImageHeader();
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects =
+ ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(
+ main_header.GetImageRoot<kWithoutReadBarrier>(ImageHeader::kBootImageLiveObjects));
+ DCHECK(boot_image_live_objects != nullptr);
+ DCHECK(heap->ObjectIsInBootImageSpace(boot_image_live_objects));
+ return boot_image_live_objects;
}
-// Function that returns whether an intrinsic has side effects.
-static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) {
- switch (i) {
- case Intrinsics::kNone:
- return kAllSideEffects;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- case Intrinsics::k ## Name: \
- return SideEffects;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
-#undef OPTIMIZING_INTRINSICS
- }
- return kAllSideEffects;
+static ObjPtr<mirror::Class> LookupInitializedClass(Thread* self,
+ ClassLinker* class_linker,
+ const char* descriptor)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ ObjPtr<mirror::Class> klass =
+ class_linker->LookupClass(self, descriptor, /* class_loader= */ nullptr);
+ DCHECK(klass != nullptr);
+ DCHECK(klass->IsInitialized());
+ return klass;
}
-// Function that returns whether an intrinsic can throw exceptions.
-static inline IntrinsicExceptions GetExceptions(Intrinsics i) {
- switch (i) {
- case Intrinsics::kNone:
- return kCanThrow;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- case Intrinsics::k ## Name: \
- return Exceptions;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef INTRINSICS_LIST
-#undef OPTIMIZING_INTRINSICS
- }
- return kCanThrow;
+static ObjPtr<mirror::ObjectArray<mirror::Object>> GetIntegerCacheArray(
+ ObjPtr<mirror::Class> cache_class) REQUIRES_SHARED(Locks::mutator_lock_) {
+ ArtField* cache_field = cache_class->FindDeclaredStaticField("cache", kIntegerArrayDescriptor);
+ DCHECK(cache_field != nullptr);
+ return ObjPtr<mirror::ObjectArray<mirror::Object>>::DownCast(cache_field->GetObject(cache_class));
}
-static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke)
+static int32_t GetIntegerCacheField(ObjPtr<mirror::Class> cache_class, const char* field_name)
REQUIRES_SHARED(Locks::mutator_lock_) {
- // Whenever the intrinsic is marked as static, report an error if we find an InvokeVirtual.
- //
- // Whenever the intrinsic is marked as direct and we find an InvokeVirtual, a devirtualization
- // failure occured. We might be in a situation where we have inlined a method that calls an
- // intrinsic, but that method is in a different dex file on which we do not have a
- // verified_method that would have helped the compiler driver sharpen the call. In that case,
- // make sure that the intrinsic is actually for some final method (or in a final class), as
- // otherwise the intrinsics setup is broken.
- //
- // For the last direction, we have intrinsics for virtual functions that will perform a check
- // inline. If the precise type is known, however, the instruction will be sharpened to an
- // InvokeStaticOrDirect.
- InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic);
- InvokeType invoke_type = invoke->GetInvokeType();
-
- switch (intrinsic_type) {
- case kStatic:
- return (invoke_type == kStatic);
-
- case kDirect:
- if (invoke_type == kDirect) {
- return true;
- }
- if (invoke_type == kVirtual) {
- ArtMethod* art_method = invoke->GetResolvedMethod();
- return (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal());
- }
- return false;
-
- case kVirtual:
- // Call might be devirtualized.
- return (invoke_type == kVirtual || invoke_type == kDirect || invoke_type == kInterface);
-
- case kSuper:
- case kInterface:
- case kPolymorphic:
- return false;
- }
- LOG(FATAL) << "Unknown intrinsic invoke type: " << intrinsic_type;
- UNREACHABLE();
+ ArtField* field = cache_class->FindDeclaredStaticField(field_name, "I");
+ DCHECK(field != nullptr);
+ return field->GetInt(cache_class);
}
-bool IntrinsicsRecognizer::Recognize(HInvoke* invoke,
- ArtMethod* art_method,
- /*out*/ bool* wrong_invoke_type) {
- if (art_method == nullptr) {
- art_method = invoke->GetResolvedMethod();
- }
- *wrong_invoke_type = false;
- if (art_method == nullptr || !art_method->IsIntrinsic()) {
- return false;
- }
-
- // TODO: b/65872996 The intent is that polymorphic signature methods should
- // be compiler intrinsics. At present, they are only interpreter intrinsics.
- if (art_method->IsPolymorphicSignature()) {
- return false;
+static bool CheckIntegerCache(Thread* self,
+ ClassLinker* class_linker,
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects,
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_cache)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(boot_image_cache != nullptr);
+
+ // Since we have a cache in the boot image, both java.lang.Integer and
+ // java.lang.Integer$IntegerCache must be initialized in the boot image.
+ ObjPtr<mirror::Class> cache_class =
+ LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor);
+ ObjPtr<mirror::Class> integer_class =
+ LookupInitializedClass(self, class_linker, kIntegerDescriptor);
+
+ // Check that the current cache is the same as the `boot_image_cache`.
+ ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class);
+ if (current_cache != boot_image_cache) {
+ return false; // Messed up IntegerCache.cache.
}
- Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic());
- if (CheckInvokeType(intrinsic, invoke) == false) {
- *wrong_invoke_type = true;
- return false;
+ // Check that the range matches the boot image cache length.
+ int32_t low = GetIntegerCacheField(cache_class, kLowFieldName);
+ int32_t high = GetIntegerCacheField(cache_class, kHighFieldName);
+ if (boot_image_cache->GetLength() != high - low + 1) {
+ return false; // Messed up IntegerCache.low or IntegerCache.high.
}
- invoke->SetIntrinsic(intrinsic,
- NeedsEnvironmentOrCache(intrinsic),
- GetSideEffects(intrinsic),
- GetExceptions(intrinsic));
- return true;
-}
-
-void IntrinsicsRecognizer::Run() {
- ScopedObjectAccess soa(Thread::Current());
- for (HBasicBlock* block : graph_->GetReversePostOrder()) {
- for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
- inst_it.Advance()) {
- HInstruction* inst = inst_it.Current();
- if (inst->IsInvoke()) {
- bool wrong_invoke_type = false;
- if (Recognize(inst->AsInvoke(), /* art_method */ nullptr, &wrong_invoke_type)) {
- MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized);
- } else if (wrong_invoke_type) {
- LOG(WARNING)
- << "Found an intrinsic with unexpected invoke type: "
- << inst->AsInvoke()->GetResolvedMethod()->PrettyMethod() << " "
- << inst->DebugName();
- }
- }
+ // Check that the elements match the boot image intrinsic objects and check their values as well.
+ ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
+ DCHECK(value_field != nullptr);
+ for (int32_t i = 0, len = boot_image_cache->GetLength(); i != len; ++i) {
+ ObjPtr<mirror::Object> boot_image_object =
+ IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, i);
+ DCHECK(Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boot_image_object));
+ // No need for read barrier for comparison with a boot image object.
+ ObjPtr<mirror::Object> current_object =
+ boot_image_cache->GetWithoutChecks<kVerifyNone, kWithoutReadBarrier>(i);
+ if (boot_image_object != current_object) {
+ return false; // Messed up IntegerCache.cache[i].
+ }
+ if (value_field->GetInt(boot_image_object) != low + i) {
+ return false; // Messed up IntegerCache.cache[i].value.
}
}
-}
-std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) {
- switch (intrinsic) {
- case Intrinsics::kNone:
- os << "None";
- break;
-#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- case Intrinsics::k ## Name: \
- os << # Name; \
- break;
-#include "intrinsics_list.h"
- INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
-#undef STATIC_INTRINSICS_LIST
-#undef VIRTUAL_INTRINSICS_LIST
-#undef OPTIMIZING_INTRINSICS
- }
- return os;
+ return true;
}
void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
CodeGenerator* codegen,
Location return_location,
Location first_argument_location) {
- if (Runtime::Current()->IsAotCompiler()) {
- if (codegen->GetCompilerOptions().IsBootImage() ||
- codegen->GetCompilerOptions().GetCompilePic()) {
- // TODO(ngeoffray): Support boot image compilation.
+ // The intrinsic will call the runtime if it needs to allocate a j.l.Integer.
+ LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
+ const CompilerOptions& compiler_options = codegen->GetCompilerOptions();
+ if (compiler_options.IsBootImage()) {
+ // Piggyback on the method load kind to determine whether we can use PC-relative addressing.
+ // This should cover both the testing config (non-PIC boot image) and codegens that reject
+ // PC-relative load kinds and fall back to the runtime call.
+ if (!invoke->AsInvokeStaticOrDirect()->HasPcRelativeMethodLoadKind()) {
+ return;
+ }
+ if (!compiler_options.IsImageClass(kIntegerCacheDescriptor) ||
+ !compiler_options.IsImageClass(kIntegerDescriptor)) {
+ return;
+ }
+ ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ ObjPtr<mirror::Class> cache_class = class_linker->LookupClass(
+ self, kIntegerCacheDescriptor, /* class_loader= */ nullptr);
+ DCHECK(cache_class != nullptr);
+ if (UNLIKELY(!cache_class->IsInitialized())) {
+ LOG(WARNING) << "Image class " << cache_class->PrettyDescriptor() << " is uninitialized.";
+ return;
+ }
+ ObjPtr<mirror::Class> integer_class =
+ class_linker->LookupClass(self, kIntegerDescriptor, /* class_loader= */ nullptr);
+ DCHECK(integer_class != nullptr);
+ if (UNLIKELY(!integer_class->IsInitialized())) {
+ LOG(WARNING) << "Image class " << integer_class->PrettyDescriptor() << " is uninitialized.";
return;
}
+ int32_t low = GetIntegerCacheField(cache_class, kLowFieldName);
+ int32_t high = GetIntegerCacheField(cache_class, kHighFieldName);
+ if (kIsDebugBuild) {
+ ObjPtr<mirror::ObjectArray<mirror::Object>> current_cache = GetIntegerCacheArray(cache_class);
+ CHECK(current_cache != nullptr);
+ CHECK_EQ(current_cache->GetLength(), high - low + 1);
+ ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
+ CHECK(value_field != nullptr);
+ for (int32_t i = 0, len = current_cache->GetLength(); i != len; ++i) {
+ ObjPtr<mirror::Object> current_object = current_cache->GetWithoutChecks(i);
+ CHECK(current_object != nullptr);
+ CHECK_EQ(value_field->GetInt(current_object), low + i);
+ }
+ }
+ if (invoke->InputAt(0)->IsIntConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
+ static_cast<uint32_t>(high - low + 1)) {
+ // No call, we shall use a direct pointer to the Integer object.
+ call_kind = LocationSummary::kNoCall;
+ }
+ }
+ } else {
+ Runtime* runtime = Runtime::Current();
+ if (runtime->GetHeap()->GetBootImageSpaces().empty()) {
+ return; // Running without boot image, cannot use required boot image objects.
+ }
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects();
+ ObjPtr<mirror::ObjectArray<mirror::Object>> cache =
+ IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects);
+ if (cache == nullptr) {
+ return; // No cache in the boot image.
+ }
+ if (runtime->UseJitCompilation()) {
+ if (!CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache)) {
+ return; // The cache was somehow messed up, probably by using reflection.
+ }
+ } else {
+ DCHECK(runtime->IsAotCompiler());
+ DCHECK(CheckIntegerCache(self, runtime->GetClassLinker(), boot_image_live_objects, cache));
+ if (invoke->InputAt(0)->IsIntConstant()) {
+ int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ // Retrieve the `value` from the lowest cached Integer.
+ ObjPtr<mirror::Object> low_integer =
+ IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u);
+ ObjPtr<mirror::Class> integer_class =
+ low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>();
+ ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
+ DCHECK(value_field != nullptr);
+ int32_t low = value_field->GetInt(low_integer);
+ if (static_cast<uint32_t>(value) - static_cast<uint32_t>(low) <
+ static_cast<uint32_t>(cache->GetLength())) {
+ // No call, we shall use a direct pointer to the Integer object. Note that we cannot
+ // do this for JIT as the "low" can change through reflection before emitting the code.
+ call_kind = LocationSummary::kNoCall;
+ }
+ }
+ }
}
- IntegerValueOfInfo info = ComputeIntegerValueOfInfo();
-
- // Most common case is that we have found all we needed (classes are initialized
- // and in the boot image). Bail if not.
- if (info.integer_cache == nullptr ||
- info.integer == nullptr ||
- info.cache == nullptr ||
- info.value_offset == 0 ||
- // low and high cannot be 0, per the spec.
- info.low == 0 ||
- info.high == 0) {
- LOG(INFO) << "Integer.valueOf will not be optimized";
- return;
+ ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
+ LocationSummary* locations = new (allocator) LocationSummary(invoke, call_kind, kIntrinsified);
+ if (call_kind == LocationSummary::kCallOnMainOnly) {
+ locations->SetInAt(0, Location::RegisterOrConstant(invoke->InputAt(0)));
+ locations->AddTemp(first_argument_location);
+ locations->SetOut(return_location);
+ } else {
+ locations->SetInAt(0, Location::ConstantLocation(invoke->InputAt(0)->AsConstant()));
+ locations->SetOut(Location::RequiresRegister());
}
+}
- // The intrinsic will call if it needs to allocate a j.l.Integer.
- LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary(
- invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
- if (!invoke->InputAt(0)->IsConstant()) {
- locations->SetInAt(0, Location::RequiresRegister());
- }
- locations->AddTemp(first_argument_location);
- locations->SetOut(return_location);
+static int32_t GetIntegerCacheLowFromIntegerCache(Thread* self, ClassLinker* class_linker)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ ObjPtr<mirror::Class> cache_class =
+ LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor);
+ return GetIntegerCacheField(cache_class, kLowFieldName);
}
-IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() {
+static uint32_t CalculateBootImageOffset(ObjPtr<mirror::Object> object)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ gc::Heap* heap = Runtime::Current()->GetHeap();
+ DCHECK(heap->ObjectIsInBootImageSpace(object));
+ return reinterpret_cast<const uint8_t*>(object.Ptr()) - heap->GetBootImageSpaces()[0]->Begin();
+}
+
+inline IntrinsicVisitor::IntegerValueOfInfo::IntegerValueOfInfo()
+ : value_offset(0),
+ low(0),
+ length(0u),
+ integer_boot_image_offset(kInvalidReference),
+ value_boot_image_reference(kInvalidReference) {}
+
+IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo(
+ HInvoke* invoke, const CompilerOptions& compiler_options) {
// Note that we could cache all of the data looked up here, but there's no good
// location for it. We don't want to add it to WellKnownClasses, to avoid creating global
// jni values. Adding it as state to the compiler singleton seems like wrong
// separation of concerns.
// The need for this data should be pretty rare though.
- // The most common case is that the classes are in the boot image and initialized,
- // which is easy to generate code for. We bail if not.
- Thread* self = Thread::Current();
- ScopedObjectAccess soa(self);
+ // Note that at this point we can no longer abort the code generation. Therefore,
+ // we need to provide data that shall not lead to a crash even if the fields were
+ // modified through reflection since ComputeIntegerValueOfLocations() when JITting.
+
Runtime* runtime = Runtime::Current();
ClassLinker* class_linker = runtime->GetClassLinker();
- gc::Heap* heap = runtime->GetHeap();
- IntegerValueOfInfo info;
- info.integer_cache = class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;");
- if (info.integer_cache == nullptr) {
- self->ClearException();
- return info;
- }
- if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) {
- // Optimization only works if the class is initialized and in the boot image.
- return info;
- }
- info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;");
- if (info.integer == nullptr) {
- self->ClearException();
- return info;
- }
- if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) {
- // Optimization only works if the class is initialized and in the boot image.
- return info;
- }
-
- ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;");
- if (field == nullptr) {
- return info;
- }
- info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>(
- field->GetObject(info.integer_cache).Ptr());
- if (info.cache == nullptr) {
- return info;
- }
-
- if (!heap->ObjectIsInBootImageSpace(info.cache)) {
- // Optimization only works if the object is in the boot image.
- return info;
- }
+ Thread* self = Thread::Current();
+ ScopedObjectAccess soa(self);
- field = info.integer->FindDeclaredInstanceField("value", "I");
- if (field == nullptr) {
- return info;
+ IntegerValueOfInfo info;
+ if (compiler_options.IsBootImage()) {
+ ObjPtr<mirror::Class> integer_class =
+ LookupInitializedClass(self, class_linker, kIntegerDescriptor);
+ ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
+ DCHECK(value_field != nullptr);
+ info.value_offset = value_field->GetOffset().Uint32Value();
+ ObjPtr<mirror::Class> cache_class =
+ LookupInitializedClass(self, class_linker, kIntegerCacheDescriptor);
+ info.low = GetIntegerCacheField(cache_class, kLowFieldName);
+ int32_t high = GetIntegerCacheField(cache_class, kHighFieldName);
+ info.length = dchecked_integral_cast<uint32_t>(high - info.low + 1);
+
+ info.integer_boot_image_offset = IntegerValueOfInfo::kInvalidReference;
+ if (invoke->InputAt(0)->IsIntConstant()) {
+ int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low);
+ if (index < static_cast<uint32_t>(info.length)) {
+ info.value_boot_image_reference = IntrinsicObjects::EncodePatch(
+ IntrinsicObjects::PatchType::kIntegerValueOfObject, index);
+ } else {
+ // Not in the cache.
+ info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference;
+ }
+ } else {
+ info.array_data_boot_image_reference =
+ IntrinsicObjects::EncodePatch(IntrinsicObjects::PatchType::kIntegerValueOfArray);
+ }
+ } else {
+ ObjPtr<mirror::ObjectArray<mirror::Object>> boot_image_live_objects = GetBootImageLiveObjects();
+ ObjPtr<mirror::Object> low_integer =
+ IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, 0u);
+ ObjPtr<mirror::Class> integer_class = low_integer->GetClass<kVerifyNone, kWithoutReadBarrier>();
+ ArtField* value_field = integer_class->FindDeclaredInstanceField(kValueFieldName, "I");
+ DCHECK(value_field != nullptr);
+ info.value_offset = value_field->GetOffset().Uint32Value();
+ if (runtime->UseJitCompilation()) {
+ // Use the current `IntegerCache.low` for JIT to avoid truly surprising behavior if the
+ // code messes up the `value` field in the lowest cached Integer using reflection.
+ info.low = GetIntegerCacheLowFromIntegerCache(self, class_linker);
+ } else {
+ // For app AOT, the `low_integer->value` should be the same as `IntegerCache.low`.
+ info.low = value_field->GetInt(low_integer);
+ DCHECK_EQ(info.low, GetIntegerCacheLowFromIntegerCache(self, class_linker));
+ }
+ // Do not look at `IntegerCache.high`, use the immutable length of the cache array instead.
+ info.length = dchecked_integral_cast<uint32_t>(
+ IntrinsicObjects::GetIntegerValueOfCache(boot_image_live_objects)->GetLength());
+
+ info.integer_boot_image_offset = CalculateBootImageOffset(integer_class);
+ if (invoke->InputAt(0)->IsIntConstant()) {
+ int32_t input_value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+ uint32_t index = static_cast<uint32_t>(input_value) - static_cast<uint32_t>(info.low);
+ if (index < static_cast<uint32_t>(info.length)) {
+ ObjPtr<mirror::Object> integer =
+ IntrinsicObjects::GetIntegerValueOfObject(boot_image_live_objects, index);
+ info.value_boot_image_reference = CalculateBootImageOffset(integer);
+ } else {
+ // Not in the cache.
+ info.value_boot_image_reference = IntegerValueOfInfo::kInvalidReference;
+ }
+ } else {
+ info.array_data_boot_image_reference =
+ CalculateBootImageOffset(boot_image_live_objects) +
+ IntrinsicObjects::GetIntegerValueOfArrayDataOffset(boot_image_live_objects).Uint32Value();
+ }
}
- info.value_offset = field->GetOffset().Int32Value();
- field = info.integer_cache->FindDeclaredStaticField("low", "I");
- if (field == nullptr) {
- return info;
- }
- info.low = field->GetInt(info.integer_cache);
+ return info;
+}
- field = info.integer_cache->FindDeclaredStaticField("high", "I");
- if (field == nullptr) {
- return info;
+void IntrinsicVisitor::AssertNonMovableStringClass() {
+ if (kIsDebugBuild) {
+ ScopedObjectAccess soa(Thread::Current());
+ ObjPtr<mirror::Class> string_class = GetClassRoot<art::mirror::String>();
+ CHECK(!art::Runtime::Current()->GetHeap()->IsMovableObject(string_class));
}
- info.high = field->GetInt(info.integer_cache);
-
- DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1);
- return info;
}
} // namespace art
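
Both ComputeIntegerValueOfLocations and ComputeIntegerValueOfInfo test a constant input against the cache with one unsigned comparison: after casting to uint32_t, (value - low) < length holds exactly when value lies in [low, low + length). A standalone check of that identity at the default cache bounds:

#include <cassert>
#include <cstdint>

static bool InIntegerCache(int32_t value, int32_t low, uint32_t length) {
  // One unsigned compare replaces the pair low <= value && value < low + length.
  return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < length;
}

int main() {
  const int32_t low = -128;
  const uint32_t length = 256u;  // The default IntegerCache covers [-128, 127].
  assert(InIntegerCache(-128, low, length));
  assert(InIntegerCache(127, low, length));
  assert(!InIntegerCache(128, low, length));
  assert(!InIntegerCache(-129, low, length));  // Wraps around to a huge unsigned value.
  return 0;
}
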
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 62991435c7..ab68cce304 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -24,7 +24,6 @@
namespace art {
-class CompilerDriver;
class DexFile;
// Positive floating-point infinities.
@@ -34,28 +33,6 @@ static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000)
static constexpr uint32_t kNanFloat = 0x7fc00000U;
static constexpr uint64_t kNanDouble = 0x7ff8000000000000;
-// Recognize intrinsics from HInvoke nodes.
-class IntrinsicsRecognizer : public HOptimization {
- public:
- IntrinsicsRecognizer(HGraph* graph,
- OptimizingCompilerStats* stats,
- const char* name = kIntrinsicsRecognizerPassName)
- : HOptimization(graph, name, stats) {}
-
- void Run() OVERRIDE;
-
- // Static helper that recognizes intrinsic call. Returns true on success.
- // If it fails due to invoke type mismatch, wrong_invoke_type is set.
- // Useful to recognize intrinsics on individual calls outside this full pass.
- static bool Recognize(HInvoke* invoke, ArtMethod* method, /*out*/ bool* wrong_invoke_type)
- REQUIRES_SHARED(Locks::mutator_lock_);
-
- static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition";
-
- private:
- DISALLOW_COPY_AND_ASSIGN(IntrinsicsRecognizer);
-};
-
class IntrinsicVisitor : public ValueObject {
public:
virtual ~IntrinsicVisitor() {}
@@ -126,37 +103,47 @@ class IntrinsicVisitor : public ValueObject {
Location return_location,
Location first_argument_location);
- // Temporary data structure for holding Integer.valueOf useful data. We only
- // use it if the mirror::Class* are in the boot image, so it is fine to keep raw
- // mirror::Class pointers in this structure.
+ // Temporary data structure for holding Integer.valueOf data for generating code.
+ // We only use it if the boot image contains the IntegerCache objects.
struct IntegerValueOfInfo {
- IntegerValueOfInfo()
- : integer_cache(nullptr),
- integer(nullptr),
- cache(nullptr),
- low(0),
- high(0),
- value_offset(0) {}
-
- // The java.lang.IntegerCache class.
- mirror::Class* integer_cache;
- // The java.lang.Integer class.
- mirror::Class* integer;
- // Value of java.lang.IntegerCache#cache.
- mirror::ObjectArray<mirror::Object>* cache;
- // Value of java.lang.IntegerCache#low.
+ static constexpr uint32_t kInvalidReference = static_cast<uint32_t>(-1);
+
+ IntegerValueOfInfo();
+
+ // Offset of the Integer.value field for initializing a newly allocated instance.
+ uint32_t value_offset;
+ // The low value in the cache.
int32_t low;
- // Value of java.lang.IntegerCache#high.
- int32_t high;
- // The offset of java.lang.Integer.value.
- int32_t value_offset;
+ // The length of the cache array.
+ uint32_t length;
+
+ // Boot image offset of java.lang.Integer for allocating an instance.
+ uint32_t integer_boot_image_offset; // Set to kInvalidReference when compiling the boot image.
+
+ // This union contains references to the boot image. For app AOT or JIT compilation,
+ // these are the boot image offsets of the target. For boot image compilation, the
+ // location shall be known only at link time, so we encode a symbolic reference using
+ // IntrinsicObjects::EncodePatch().
+ union {
+ // The target value for a constant input in the cache range. If the constant input
+ // is out of range (use `low` and `length` to check), this value is bogus (set to
+ // kInvalidReference) and the code must allocate a new Integer.
+ uint32_t value_boot_image_reference;
+
+ // The cache array data used for a non-constant input in the cache range.
+ // If the input is out of range, the code must allocate a new Integer.
+ uint32_t array_data_boot_image_reference;
+ };
};
- static IntegerValueOfInfo ComputeIntegerValueOfInfo();
+ static IntegerValueOfInfo ComputeIntegerValueOfInfo(
+ HInvoke* invoke, const CompilerOptions& compiler_options);
protected:
IntrinsicVisitor() {}
+ static void AssertNonMovableStringClass();
+
private:
DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor);
};
@@ -211,7 +198,6 @@ class StringEqualsOptimizations : public IntrinsicOptimizations {
INTRINSIC_OPTIMIZATION(ArgumentNotNull, 0);
INTRINSIC_OPTIMIZATION(ArgumentIsString, 1);
- INTRINSIC_OPTIMIZATION(NoReadBarrierForStringClass, 2);
private:
DISALLOW_COPY_AND_ASSIGN(StringEqualsOptimizations);
@@ -255,17 +241,33 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNU
// Defines a list of unreached intrinsics: that is, method calls that are recognized as
// an intrinsic, and then always converted into HIR instructions before they reach any
-// architecture-specific intrinsics code generator.
+// architecture-specific intrinsics code generator. This only applies to non-baseline
+// compilation.
#define UNREACHABLE_INTRINSIC(Arch, Name) \
void IntrinsicLocationsBuilder ## Arch::Visit ## Name(HInvoke* invoke) { \
- LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \
- << " should have been converted to HIR"; \
+ if (Runtime::Current()->IsAotCompiler() && \
+ !codegen_->GetCompilerOptions().IsBaseline()) { \
+ LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \
+ << " should have been converted to HIR"; \
+ } \
} \
void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \
LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \
<< " should have been converted to HIR"; \
}
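// [Editor's note, not part of the patch: for illustration, after this change
// UNREACHABLE_INTRINSIC(ARM64, MathAbsInt) expands roughly to:]
//
//   void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
//     if (Runtime::Current()->IsAotCompiler() &&
//         !codegen_->GetCompilerOptions().IsBaseline()) {
//       LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic()
//                  << " should have been converted to HIR";
//     }
//   }
//   void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
//     LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic()
//                << " should have been converted to HIR";
//   }
//
// Under baseline compilation the locations builder thus tolerates being
// reached and presumably declines to intrinsify (the invoke then falls back
// to a plain call), while reaching the code generator is still a bug.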
#define UNREACHABLE_INTRINSICS(Arch) \
+UNREACHABLE_INTRINSIC(Arch, MathMinIntInt) \
+UNREACHABLE_INTRINSIC(Arch, MathMinLongLong) \
+UNREACHABLE_INTRINSIC(Arch, MathMinFloatFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathMinDoubleDouble) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxIntInt) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxLongLong) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxFloatFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathMaxDoubleDouble) \
+UNREACHABLE_INTRINSIC(Arch, MathAbsInt) \
+UNREACHABLE_INTRINSIC(Arch, MathAbsLong) \
+UNREACHABLE_INTRINSIC(Arch, MathAbsFloat) \
+UNREACHABLE_INTRINSIC(Arch, MathAbsDouble) \
UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \
UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \
UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index 0e6485be9f..ec5d17a443 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -112,7 +112,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
explicit IntrinsicSlowPathARM64(HInvoke* invoke)
: SlowPathCodeARM64(invoke), invoke_(invoke) { }
- void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen_in) override {
CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
__ Bind(GetEntryLabel());
@@ -145,7 +145,7 @@ class IntrinsicSlowPathARM64 : public SlowPathCodeARM64 {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathARM64"; }
+ const char* GetDescription() const override { return "IntrinsicSlowPathARM64"; }
private:
// The instruction where this slow path is happening.
@@ -163,7 +163,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
DCHECK(kUseBakerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen_in) override {
CodeGeneratorARM64* codegen = down_cast<CodeGeneratorARM64*>(codegen_in);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
@@ -216,7 +216,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
+ const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathARM64"; }
private:
Location tmp_;
@@ -272,10 +272,10 @@ void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke
}
void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -286,10 +286,10 @@ void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -344,14 +344,6 @@ void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler());
}
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
static void GenNumberOfLeadingZeros(LocationSummary* locations,
DataType::Type type,
MacroAssembler* masm) {
@@ -536,168 +528,6 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}
-static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* masm) {
- Location in = locations->InAt(0);
- Location out = locations->Out();
-
- FPRegister in_reg = is64bit ? DRegisterFrom(in) : SRegisterFrom(in);
- FPRegister out_reg = is64bit ? DRegisterFrom(out) : SRegisterFrom(out);
-
- __ Fabs(out_reg, in_reg);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
-}
-
-static void GenAbsInteger(LocationSummary* locations,
- bool is64bit,
- MacroAssembler* masm) {
- Location in = locations->InAt(0);
- Location output = locations->Out();
-
- Register in_reg = is64bit ? XRegisterFrom(in) : WRegisterFrom(in);
- Register out_reg = is64bit ? XRegisterFrom(output) : WRegisterFrom(output);
-
- __ Cmp(in_reg, Operand(0));
- __ Cneg(out_reg, in_reg, lt);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- bool is_double,
- MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location op2 = locations->InAt(1);
- Location out = locations->Out();
-
- FPRegister op1_reg = is_double ? DRegisterFrom(op1) : SRegisterFrom(op1);
- FPRegister op2_reg = is_double ? DRegisterFrom(op2) : SRegisterFrom(op2);
- FPRegister out_reg = is_double ? DRegisterFrom(out) : SRegisterFrom(out);
- if (is_min) {
- __ Fmin(out_reg, op1_reg, op2_reg);
- } else {
- __ Fmax(out_reg, op1_reg, op2_reg);
- }
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetVIXLAssembler());
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- bool is_long,
- MacroAssembler* masm) {
- Location op1 = locations->InAt(0);
- Location op2 = locations->InAt(1);
- Location out = locations->Out();
-
- Register op1_reg = is_long ? XRegisterFrom(op1) : WRegisterFrom(op1);
- Register op2_reg = is_long ? XRegisterFrom(op2) : WRegisterFrom(op2);
- Register out_reg = is_long ? XRegisterFrom(out) : WRegisterFrom(out);
-
- __ Cmp(op1_reg, op2_reg);
- __ Csel(out_reg, op1_reg, op2_reg, is_min ? lt : gt);
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetVIXLAssembler());
-}
-
-void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetVIXLAssembler());
-}
-
void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) {
CreateFPToFPLocations(allocator_, invoke);
}
@@ -788,7 +618,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) {
- GenMathRound(invoke, /* is_double */ true, GetVIXLAssembler());
+ GenMathRound(invoke, /* is_double= */ true, GetVIXLAssembler());
}
void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
@@ -796,7 +626,7 @@ void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) {
- GenMathRound(invoke, /* is_double */ false, GetVIXLAssembler());
+ GenMathRound(invoke, /* is_double= */ false, GetVIXLAssembler());
}
void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) {
@@ -915,20 +745,20 @@ static void GenUnsafeGet(HInvoke* invoke,
if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
Register temp = WRegisterFrom(locations->GetTemp(0));
- codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
- trg_loc,
- base,
- /* offset */ 0u,
- /* index */ offset_loc,
- /* scale_factor */ 0u,
- temp,
- /* needs_null_check */ false,
- is_volatile);
+ MacroAssembler* masm = codegen->GetVIXLAssembler();
+ // Piggy-back on the field load path using introspection for the Baker read barrier.
+ __ Add(temp, base, offset.W()); // Offset should not exceed 32 bits.
+ codegen->GenerateFieldLoadWithBakerReadBarrier(invoke,
+ trg_loc,
+ base,
+ MemOperand(temp.X()),
+ /* needs_null_check= */ false,
+ is_volatile);
} else {
// Other cases.
MemOperand mem_op(base.X(), offset);
if (is_volatile) {
- codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check */ true);
+ codegen->LoadAcquire(invoke, trg, mem_op, /* needs_null_check= */ true);
} else {
codegen->Load(type, trg, mem_op);
}
@@ -952,9 +782,9 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* in
kIntrinsified);
if (can_call && kUseBakerReadBarrier) {
locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
- // We need a temporary register for the read barrier marking slow
- // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
- locations->AddTemp(Location::RequiresRegister());
+ // We need a temporary register for the read barrier load in order to use
+ // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier().
+ locations->AddTemp(FixedTempLocation());
}
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
@@ -983,22 +813,22 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invok
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}
static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -1066,7 +896,7 @@ static void GenUnsafePut(HInvoke* invoke,
}
if (is_volatile || is_ordered) {
- codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check */ false);
+ codegen->StoreRelease(invoke, type, source, mem_op, /* needs_null_check= */ false);
} else {
codegen->Store(type, source, mem_op);
}
@@ -1081,64 +911,64 @@ static void GenUnsafePut(HInvoke* invoke,
void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kInt32,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kInt32,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kInt32,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kReference,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kReference,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kReference,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kInt64,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kInt64,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
codegen_);
}
void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke,
DataType::Type::kInt64,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
codegen_);
}
@@ -1154,106 +984,155 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall,
kIntrinsified);
+ if (can_call) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
- // If heap poisoning is enabled, we don't want the unpoisoning
- // operations to potentially clobber the output. Likewise when
- // emitting a (Baker) read barrier, which may call.
- Location::OutputOverlap overlaps =
- ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
- ? Location::kOutputOverlap
- : Location::kNoOutputOverlap;
- locations->SetOut(Location::RequiresRegister(), overlaps);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // Temporary register for (Baker) read barrier.
+ // We need two non-scratch temporary registers for (Baker) read barrier.
+ locations->AddTemp(Location::RequiresRegister());
locations->AddTemp(Location::RequiresRegister());
}
}
+class BakerReadBarrierCasSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ explicit BakerReadBarrierCasSlowPathARM64(HInvoke* invoke)
+ : SlowPathCodeARM64(invoke) {}
+
+ const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARM64"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+ Arm64Assembler* assembler = arm64_codegen->GetAssembler();
+ MacroAssembler* masm = assembler->GetVIXLAssembler();
+ __ Bind(GetEntryLabel());
+
+ // Get the locations.
+ LocationSummary* locations = instruction_->GetLocations();
+ Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
+ Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
+ Register expected = WRegisterFrom(locations->InAt(3)); // Expected.
+ Register value = WRegisterFrom(locations->InAt(4)); // Value.
+
+ Register old_value = WRegisterFrom(locations->GetTemp(0)); // The old value from main path.
+ Register marked = WRegisterFrom(locations->GetTemp(1)); // The marked old value.
+
+ // Mark the `old_value` from the main path and compare with `expected`. This clobbers the
+ // `tmp_ptr` scratch register but we do not want to allocate another non-scratch temporary.
+ arm64_codegen->GenerateUnsafeCasOldValueMovWithBakerReadBarrier(marked, old_value);
+ __ Cmp(marked, expected);
+ __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure.
+
+ // The `old_value` we have read did not match `expected` (which is always a to-space reference)
+ // but after the read barrier in GenerateUnsafeCasOldValueMovWithBakerReadBarrier() the marked
+ // to-space value matched, so the `old_value` must be a from-space reference to the same
+ // object. Do the same CAS loop as the main path but check for both `expected` and the unmarked
+ // old value representing the to-space and from-space references for the same object.
+
+ UseScratchRegisterScope temps(masm);
+ Register tmp_ptr = temps.AcquireX();
+ Register tmp = temps.AcquireSameSizeAs(value);
+
+ // Recalculate the `tmp_ptr` clobbered above.
+ __ Add(tmp_ptr, base.X(), Operand(offset));
+
+ // do {
+ //   tmp = [tmp_ptr];
+ // } while ((tmp == expected || tmp == old_value) && failure([tmp_ptr] <- value));
+ // result = (tmp == expected || tmp == old_value);
+
+ vixl::aarch64::Label loop_head;
+ __ Bind(&loop_head);
+ __ Ldaxr(tmp, MemOperand(tmp_ptr));
+ assembler->MaybeUnpoisonHeapReference(tmp);
+ __ Cmp(tmp, expected);
+ __ Ccmp(tmp, old_value, ZFlag, ne);
+ __ B(GetExitLabel(), ne); // If taken, Z=false indicates failure.
+ assembler->MaybePoisonHeapReference(value);
+ __ Stlxr(tmp.W(), value, MemOperand(tmp_ptr));
+ assembler->MaybeUnpoisonHeapReference(value);
+ __ Cbnz(tmp.W(), &loop_head);
+
+ // Z=true from the above CMP+CCMP indicates success.
+ __ B(GetExitLabel());
+ }
+};
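// [Editor's note, not part of the patch] The Cmp+Ccmp pair above evaluates a
// branch-free OR of two equality tests:
//   Cmp(tmp, expected)               sets Z = (tmp == expected)
//   Ccmp(tmp, old_value, ZFlag, ne)  if Z was clear, re-compares tmp against
//                                    old_value; if Z was set, forces Z = 1
// so afterwards Z = (tmp == expected) || (tmp == old_value).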
+
static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) {
- MacroAssembler* masm = codegen->GetVIXLAssembler();
+ Arm64Assembler* assembler = codegen->GetAssembler();
+ MacroAssembler* masm = assembler->GetVIXLAssembler();
LocationSummary* locations = invoke->GetLocations();
- Location out_loc = locations->Out();
- Register out = WRegisterFrom(out_loc); // Boolean result.
-
- Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
- Location offset_loc = locations->InAt(2);
- Register offset = XRegisterFrom(offset_loc); // Long offset.
- Register expected = RegisterFrom(locations->InAt(3), type); // Expected.
- Register value = RegisterFrom(locations->InAt(4), type); // Value.
+ Register out = WRegisterFrom(locations->Out()); // Boolean result.
+ Register base = WRegisterFrom(locations->InAt(1)); // Object pointer.
+ Register offset = XRegisterFrom(locations->InAt(2)); // Long offset.
+ Register expected = RegisterFrom(locations->InAt(3), type); // Expected.
+ Register value = RegisterFrom(locations->InAt(4), type); // Value.
// This needs to be before the temp registers, as MarkGCCard also uses VIXL temps.
if (type == DataType::Type::kReference) {
// Mark card for object assuming new value is stored.
bool value_can_be_null = true; // TODO: Worth finding out this information?
codegen->MarkGCCard(base, value, value_can_be_null);
-
- // The only read barrier implementation supporting the
- // UnsafeCASObject intrinsic is the Baker-style read barriers.
- DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
-
- if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- Register temp = WRegisterFrom(locations->GetTemp(0));
- // Need to make sure the reference stored in the field is a to-space
- // one before attempting the CAS or the CAS could fail incorrectly.
- codegen->UpdateReferenceFieldWithBakerReadBarrier(
- invoke,
- out_loc, // Unused, used only as a "temporary" within the read barrier.
- base,
- /* field_offset */ offset_loc,
- temp,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
- }
}
UseScratchRegisterScope temps(masm);
Register tmp_ptr = temps.AcquireX(); // Pointer to actual memory.
- Register tmp_value = temps.AcquireSameSizeAs(value); // Value in memory.
+ Register old_value; // Value in memory.
- Register tmp_32 = tmp_value.W();
+ vixl::aarch64::Label exit_loop_label;
+ vixl::aarch64::Label* exit_loop = &exit_loop_label;
+ vixl::aarch64::Label* failure = &exit_loop_label;
- __ Add(tmp_ptr, base.X(), Operand(offset));
+ if (kEmitCompilerReadBarrier && type == DataType::Type::kReference) {
+ // The only read barrier implementation supporting the
+ // UnsafeCASObject intrinsic is the Baker-style read barrier.
+ DCHECK(kUseBakerReadBarrier);
- if (kPoisonHeapReferences && type == DataType::Type::kReference) {
- codegen->GetAssembler()->PoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not poison `value`, as it is the same register as
- // `expected`, which has just been poisoned.
- } else {
- codegen->GetAssembler()->PoisonHeapReference(value);
- }
+ BakerReadBarrierCasSlowPathARM64* slow_path =
+ new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARM64(invoke);
+ codegen->AddSlowPath(slow_path);
+ exit_loop = slow_path->GetExitLabel();
+ failure = slow_path->GetEntryLabel();
+ // We need to store the `old_value` in a non-scratch register to make sure
+ // the Baker read barrier in the slow path does not clobber it.
+ old_value = WRegisterFrom(locations->GetTemp(0));
+ } else {
+ old_value = temps.AcquireSameSizeAs(value);
}
+ __ Add(tmp_ptr, base.X(), Operand(offset));
+
// do {
- // tmp_value = [tmp_ptr] - expected;
- // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
- // result = tmp_value != 0;
+ //   old_value = [tmp_ptr];
+ // } while (old_value == expected && failure([tmp_ptr] <- value));
+ // result = old_value == expected;
- vixl::aarch64::Label loop_head, exit_loop;
+ vixl::aarch64::Label loop_head;
__ Bind(&loop_head);
- __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
- __ Cmp(tmp_value, expected);
- __ B(&exit_loop, ne);
- __ Stlxr(tmp_32, value, MemOperand(tmp_ptr));
- __ Cbnz(tmp_32, &loop_head);
- __ Bind(&exit_loop);
- __ Cset(out, eq);
-
- if (kPoisonHeapReferences && type == DataType::Type::kReference) {
- codegen->GetAssembler()->UnpoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not unpoison `value`, as it is the same register as
- // `expected`, which has just been unpoisoned.
- } else {
- codegen->GetAssembler()->UnpoisonHeapReference(value);
- }
+ __ Ldaxr(old_value, MemOperand(tmp_ptr));
+ if (type == DataType::Type::kReference) {
+ assembler->MaybeUnpoisonHeapReference(old_value);
+ }
+ __ Cmp(old_value, expected);
+ __ B(failure, ne);
+ if (type == DataType::Type::kReference) {
+ assembler->MaybePoisonHeapReference(value);
}
+ __ Stlxr(old_value.W(), value, MemOperand(tmp_ptr)); // Reuse `old_value` for STLXR result.
+ if (type == DataType::Type::kReference) {
+ assembler->MaybeUnpoisonHeapReference(value);
+ }
+ __ Cbnz(old_value.W(), &loop_head);
+ __ Bind(exit_loop);
+ __ Cset(out, eq);
}
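// [Editor's note, not part of the patch] A minimal portable model of what the
// emitted code implements, assuming Baker read barriers: `expected` is always
// a to-space reference, but the field may still hold the from-space alias of
// the same object, so the retry loop accepts either. `Mark` stands in for the
// read barrier; all names below are hypothetical.

#include <atomic>
#include <cstdint>

using Ref = uint32_t;
// Placeholder: the real barrier maps a from-space reference to its to-space copy.
inline Ref Mark(Ref r) { return r; }

bool CasReference(std::atomic<Ref>& field, Ref expected, Ref value) {
  Ref old_value = field.load(std::memory_order_acquire);  // Ldaxr
  if (old_value != expected && Mark(old_value) != expected) {
    return false;  // A genuinely different object: the CAS fails.
  }
  Ref seen = old_value;  // Accept both aliases of the object below.
  while (!field.compare_exchange_weak(seen, value)) {      // Stlxr + retry
    if (seen != expected && seen != old_value) {
      return false;  // The field now holds a different object.
    }
  }
  return true;
}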
void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) {
@@ -1519,13 +1398,6 @@ static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_lengt
}
void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier &&
- !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
- !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
- // No support for this odd case (String class is moveable, not in the boot image).
- return;
- }
-
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
@@ -1592,8 +1464,16 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) {
// All string objects must have the same type since String cannot be subclassed.
// Receiver must be a string object, so its class field is equal to all strings' class fields.
// If the argument is a string object, its class field must be equal to receiver's class field.
+ //
+ // As the String class is expected to be non-movable, we can read the class
+ // field from String.equals' arguments without read barriers.
+ AssertNonMovableStringClass();
+ // /* HeapReference<Class> */ temp = str->klass_
__ Ldr(temp, MemOperand(str.X(), class_offset));
+ // /* HeapReference<Class> */ temp1 = arg->klass_
__ Ldr(temp1, MemOperand(arg.X(), class_offset));
+ // Also, because we use the previously loaded class references only in the
+ // following comparison, we don't need to unpoison them.
__ Cmp(temp, temp1);
__ B(&return_false, ne);
}
@@ -1766,7 +1646,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) {
- GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ true);
+ GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ true);
}
void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -1782,7 +1662,7 @@ void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) {
- GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ false);
+ GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero= */ false);
}
void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -2584,8 +2464,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
src.W(),
class_offset,
temp3_loc,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
// Bail out if the source is not a non-primitive array.
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
@@ -2593,8 +2473,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp1,
component_offset,
temp3_loc,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
__ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
// by the previous call to GenerateFieldLoadWithBakerReadBarrier.
@@ -2610,8 +2490,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
dest.W(),
class_offset,
temp3_loc,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non-primitive array.
@@ -2627,8 +2507,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp1,
component_offset,
temp3_loc,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
__ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
// by the previous call to GenerateFieldLoadWithBakerReadBarrier.
@@ -2646,8 +2526,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
src.W(),
class_offset,
temp3_loc,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
// Note: if heap poisoning is on, we are comparing two unpoisoned references here.
__ Cmp(temp1, temp2);
@@ -2660,8 +2540,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
temp1,
component_offset,
temp3_loc,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
// /* HeapReference<Class> */ temp1 = temp1->super_class_
// We do not need to emit a read barrier for the following
// heap reference load, as `temp1` is only used in a
@@ -2744,16 +2624,16 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
src.W(),
class_offset,
temp3_loc,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
// /* HeapReference<Class> */ temp2 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
temp2_loc,
temp1,
component_offset,
temp3_loc,
- /* needs_null_check */ false,
- /* use_load_acquire */ false);
+ /* needs_null_check= */ false,
+ /* use_load_acquire= */ false);
__ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
// by the previous call to GenerateFieldLoadWithBakerReadBarrier.
@@ -2860,7 +2740,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
codegen_->AddSlowPath(read_barrier_slow_path);
// Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
__ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
@@ -2907,7 +2787,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
+ codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -2940,7 +2820,7 @@ void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) {
- GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler());
+ GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetVIXLAssembler());
}
void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
@@ -2948,7 +2828,7 @@ void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) {
- GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
+ GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetVIXLAssembler());
}
void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
@@ -2961,33 +2841,27 @@ void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
- IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ IntrinsicVisitor::IntegerValueOfInfo info =
+ IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
LocationSummary* locations = invoke->GetLocations();
MacroAssembler* masm = GetVIXLAssembler();
Register out = RegisterFrom(locations->Out(), DataType::Type::kReference);
UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireW();
- InvokeRuntimeCallingConvention calling_convention;
- Register argument = calling_convention.GetRegisterAt(0);
if (invoke->InputAt(0)->IsConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
- if (value >= info.low && value <= info.high) {
+ if (static_cast<uint32_t>(value - info.low) < info.length) {
// Just embed the j.l.Integer in the code.
- ScopedObjectAccess soa(Thread::Current());
- mirror::Object* boxed = info.cache->Get(value + (-info.low));
- DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
- __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+ DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
} else {
+ DCHECK(locations->CanCall());
// Allocate and initialize a new j.l.Integer.
// TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
// JIT object table.
- uint32_t address =
- dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ Mov(temp.W(), value);
__ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
// `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
@@ -2995,16 +2869,15 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
} else {
+ DCHECK(locations->CanCall());
Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32);
// Check bounds of our cache.
__ Add(out.W(), in.W(), -info.low);
- __ Cmp(out.W(), info.high - info.low + 1);
+ __ Cmp(out.W(), info.length);
vixl::aarch64::Label allocate, done;
__ B(&allocate, hs);
// If the value is within the bounds, load the j.l.Integer directly from the array.
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
- __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+ codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
MemOperand source = HeapOperand(
temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference));
codegen_->Load(DataType::Type::kReference, out, source);
@@ -3012,10 +2885,8 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
__ B(&done);
__ Bind(&allocate);
// Otherwise allocate and initialize a new j.l.Integer.
- address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ Str(in.W(), HeapOperand(out.W(), info.value_offset));
// `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
// one.
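// [Editor's note, not part of the patch] Both branches above rely on the
// unsigned-compare idiom: for 32-bit values,
//   static_cast<uint32_t>(value - info.low) < info.length
// is equivalent to (info.low <= value && value < info.low + (int32_t)info.length),
// because out-of-range inputs wrap around to large unsigned numbers, letting a
// single comparison (a single Add+Cmp in the generated code) test both bounds.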
@@ -3053,6 +2924,251 @@ void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) {
void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
+void IntrinsicLocationsBuilderARM64::VisitCRC32Update(HInvoke* invoke) {
+ if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
+ return;
+ }
+
+ LocationSummary* locations = new (allocator_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+// Lower the invoke of CRC32.update(int crc, int b).
+void IntrinsicCodeGeneratorARM64::VisitCRC32Update(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
+
+ MacroAssembler* masm = GetVIXLAssembler();
+
+ Register crc = InputRegisterAt(invoke, 0);
+ Register val = InputRegisterAt(invoke, 1);
+ Register out = OutputRegister(invoke);
+
+ // The general algorithm of the CRC32 calculation is:
+ // crc = ~crc
+ // result = crc32_for_byte(crc, b)
+ // crc = ~result
+ // It is directly lowered to three instructions.
+
+ UseScratchRegisterScope temps(masm);
+ Register tmp = temps.AcquireSameSizeAs(out);
+
+ __ Mvn(tmp, crc);
+ __ Crc32b(tmp, tmp, val);
+ __ Mvn(out, tmp);
+}
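// [Editor's note, not part of the patch] A minimal C++ sketch of what the
// three-instruction sequence computes, using the ACLE intrinsic from
// <arm_acle.h> (requires a CRC-capable ARM target):

#include <arm_acle.h>
#include <cstdint>

uint32_t Crc32UpdateByte(uint32_t crc, uint8_t b) {
  uint32_t tmp = ~crc;     // Mvn tmp, crc
  tmp = __crc32b(tmp, b);  // Crc32b tmp, tmp, val
  return ~tmp;             // Mvn out, tmp
}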
+
+// Generate code using CRC32 instructions which calculates
+// the CRC32 value of the given bytes.
+//
+// Parameters:
+// masm - VIXL macro assembler
+// crc - a register holding an initial CRC value
+// ptr - a register holding a memory address of bytes
+// length - a register holding a number of bytes to process
+// out - a register to put a result of calculation
+static void GenerateCodeForCalculationCRC32ValueOfBytes(MacroAssembler* masm,
+ const Register& crc,
+ const Register& ptr,
+ const Register& length,
+ const Register& out) {
+ // The algorithm of CRC32 of bytes is:
+ // crc = ~crc
+ // process a few first bytes to make the array 8-byte aligned
+ // while array has 8 bytes do:
+ // crc = crc32_of_8bytes(crc, 8_bytes(array))
+ // if array has 4 bytes:
+ // crc = crc32_of_4bytes(crc, 4_bytes(array))
+ // if array has 2 bytes:
+ // crc = crc32_of_2bytes(crc, 2_bytes(array))
+ // if array has a byte:
+ // crc = crc32_of_byte(crc, 1_byte(array))
+ // crc = ~crc
+
+ vixl::aarch64::Label loop, done;
+ vixl::aarch64::Label process_4bytes, process_2bytes, process_1byte;
+ vixl::aarch64::Label aligned2, aligned4, aligned8;
+
+ // Use VIXL scratch registers as the VIXL macro assembler won't use them in
+ // instructions below.
+ UseScratchRegisterScope temps(masm);
+ Register len = temps.AcquireW();
+ Register array_elem = temps.AcquireW();
+
+ __ Mvn(out, crc);
+ __ Mov(len, length);
+
+ __ Tbz(ptr, 0, &aligned2);
+ __ Subs(len, len, 1);
+ __ B(&done, lo);
+ __ Ldrb(array_elem, MemOperand(ptr, 1, PostIndex));
+ __ Crc32b(out, out, array_elem);
+
+ __ Bind(&aligned2);
+ __ Tbz(ptr, 1, &aligned4);
+ __ Subs(len, len, 2);
+ __ B(&process_1byte, lo);
+ __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
+ __ Crc32h(out, out, array_elem);
+
+ __ Bind(&aligned4);
+ __ Tbz(ptr, 2, &aligned8);
+ __ Subs(len, len, 4);
+ __ B(&process_2bytes, lo);
+ __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
+ __ Crc32w(out, out, array_elem);
+
+ __ Bind(&aligned8);
+ __ Subs(len, len, 8);
+ // If len < 8, go process the remaining data by 4 bytes, 2 bytes and a byte.
+ __ B(&process_4bytes, lo);
+
+ // The main loop processing data by 8 bytes.
+ __ Bind(&loop);
+ __ Ldr(array_elem.X(), MemOperand(ptr, 8, PostIndex));
+ __ Subs(len, len, 8);
+ __ Crc32x(out, out, array_elem.X());
+ // if len >= 8, process the next 8 bytes.
+ __ B(&loop, hs);
+
+ // Process the remaining data, which is less than 8 bytes.
+ // The code generated below works with values of len
+ // which lie in the range [-8, -1].
+ // The first three bits are used to detect whether 4 bytes or 2 bytes or
+ // a byte can be processed.
+ // The checking order is from bit 2 to bit 0:
+ // bit 2 is set: at least 4 bytes available
+ // bit 1 is set: at least 2 bytes available
+ // bit 0 is set: at least a byte available
+ __ Bind(&process_4bytes);
+ // Goto process_2bytes if less than four bytes available
+ __ Tbz(len, 2, &process_2bytes);
+ __ Ldr(array_elem, MemOperand(ptr, 4, PostIndex));
+ __ Crc32w(out, out, array_elem);
+
+ __ Bind(&process_2bytes);
+ // Goto process_1byte if less than two bytes available
+ __ Tbz(len, 1, &process_1byte);
+ __ Ldrh(array_elem, MemOperand(ptr, 2, PostIndex));
+ __ Crc32h(out, out, array_elem);
+
+ __ Bind(&process_1byte);
+ // Goto done if no bytes available
+ __ Tbz(len, 0, &done);
+ __ Ldrb(array_elem, MemOperand(ptr));
+ __ Crc32b(out, out, array_elem);
+
+ __ Bind(&done);
+ __ Mvn(out, out);
+}
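// [Editor's note, not part of the patch] A portable C++ sketch of the same
// algorithm using ACLE intrinsics (assumes <arm_acle.h>, a CRC-capable ARM
// target, and little-endian loads, as on Android). The generated assembly
// additionally aligns `p` to 8 bytes before entering the main loop.

#include <arm_acle.h>
#include <cstddef>
#include <cstdint>
#include <cstring>

uint32_t Crc32Bytes(uint32_t crc, const uint8_t* p, size_t len) {
  crc = ~crc;
  for (; len >= 8; p += 8, len -= 8) {
    uint64_t v;
    std::memcpy(&v, p, sizeof(v));  // Unaligned-safe 8-byte load.
    crc = __crc32d(crc, v);
  }
  if (len & 4) {
    uint32_t v;
    std::memcpy(&v, p, sizeof(v));
    crc = __crc32w(crc, v);
    p += 4;
  }
  if (len & 2) {
    uint16_t v;
    std::memcpy(&v, p, sizeof(v));
    crc = __crc32h(crc, v);
    p += 2;
  }
  if (len & 1) {
    crc = __crc32b(crc, *p);
  }
  return ~crc;
}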
+
+// The threshold for array sizes above which the library-provided implementation
+// of CRC32.updateBytes is used instead of the intrinsic.
+static constexpr int32_t kCRC32UpdateBytesThreshold = 64 * 1024;
+
+void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
+ if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
+ return;
+ }
+
+ LocationSummary* locations =
+ new (allocator_) LocationSummary(invoke,
+ LocationSummary::kCallOnSlowPath,
+ kIntrinsified);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RegisterOrConstant(invoke->InputAt(2)));
+ locations->SetInAt(3, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+// Lower the invoke of CRC32.updateBytes(int crc, byte[] b, int off, int len)
+//
+// Note: The intrinsic is not used if len exceeds a threshold.
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateBytes(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
+
+ MacroAssembler* masm = GetVIXLAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ SlowPathCodeARM64* slow_path =
+ new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke);
+ codegen_->AddSlowPath(slow_path);
+
+ Register length = WRegisterFrom(locations->InAt(3));
+ __ Cmp(length, kCRC32UpdateBytesThreshold);
+ __ B(slow_path->GetEntryLabel(), hi);
+
+ const uint32_t array_data_offset =
+ mirror::Array::DataOffset(Primitive::kPrimByte).Uint32Value();
+ Register ptr = XRegisterFrom(locations->GetTemp(0));
+ Register array = XRegisterFrom(locations->InAt(1));
+ Location offset = locations->InAt(2);
+ if (offset.IsConstant()) {
+ int32_t offset_value = offset.GetConstant()->AsIntConstant()->GetValue();
+ __ Add(ptr, array, array_data_offset + offset_value);
+ } else {
+ __ Add(ptr, array, array_data_offset);
+ __ Add(ptr, ptr, XRegisterFrom(offset));
+ }
+
+ Register crc = WRegisterFrom(locations->InAt(0));
+ Register out = WRegisterFrom(locations->Out());
+
+ GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
+
+ __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
+ if (!codegen_->GetInstructionSetFeatures().HasCRC()) {
+ return;
+ }
+
+ LocationSummary* locations =
+ new (allocator_) LocationSummary(invoke,
+ LocationSummary::kNoCall,
+ kIntrinsified);
+
+ locations->SetInAt(0, Location::RequiresRegister());
+ locations->SetInAt(1, Location::RequiresRegister());
+ locations->SetInAt(2, Location::RequiresRegister());
+ locations->SetInAt(3, Location::RequiresRegister());
+ locations->AddTemp(Location::RequiresRegister());
+ locations->SetOut(Location::RequiresRegister());
+}
+
+// Lower the invoke of CRC32.updateByteBuffer(int crc, long addr, int off, int len)
+//
+// There is no need to generate code checking if addr is 0.
+// updateByteBuffer is a private method of java.util.zip.CRC32, so it cannot
+// be called from outside that class, and the address passed to it is always
+// the address of a DirectBuffer. An empty DirectBuffer implementation may use
+// a zero address, but then its length must also be zero, and the generated
+// code handles a zero length correctly.
+void IntrinsicCodeGeneratorARM64::VisitCRC32UpdateByteBuffer(HInvoke* invoke) {
+ DCHECK(codegen_->GetInstructionSetFeatures().HasCRC());
+
+ MacroAssembler* masm = GetVIXLAssembler();
+ LocationSummary* locations = invoke->GetLocations();
+
+ Register addr = XRegisterFrom(locations->InAt(1));
+ Register ptr = XRegisterFrom(locations->GetTemp(0));
+ __ Add(ptr, addr, XRegisterFrom(locations->InAt(2)));
+
+ Register crc = WRegisterFrom(locations->InAt(0));
+ Register length = WRegisterFrom(locations->InAt(3));
+ Register out = WRegisterFrom(locations->Out());
+ GenerateCodeForCalculationCRC32ValueOfBytes(masm, crc, ptr, length, out);
+}
+
UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 033a644f34..9c46efddec 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -37,7 +37,7 @@ namespace arm64 {
class CodeGeneratorARM64;
-class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor {
+class IntrinsicLocationsBuilderARM64 final : public IntrinsicVisitor {
public:
explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen)
: allocator_(allocator), codegen_(codegen) {}
@@ -45,7 +45,7 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor {
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
@@ -63,14 +63,14 @@ class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor {
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64);
};
-class IntrinsicCodeGeneratorARM64 FINAL : public IntrinsicVisitor {
+class IntrinsicCodeGeneratorARM64 final : public IntrinsicVisitor {
public:
explicit IntrinsicCodeGeneratorARM64(CodeGeneratorARM64* codegen) : codegen_(codegen) {}
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 97a145664c..f0aa92e981 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -25,7 +25,7 @@
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
-#include "mirror/string.h"
+#include "mirror/string-inl.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"
@@ -41,19 +41,15 @@ using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
-using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
-using helpers::OutputSRegister;
using helpers::OutputRegister;
-using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
-using helpers::DRegisterFromS;
using namespace vixl::aarch32; // NOLINT(build/namespaces)
@@ -89,7 +85,7 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
return calling_convention_visitor.GetMethodLocation();
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
__ Bind(GetEntryLabel());
@@ -115,7 +111,7 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }
+ const char* GetDescription() const override { return "IntrinsicSlowPath"; }
private:
// The instruction where this slow path is happening.
@@ -177,7 +173,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
DCHECK(kUseBakerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
LocationSummary* locations = instruction_->GetLocations();
@@ -233,11 +229,11 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
assembler->MaybePoisonHeapReference(tmp);
__ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
__ Cmp(src_curr_addr, src_stop_addr);
- __ B(ne, &loop, /* far_target */ false);
+ __ B(ne, &loop, /* is_far_target= */ false);
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE {
+ const char* GetDescription() const override {
return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
}
@@ -302,10 +298,10 @@ void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invo
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -316,10 +312,10 @@ void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -359,7 +355,7 @@ static void GenNumberOfLeadingZeros(HInvoke* invoke,
vixl32::Label end;
vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ Clz(out, in_reg_hi);
- __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
+ __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* is_far_target= */ false);
__ Clz(out, in_reg_lo);
__ Add(out, out, 32);
if (end.IsReferenced()) {
@@ -402,7 +398,7 @@ static void GenNumberOfTrailingZeros(HInvoke* invoke,
vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
__ Rbit(out, in_reg_lo);
__ Clz(out, out);
- __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
+ __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* is_far_target= */ false);
__ Rbit(out, in_reg_hi);
__ Clz(out, out);
__ Add(out, out, 32);
@@ -432,341 +428,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invo
GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}
-static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
- __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke, GetAssembler());
-}
-
-static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-
- locations->AddTemp(Location::RequiresRegister());
-}
-
-static void GenAbsInteger(LocationSummary* locations,
- bool is64bit,
- ArmVIXLAssembler* assembler) {
- Location in = locations->InAt(0);
- Location output = locations->Out();
-
- vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
-
- if (is64bit) {
- vixl32::Register in_reg_lo = LowRegisterFrom(in);
- vixl32::Register in_reg_hi = HighRegisterFrom(in);
- vixl32::Register out_reg_lo = LowRegisterFrom(output);
- vixl32::Register out_reg_hi = HighRegisterFrom(output);
-
- DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
-
- __ Asr(mask, in_reg_hi, 31);
- __ Adds(out_reg_lo, in_reg_lo, mask);
- __ Adc(out_reg_hi, in_reg_hi, mask);
- __ Eor(out_reg_lo, mask, out_reg_lo);
- __ Eor(out_reg_hi, mask, out_reg_hi);
- } else {
- vixl32::Register in_reg = RegisterFrom(in);
- vixl32::Register out_reg = RegisterFrom(output);
-
- __ Asr(mask, in_reg, 31);
- __ Add(out_reg, in_reg, mask);
- __ Eor(out_reg, mask, out_reg);
- }
-}
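
The removed GenAbsInteger relied on the classic sign-mask identity abs(x) = (x + (x >> 31)) ^ (x >> 31); the 64-bit variant propagates the low-word carry into the high word with ADDS/ADC. A sketch of the identity (unsigned arithmetic avoids signed-overflow UB for the most negative value, which wraps to itself exactly as Java's Math.abs does):

    #include <cstdint>

    // mask is 0 for non-negative x and all-ones for negative x (arithmetic
    // shift assumed); (x + mask) ^ mask then computes -x without a branch.
    static int32_t BranchlessAbs32(int32_t x) {
      uint32_t mask = static_cast<uint32_t>(x >> 31);
      return static_cast<int32_t>((static_cast<uint32_t>(x) + mask) ^ mask);
    }

    static int64_t BranchlessAbs64(int64_t x) {
      uint64_t mask = static_cast<uint64_t>(x >> 63);  // Adds/Adc do this add in two halves.
      return static_cast<int64_t>((static_cast<uint64_t>(x) + mask) ^ mask);
    }
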
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToIntPlusTemp(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToIntPlusTemp(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
- ArmVIXLAssembler* assembler = codegen->GetAssembler();
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::SRegister op1 = SRegisterFrom(op1_loc);
- vixl32::SRegister op2 = SRegisterFrom(op2_loc);
- vixl32::SRegister out = OutputSRegister(invoke);
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp1 = temps.Acquire();
- vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
- vixl32::Label nan, done;
- vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
-
- DCHECK(op1.Is(out));
-
- __ Vcmp(op1, op2);
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(vs, &nan, /* far_target */ false); // if un-ordered, go to NaN handling.
-
- // op1 <> op2
- vixl32::ConditionType cond = is_min ? gt : lt;
- {
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cond);
- __ vmov(cond, F32, out, op2);
- }
- // for <>(not equal), we've done min/max calculation.
- __ B(ne, final_label, /* far_target */ false);
-
- // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
- __ Vmov(temp1, op1);
- __ Vmov(temp2, op2);
- if (is_min) {
- __ Orr(temp1, temp1, temp2);
- } else {
- __ And(temp1, temp1, temp2);
- }
- __ Vmov(out, temp1);
- __ B(final_label);
-
- // handle NaN input.
- __ Bind(&nan);
- __ Movt(temp1, High16Bits(kNanFloat)); // 0x7FC0xxxx is a NaN.
- __ Vmov(out, temp1);
-
- if (done.IsReferenced()) {
- __ Bind(&done);
- }
-}
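
When the operands compare equal, the only case that still matters is ±0.0: min(+0.0, -0.0) must return -0.0 and max(+0.0, -0.0) must return +0.0. ORing the raw bit patterns keeps the sign bit if either operand has one (min), ANDing keeps it only if both do (max); the double path below uses the same trick with Vorr/Vand. A sketch, valid only in the equal-operands branch where this code runs:

    #include <cstdint>
    #include <cstring>

    // -0.0f is 0x80000000 and +0.0f is 0x00000000, so OR picks the "more
    // negative" zero and AND picks the "more positive" one.
    static float SignedZeroMin(float a, float b) {
      uint32_t ua, ub;
      std::memcpy(&ua, &a, sizeof(ua));
      std::memcpy(&ub, &b, sizeof(ub));
      uint32_t bits = ua | ub;
      float out;
      std::memcpy(&out, &bits, sizeof(out));
      return out;
    }

    static float SignedZeroMax(float a, float b) {
      uint32_t ua, ub;
      std::memcpy(&ua, &a, sizeof(ua));
      std::memcpy(&ub, &b, sizeof(ub));
      uint32_t bits = ua & ub;
      float out;
      std::memcpy(&out, &bits, sizeof(out));
      return out;
    }
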
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
- invoke->GetLocations()->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
- invoke->GetLocations()->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
-}
-
-static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
- ArmVIXLAssembler* assembler = codegen->GetAssembler();
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
-    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::DRegister op1 = DRegisterFrom(op1_loc);
- vixl32::DRegister op2 = DRegisterFrom(op2_loc);
- vixl32::DRegister out = OutputDRegister(invoke);
- vixl32::Label handle_nan_eq, done;
- vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
-
- DCHECK(op1.Is(out));
-
- __ Vcmp(op1, op2);
- __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
- __ B(vs, &handle_nan_eq, /* far_target */ false); // if un-ordered, go to NaN handling.
-
- // op1 <> op2
- vixl32::ConditionType cond = is_min ? gt : lt;
- {
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ it(cond);
- __ vmov(cond, F64, out, op2);
- }
- // for <>(not equal), we've done min/max calculation.
- __ B(ne, final_label, /* far_target */ false);
-
- // handle op1 == op2, max(+0.0,-0.0).
- if (!is_min) {
- __ Vand(F64, out, op1, op2);
- __ B(final_label);
- }
-
- // handle op1 == op2, min(+0.0,-0.0), NaN input.
- __ Bind(&handle_nan_eq);
- __ Vorr(F64, out, op1, op2); // assemble op1/-0.0/NaN.
-
- if (done.IsReferenced()) {
- __ Bind(&done);
- }
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxDouble(invoke, /* is_min */ true, codegen_);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
-}
-
-static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
- Location op1_loc = invoke->GetLocations()->InAt(0);
- Location op2_loc = invoke->GetLocations()->InAt(1);
- Location out_loc = invoke->GetLocations()->Out();
-
- // Optimization: don't generate any code if inputs are the same.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc)); // out_loc is set as SameAsFirstInput() in location builder.
- return;
- }
-
- vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
- vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
- vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
- vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
- vixl32::Register out_lo = LowRegisterFrom(out_loc);
- vixl32::Register out_hi = HighRegisterFrom(out_loc);
- UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
- const vixl32::Register temp = temps.Acquire();
-
- DCHECK(op1_lo.Is(out_lo));
- DCHECK(op1_hi.Is(out_hi));
-
- // Compare op1 >= op2, or op1 < op2.
- __ Cmp(out_lo, op2_lo);
- __ Sbcs(temp, out_hi, op2_hi);
-
- // Now GE/LT condition code is correct for the long comparison.
- {
- vixl32::ConditionType cond = is_min ? ge : lt;
- ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
- __ itt(cond);
- __ mov(cond, out_lo, op2_lo);
- __ mov(cond, out_hi, op2_hi);
- }
-}
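
The Cmp/Sbcs pair performs a full signed 64-bit comparison without materializing the difference: the low words are subtracted first, the borrow is folded into the high-word subtract, and afterwards GE/LT hold for the whole long. A sketch of the same computation (widening the high-word step to 64 bits sidesteps the N/V flag bookkeeping the hardware does):

    #include <cstdint>

    static bool SignedLess64(int64_t a, int64_t b) {
      uint32_t a_lo = static_cast<uint32_t>(a);
      uint32_t b_lo = static_cast<uint32_t>(b);
      int32_t a_hi = static_cast<int32_t>(a >> 32);
      int32_t b_hi = static_cast<int32_t>(b >> 32);
      int64_t borrow = (a_lo < b_lo) ? 1 : 0;                    // Borrow out of the low-word Cmp.
      int64_t hi = static_cast<int64_t>(a_hi) - b_hi - borrow;   // The Sbcs step.
      return hi < 0;                                             // LT for the full 64-bit compare.
    }
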
-
-static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
-}
-
-static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
- vixl32::Register op1 = InputRegisterAt(invoke, 0);
- vixl32::Register op2 = InputRegisterAt(invoke, 1);
- vixl32::Register out = OutputRegister(invoke);
-
- __ Cmp(op1, op2);
-
- {
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
-
- __ ite(is_min ? lt : gt);
- __ mov(is_min ? lt : gt, out, op1);
- __ mov(is_min ? ge : le, out, op2);
- }
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke, /* is_min */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke, /* is_min */ false, GetAssembler());
-}
-
void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
CreateFPToFPLocations(allocator_, invoke);
}
@@ -785,7 +446,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
ArmVIXLAssembler* assembler = GetAssembler();
- __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+ __ Vrintn(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
@@ -815,12 +476,12 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
// For positive, zero or NaN inputs, rounding is done.
__ Cmp(out_reg, 0);
- __ B(ge, final_label, /* far_target */ false);
+ __ B(ge, final_label, /* is_far_target= */ false);
// Handle input < 0 cases.
// If input is negative but not a tie, previous result (round to nearest) is valid.
// If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
- __ Vrinta(F32, F32, temp1, in_reg);
+ __ Vrinta(F32, temp1, in_reg);
__ Vmov(temp2, 0.5);
__ Vsub(F32, temp1, in_reg, temp1);
__ Vcmp(F32, temp1, temp2);
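
Java's Math.round(float) is defined as floor(x + 0.5), while VRINTA rounds to nearest with ties away from zero; the two disagree only on negative ties (VRINTA(-2.5f) gives -3, Math.round(-2.5f) is -2), which is exactly the case the Vrinta/Vsub/Vcmp sequence detects and fixes by adding one. A behavioral sketch (std::round also ties away from zero; NaN handling and the int-range clamping of the real intrinsic are elided):

    #include <cmath>
    #include <cstdint>

    static int32_t JavaRoundFloat(float f) {
      float r = std::round(f);            // Nearest, ties away from zero, like VRINTA.
      if (f < 0.0f && f - r == 0.5f) {
        r += 1.0f;                        // Negative tie: Math.round goes toward +inf.
      }
      return static_cast<int32_t>(r);
    }
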
@@ -977,8 +638,11 @@ static void GenUnsafeGet(HInvoke* invoke,
if (kEmitCompilerReadBarrier) {
if (kUseBakerReadBarrier) {
Location temp = locations->GetTemp(0);
- codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
+ // Piggy-back on the field load path using introspection for the Baker read barrier.
+ __ Add(RegisterFrom(temp), base, Operand(offset));
+ MemOperand src(RegisterFrom(temp), 0);
+ codegen->GenerateFieldLoadWithBakerReadBarrier(
+ invoke, trg_loc, base, src, /* needs_null_check= */ false);
if (is_volatile) {
__ Dmb(vixl32::ISH);
}
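
For a volatile get, the plain load followed by DMB ISH is the pre-ARMv8 load-acquire idiom: the barrier keeps later memory accesses from being reordered before the load. Roughly equivalent C++, as a sketch rather than the codegen's actual lowering:

    #include <atomic>
    #include <cstdint>

    static int32_t VolatileGet32(const std::atomic<int32_t>* addr) {
      int32_t value = addr->load(std::memory_order_relaxed);  // The Ldr above.
      std::atomic_thread_fence(std::memory_order_acquire);    // The Dmb(ISH) above.
      return value;
    }
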
@@ -1069,22 +733,22 @@ void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* inv
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}
static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator,
@@ -1114,39 +778,39 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator,
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
CreateIntIntIntIntToVoid(
- allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke);
+ allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
CreateIntIntIntIntToVoid(
- allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke);
+ allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
CreateIntIntIntIntToVoid(
- allocator_, features_, DataType::Type::kInt32, /* is_volatile */ true, invoke);
+ allocator_, features_, DataType::Type::kInt32, /* is_volatile= */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
CreateIntIntIntIntToVoid(
- allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke);
+ allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
CreateIntIntIntIntToVoid(
- allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke);
+ allocator_, features_, DataType::Type::kReference, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
CreateIntIntIntIntToVoid(
- allocator_, features_, DataType::Type::kReference, /* is_volatile */ true, invoke);
+ allocator_, features_, DataType::Type::kReference, /* is_volatile= */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
CreateIntIntIntIntToVoid(
- allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke);
+ allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
CreateIntIntIntIntToVoid(
- allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke);
+ allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
CreateIntIntIntIntToVoid(
- allocator_, features_, DataType::Type::kInt64, /* is_volatile */ true, invoke);
+ allocator_, features_, DataType::Type::kInt64, /* is_volatile= */ true, invoke);
}
static void GenUnsafePut(LocationSummary* locations,
@@ -1180,7 +844,7 @@ static void GenUnsafePut(LocationSummary* locations,
__ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
__ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
__ Cmp(temp_lo, 0);
- __ B(ne, &loop_head, /* far_target */ false);
+ __ B(ne, &loop_head, /* is_far_target= */ false);
} else {
__ Strd(value_lo, value_hi, MemOperand(base, offset));
}
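
The Ldrexd/Strexd loop exists because a 64-bit STRD is not guaranteed single-copy atomic on ARMv7 cores without LPAE: STREXD writes a nonzero status register if exclusivity was lost, and the Cmp/B(ne) retries until the doubleword is stored in one shot. A relaxed 64-bit atomic store expresses the same contract and lowers to this very loop on such cores:

    #include <atomic>
    #include <cstdint>

    static void AtomicStore64(std::atomic<uint64_t>* addr, uint64_t value) {
      addr->store(value, std::memory_order_relaxed);  // ldrexd/strexd loop on ARMv7.
    }
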
@@ -1211,70 +875,68 @@ static void GenUnsafePut(LocationSummary* locations,
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt32,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt32,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt32,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kReference,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kReference,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kReference,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt64,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt64,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt64,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
codegen_);
}
-static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator,
- HInvoke* invoke,
- DataType::Type type) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
bool can_call = kEmitCompilerReadBarrier &&
kUseBakerReadBarrier &&
(invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
@@ -1284,20 +946,16 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator,
? LocationSummary::kCallOnSlowPath
: LocationSummary::kNoCall,
kIntrinsified);
+ if (can_call) {
+ locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
+ }
locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
locations->SetInAt(1, Location::RequiresRegister());
locations->SetInAt(2, Location::RequiresRegister());
locations->SetInAt(3, Location::RequiresRegister());
locations->SetInAt(4, Location::RequiresRegister());
- // If heap poisoning is enabled, we don't want the unpoisoning
- // operations to potentially clobber the output. Likewise when
- // emitting a (Baker) read barrier, which may call.
- Location::OutputOverlap overlaps =
- ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
- ? Location::kOutputOverlap
- : Location::kNoOutputOverlap;
- locations->SetOut(Location::RequiresRegister(), overlaps);
+ locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
// Temporary registers used in CAS. In the object case
// (UnsafeCASObject intrinsic), these are also used for
@@ -1306,24 +964,92 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator,
locations->AddTemp(Location::RequiresRegister()); // Temp 1.
}
+class BakerReadBarrierCasSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ explicit BakerReadBarrierCasSlowPathARMVIXL(HInvoke* invoke)
+ : SlowPathCodeARMVIXL(invoke) {}
+
+ const char* GetDescription() const override { return "BakerReadBarrierCasSlowPathARMVIXL"; }
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+ ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
+ __ Bind(GetEntryLabel());
+
+ LocationSummary* locations = instruction_->GetLocations();
+ vixl32::Register base = InputRegisterAt(instruction_, 1); // Object pointer.
+ vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
+ vixl32::Register expected = InputRegisterAt(instruction_, 3); // Expected.
+ vixl32::Register value = InputRegisterAt(instruction_, 4); // Value.
+
+ vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory.
+ vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Temporary.
+
+ // The `tmp` is initialized to `[tmp_ptr] - expected` in the main path. Reconstruct
+ // and mark the old value and compare with `expected`. We clobber `tmp_ptr` in the
+ // process due to lack of other temps suitable for the read barrier.
+ arm_codegen->GenerateUnsafeCasOldValueAddWithBakerReadBarrier(tmp_ptr, tmp, expected);
+ __ Cmp(tmp_ptr, expected);
+ __ B(ne, GetExitLabel());
+
+ // The old value we have read did not match `expected` (which is always a to-space reference)
+ // but after the read barrier in GenerateUnsafeCasOldValueAddWithBakerReadBarrier() the marked
+ // to-space value matched, so the old value must be a from-space reference to the same object.
+ // Do the same CAS loop as the main path but check for both `expected` and the unmarked
+ // old value representing the to-space and from-space references for the same object.
+
+ UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+    vixl32::Register adjusted_old_value = temps.Acquire();      // Holds the `tmp` saved from the main path.
+
+    // Recalculate the `tmp_ptr` clobbered above and save the old value into `adjusted_old_value`, i.e. IP.
+ __ Add(tmp_ptr, base, offset);
+ __ Mov(adjusted_old_value, tmp);
+
+ // do {
+ // tmp = [r_ptr] - expected;
+ // } while ((tmp == 0 || tmp == adjusted_old_value) && failure([r_ptr] <- r_new_value));
+ // result = (tmp == 0 || tmp == adjusted_old_value);
+
+ vixl32::Label loop_head;
+ __ Bind(&loop_head);
+ __ Ldrex(tmp, MemOperand(tmp_ptr)); // This can now load null stored by another thread.
+ assembler->MaybeUnpoisonHeapReference(tmp);
+ __ Subs(tmp, tmp, expected); // Use SUBS to get non-zero value if both compares fail.
+ {
+ // If the newly loaded value did not match `expected`, compare with `adjusted_old_value`.
+ ExactAssemblyScope aas(assembler->GetVIXLAssembler(), 2 * k16BitT32InstructionSizeInBytes);
+ __ it(ne);
+ __ cmp(ne, tmp, adjusted_old_value);
+ }
+ __ B(ne, GetExitLabel());
+ assembler->MaybePoisonHeapReference(value);
+ __ Strex(tmp, value, MemOperand(tmp_ptr));
+ assembler->MaybeUnpoisonHeapReference(value);
+ __ Cmp(tmp, 0);
+ __ B(ne, &loop_head, /* is_far_target= */ false);
+ __ B(GetExitLabel());
+ }
+};
+
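
The slow path exists because, with a concurrent-copying collector, the field may still hold the from-space address of the very object whose to-space address is `expected`; a plain CAS would then fail spuriously. A GC-agnostic sketch of the retry loop's logic, where `from_space_expected` stands for the unmarked old value the slow path reconstructs:

    #include <atomic>

    template <typename T>
    bool CasAcceptingBothSpaces(std::atomic<T*>* field,
                                T* expected,             // Always a to-space reference.
                                T* from_space_expected,  // From-space alias of the same object.
                                T* new_value) {
      T* old_value = field->load();
      while (old_value == expected || old_value == from_space_expected) {
        if (field->compare_exchange_weak(old_value, new_value)) {
          return true;  // Stored new_value.
        }
        // compare_exchange_weak reloaded old_value; re-check both forms.
      }
      return false;  // The field holds an unrelated value: genuine failure.
    }
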
static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
DCHECK_NE(type, DataType::Type::kInt64);
ArmVIXLAssembler* assembler = codegen->GetAssembler();
LocationSummary* locations = invoke->GetLocations();
- Location out_loc = locations->Out();
vixl32::Register out = OutputRegister(invoke); // Boolean result.
vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
- Location offset_loc = locations->InAt(2);
- vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B).
+ vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Offset (discard high 4B).
vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected.
vixl32::Register value = InputRegisterAt(invoke, 4); // Value.
- Location tmp_ptr_loc = locations->GetTemp(0);
- vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory.
- vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory.
+ vixl32::Register tmp_ptr = RegisterFrom(locations->GetTemp(0)); // Pointer to actual memory.
+ vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Temporary.
+
+ vixl32::Label loop_exit_label;
+ vixl32::Label* loop_exit = &loop_exit_label;
+ vixl32::Label* failure = &loop_exit_label;
if (type == DataType::Type::kReference) {
// The only read barrier implementation supporting the
@@ -1336,87 +1062,63 @@ static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* c
codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
- // Need to make sure the reference stored in the field is a to-space
- // one before attempting the CAS or the CAS could fail incorrectly.
- codegen->UpdateReferenceFieldWithBakerReadBarrier(
- invoke,
- out_loc, // Unused, used only as a "temporary" within the read barrier.
- base,
- /* field_offset */ offset_loc,
- tmp_ptr_loc,
- /* needs_null_check */ false,
- tmp);
+ // If marking, check if the stored reference is a from-space reference to the same
+ // object as the to-space reference `expected`. If so, perform a custom CAS loop.
+ BakerReadBarrierCasSlowPathARMVIXL* slow_path =
+ new (codegen->GetScopedAllocator()) BakerReadBarrierCasSlowPathARMVIXL(invoke);
+ codegen->AddSlowPath(slow_path);
+ failure = slow_path->GetEntryLabel();
+ loop_exit = slow_path->GetExitLabel();
}
}
// Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
- // latter allows a preceding load to be delayed past the STXR
+ // latter allows a preceding load to be delayed past the STREX
// instruction below.
__ Dmb(vixl32::ISH);
__ Add(tmp_ptr, base, offset);
- if (kPoisonHeapReferences && type == DataType::Type::kReference) {
- codegen->GetAssembler()->PoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not poison `value`, as it is the same register as
- // `expected`, which has just been poisoned.
- } else {
- codegen->GetAssembler()->PoisonHeapReference(value);
- }
- }
-
// do {
// tmp = [r_ptr] - expected;
// } while (tmp == 0 && failure([r_ptr] <- r_new_value));
- // result = tmp != 0;
+ // result = tmp == 0;
vixl32::Label loop_head;
__ Bind(&loop_head);
-
__ Ldrex(tmp, MemOperand(tmp_ptr));
-
+ if (type == DataType::Type::kReference) {
+ assembler->MaybeUnpoisonHeapReference(tmp);
+ }
__ Subs(tmp, tmp, expected);
-
- {
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 3 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
-
- __ itt(eq);
- __ strex(eq, tmp, value, MemOperand(tmp_ptr));
- __ cmp(eq, tmp, 1);
+ static_cast<vixl32::MacroAssembler*>(assembler->GetVIXLAssembler())->
+ B(ne, failure, /* hint= */ (failure == loop_exit) ? kNear : kBranchWithoutHint);
+ if (type == DataType::Type::kReference) {
+ assembler->MaybePoisonHeapReference(value);
+ }
+ __ Strex(tmp, value, MemOperand(tmp_ptr));
+ if (type == DataType::Type::kReference) {
+ assembler->MaybeUnpoisonHeapReference(value);
}
+ __ Cmp(tmp, 0);
+ __ B(ne, &loop_head, /* is_far_target= */ false);
- __ B(eq, &loop_head, /* far_target */ false);
+ __ Bind(loop_exit);
__ Dmb(vixl32::ISH);
- __ Rsbs(out, tmp, 1);
+ // out = tmp == 0.
+ __ Clz(out, tmp);
+ __ Lsr(out, out, WhichPowerOf2(out.GetSizeInBits()));
- {
- ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
- 2 * kMaxInstructionSizeInBytes,
- CodeBufferCheckScope::kMaximumSize);
-
- __ it(cc);
- __ mov(cc, out, 0);
- }
-
- if (kPoisonHeapReferences && type == DataType::Type::kReference) {
- codegen->GetAssembler()->UnpoisonHeapReference(expected);
- if (value.Is(expected)) {
- // Do not unpoison `value`, as it is the same register as
- // `expected`, which has just been unpoisoned.
- } else {
- codegen->GetAssembler()->UnpoisonHeapReference(value);
- }
+ if (type == DataType::Type::kReference) {
+ codegen->MaybeGenerateMarkingRegisterCheck(/* code= */ 128);
}
}
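
The Clz/Lsr pair at the end of GenCas materializes the success flag without an IT block: CLZ returns the register width (32) only for a zero input, so shifting the count right by log2(32) = 5 yields 1 exactly when `tmp == 0`. In C:

    #include <cstdint>

    static uint32_t IsZero(uint32_t tmp) {
      uint32_t clz = tmp == 0 ? 32 : __builtin_clz(tmp);  // Hardware CLZ returns 32 for 0.
      return clz >> 5;  // WhichPowerOf2(32) == 5; result is 1 iff tmp == 0.
    }
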
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
- CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kInt32);
+ CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
// The only read barrier implementation supporting the
@@ -1425,7 +1127,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
return;
}
- CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kReference);
+ CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
GenCas(invoke, DataType::Type::kInt32, codegen_);
@@ -1606,23 +1308,23 @@ static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
__ Ldr(temp_reg, MemOperand(str, temp1));
__ Ldr(temp2, MemOperand(arg, temp1));
__ Cmp(temp_reg, temp2);
- __ B(ne, &find_char_diff, /* far_target */ false);
+ __ B(ne, &find_char_diff, /* is_far_target= */ false);
__ Add(temp1, temp1, char_size * 2);
__ Ldr(temp_reg, MemOperand(str, temp1));
__ Ldr(temp2, MemOperand(arg, temp1));
__ Cmp(temp_reg, temp2);
- __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
+ __ B(ne, &find_char_diff_2nd_cmp, /* is_far_target= */ false);
__ Add(temp1, temp1, char_size * 2);
// With string compression, we have compared 8 bytes, otherwise 4 chars.
__ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
- __ B(hi, &loop, /* far_target */ false);
+ __ B(hi, &loop, /* is_far_target= */ false);
__ B(end);
__ Bind(&find_char_diff_2nd_cmp);
if (mirror::kUseStringCompression) {
__ Subs(temp0, temp0, 4); // 4 bytes previously compared.
- __ B(ls, end, /* far_target */ false); // Was the second comparison fully beyond the end?
+ __ B(ls, end, /* is_far_target= */ false); // Was the second comparison fully beyond the end?
} else {
// Without string compression, we can start treating temp0 as signed
// and rely on the signed comparison below.
@@ -1650,7 +1352,7 @@ static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
// the remaining string data, so just return length diff (out).
// The comparison is unsigned for string compression, otherwise signed.
__ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
- __ B((mirror::kUseStringCompression ? ls : le), end, /* far_target */ false);
+ __ B((mirror::kUseStringCompression ? ls : le), end, /* is_far_target= */ false);
// Extract the characters and calculate the difference.
if (mirror::kUseStringCompression) {
@@ -1717,9 +1419,9 @@ static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler,
__ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
__ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
__ Cmp(temp_reg, temp3);
- __ B(ne, &different_compression_diff, /* far_target */ false);
+ __ B(ne, &different_compression_diff, /* is_far_target= */ false);
__ Subs(temp0, temp0, 2);
- __ B(hi, &different_compression_loop, /* far_target */ false);
+ __ B(hi, &different_compression_loop, /* is_far_target= */ false);
__ B(end);
// Calculate the difference.
@@ -1757,13 +1459,6 @@ static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_lengt
}
void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier &&
- !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
- !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
- // No support for this odd case (String class is moveable, not in the boot image).
- return;
- }
-
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
InvokeRuntimeCallingConventionARMVIXL calling_convention;
@@ -1822,22 +1517,30 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
StringEqualsOptimizations optimizations(invoke);
if (!optimizations.GetArgumentNotNull()) {
// Check if input is null, return false if it is.
- __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
+ __ CompareAndBranchIfZero(arg, &return_false, /* is_far_target= */ false);
}
// Reference equality check, return true if same reference.
__ Cmp(str, arg);
- __ B(eq, &return_true, /* far_target */ false);
+ __ B(eq, &return_true, /* is_far_target= */ false);
if (!optimizations.GetArgumentIsString()) {
// Instanceof check for the argument by comparing class fields.
// All string objects must have the same type since String cannot be subclassed.
// Receiver must be a string object, so its class field is equal to all strings' class fields.
// If the argument is a string object, its class field must be equal to receiver's class field.
+ //
+ // As the String class is expected to be non-movable, we can read the class
+ // field from String.equals' arguments without read barriers.
+ AssertNonMovableStringClass();
+ // /* HeapReference<Class> */ temp = str->klass_
__ Ldr(temp, MemOperand(str, class_offset));
+ // /* HeapReference<Class> */ out = arg->klass_
__ Ldr(out, MemOperand(arg, class_offset));
+ // Also, because we use the previously loaded class references only in the
+ // following comparison, we don't need to unpoison them.
__ Cmp(temp, out);
- __ B(ne, &return_false, /* far_target */ false);
+ __ B(ne, &return_false, /* is_far_target= */ false);
}
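
Skipping the unpoisoning is sound because heap-reference poisoning is a bijection on the 32-bit word, so two poisoned class pointers are equal exactly when the unpoisoned ones are. A sketch, with the negation-based poisoning scheme stated as an assumption for illustration:

    #include <cstdint>

    static uint32_t PoisonRef(uint32_t ref) { return 0u - ref; }  // Assumed scheme.

    static bool SameClass(uint32_t poisoned_a, uint32_t poisoned_b) {
      return poisoned_a == poisoned_b;  // Same verdict as comparing unpoisoned refs.
    }
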
// Check if one of the inputs is a const string. Do not special-case both strings
@@ -1860,7 +1563,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
// Also compares the compression style, if differs return false.
__ Ldr(temp, MemOperand(arg, count_offset));
__ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
- __ B(ne, &return_false, /* far_target */ false);
+ __ B(ne, &return_false, /* is_far_target= */ false);
} else {
// Load `count` fields of this and argument strings.
__ Ldr(temp, MemOperand(str, count_offset));
@@ -1868,7 +1571,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
// Check if `count` fields are equal, return false if they're not.
// Also compares the compression style, if differs return false.
__ Cmp(temp, out);
- __ B(ne, &return_false, /* far_target */ false);
+ __ B(ne, &return_false, /* is_far_target= */ false);
}
// Assertions that must hold in order to compare strings 4 bytes at a time.
@@ -1891,9 +1594,9 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
__ Ldrd(temp, temp1, MemOperand(str, offset));
__ Ldrd(temp2, out, MemOperand(arg, offset));
__ Cmp(temp, temp2);
- __ B(ne, &return_false, /* far_label */ false);
+ __ B(ne, &return_false, /* is_far_target= */ false);
__ Cmp(temp1, out);
- __ B(ne, &return_false, /* far_label */ false);
+ __ B(ne, &return_false, /* is_far_target= */ false);
offset += 2u * sizeof(uint32_t);
remaining_bytes -= 2u * sizeof(uint32_t);
}
@@ -1901,13 +1604,13 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
__ Ldr(temp, MemOperand(str, offset));
__ Ldr(out, MemOperand(arg, offset));
__ Cmp(temp, out);
- __ B(ne, &return_false, /* far_label */ false);
+ __ B(ne, &return_false, /* is_far_target= */ false);
}
} else {
// Return true if both strings are empty. Even with string compression `count == 0` means empty.
static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
"Expecting 0=compressed, 1=uncompressed");
- __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
+ __ CompareAndBranchIfZero(temp, &return_true, /* is_far_target= */ false);
if (mirror::kUseStringCompression) {
// For string compression, calculate the number of bytes to compare (not chars).
@@ -1933,10 +1636,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
__ Ldr(temp2, MemOperand(arg, temp1));
__ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
__ Cmp(out, temp2);
- __ B(ne, &return_false, /* far_target */ false);
+ __ B(ne, &return_false, /* is_far_target= */ false);
// With string compression, we have compared 4 bytes, otherwise 2 chars.
__ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
- __ B(hi, &loop, /* far_target */ false);
+ __ B(hi, &loop, /* is_far_target= */ false);
}
// Return true and exit the function.
@@ -2017,7 +1720,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
- GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true);
+ GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
}
void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
@@ -2033,7 +1736,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke)
}
void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
- GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false);
+ GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
}
void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -2137,8 +1840,6 @@ void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// is clobbered by ReadBarrierMarkRegX entry points). Get an extra
// temporary register from the register allocator.
locations->AddTemp(Location::RequiresRegister());
- CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
- arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
}
}
@@ -2257,7 +1958,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
} else {
if (!optimizations.GetDestinationIsSource()) {
__ Cmp(src, dest);
- __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
+ __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
}
__ Cmp(RegisterFrom(dest_pos), src_pos_constant);
__ B(gt, intrinsic_slow_path->GetEntryLabel());
@@ -2265,7 +1966,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
} else {
if (!optimizations.GetDestinationIsSource()) {
__ Cmp(src, dest);
- __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
+ __ B(ne, &conditions_on_positions_validated, /* is_far_target= */ false);
}
if (dest_pos.IsConstant()) {
int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
@@ -2325,11 +2026,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
if (!optimizations.GetSourceIsNonPrimitiveArray()) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
      // Bail out if the source is not a non-primitive array.
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
__ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
@@ -2341,7 +2042,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// /* HeapReference<Class> */ temp1 = dest->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+ invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check= */ false);
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
      // Bail out if the destination is not a non-primitive array.
@@ -2353,7 +2054,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
      // temporaries such as `temp1`.
// /* HeapReference<Class> */ temp2 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+ invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check= */ false);
__ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
@@ -2367,16 +2068,16 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
// /* HeapReference<Class> */ temp2 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+ invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check= */ false);
// Note: if heap poisoning is on, we are comparing two unpoisoned references here.
__ Cmp(temp1, temp2);
if (optimizations.GetDestinationIsTypedObjectArray()) {
vixl32::Label do_copy;
- __ B(eq, &do_copy, /* far_target */ false);
+ __ B(eq, &do_copy, /* is_far_target= */ false);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
// /* HeapReference<Class> */ temp1 = temp1->super_class_
// We do not need to emit a read barrier for the following
// heap reference load, as `temp1` is only used in a
@@ -2433,7 +2134,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
if (optimizations.GetDestinationIsTypedObjectArray()) {
vixl32::Label do_copy;
- __ B(eq, &do_copy, /* far_target */ false);
+ __ B(eq, &do_copy, /* is_far_target= */ false);
if (!did_unpoison) {
assembler->MaybeUnpoisonHeapReference(temp1);
}
@@ -2455,10 +2156,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+ invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check= */ false);
// /* HeapReference<Class> */ temp3 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+ invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check= */ false);
__ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp3` has been unpoisoned
      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
@@ -2486,7 +2187,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
if (length.IsRegister()) {
// Don't enter the copy loop if the length is null.
- __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
+ __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target= */ false);
}
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
@@ -2543,7 +2244,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
// Given the numeric representation, it's enough to check the low bit of the
// rb_state. We do that by shifting the bit out of the lock word with LSRS
// which can be a 16-bit instruction unlike the TST immediate.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
__ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
// Carry flag is the last bit shifted out by LSRS.
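
LSRS by kReadBarrierStateShift + 1 pushes the read-barrier state bit into the carry flag in a single 16-bit instruction; with non-gray encoded as 0 and gray as 1, carry set means the object is gray and its references may still need marking. The flag logic reduces to a shift-and-mask (bit position assumed here for illustration):

    #include <cstdint>

    static bool IsGray(uint32_t lock_word, int read_barrier_state_shift) {
      return ((lock_word >> read_barrier_state_shift) & 1u) != 0u;  // 1 == gray.
    }
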
@@ -2563,7 +2264,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
__ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
}
__ Cmp(temp1, temp3);
- __ B(ne, &loop, /* far_target */ false);
+ __ B(ne, &loop, /* is_far_target= */ false);
__ Bind(read_barrier_slow_path->GetExitLabel());
} else {
@@ -2585,13 +2286,13 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
__ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
}
__ Cmp(temp1, temp3);
- __ B(ne, &loop, /* far_target */ false);
+ __ B(ne, &loop, /* is_far_target= */ false);
}
__ Bind(&done);
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* can_be_null= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -3121,7 +2822,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ Subs(num_chr, srcEnd, srcBegin);
// Early out for valid zero-length retrievals.
- __ B(eq, final_label, /* far_target */ false);
+ __ B(eq, final_label, /* is_far_target= */ false);
// src range to copy.
__ Add(src_ptr, srcObj, value_offset);
@@ -3137,7 +2838,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ Ldr(temp, MemOperand(srcObj, count_offset));
__ Tst(temp, 1);
temps.Release(temp);
- __ B(eq, &compressed_string_preloop, /* far_target */ false);
+ __ B(eq, &compressed_string_preloop, /* is_far_target= */ false);
}
__ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
@@ -3147,7 +2848,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
temp = temps.Acquire();
  // Avoids having to repair the value of num_chr on the < 4 character path.
__ Subs(temp, num_chr, 4);
- __ B(lt, &remainder, /* far_target */ false);
+ __ B(lt, &remainder, /* is_far_target= */ false);
// Keep the result of the earlier subs, we are going to fetch at least 4 characters.
__ Mov(num_chr, temp);
@@ -3162,10 +2863,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
__ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
temps.Release(temp);
- __ B(ge, &loop, /* far_target */ false);
+ __ B(ge, &loop, /* is_far_target= */ false);
__ Adds(num_chr, num_chr, 4);
- __ B(eq, final_label, /* far_target */ false);
+ __ B(eq, final_label, /* is_far_target= */ false);
// Main loop for < 4 character case and remainder handling. Loads and stores one
// 16-bit Java character at a time.
@@ -3175,7 +2876,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ Subs(num_chr, num_chr, 1);
__ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
temps.Release(temp);
- __ B(gt, &remainder, /* far_target */ false);
+ __ B(gt, &remainder, /* is_far_target= */ false);
if (mirror::kUseStringCompression) {
__ B(final_label);
@@ -3191,7 +2892,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke)
__ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
temps.Release(temp);
__ Subs(num_chr, num_chr, 1);
- __ B(gt, &compressed_string_loop, /* far_target */ false);
+ __ B(gt, &compressed_string_loop, /* is_far_target= */ false);
}
if (done.IsReferenced()) {
@@ -3252,7 +2953,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
ArmVIXLAssembler* assembler = GetAssembler();
DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
- __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+ __ Vrintp(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
@@ -3264,7 +2965,7 @@ void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
ArmVIXLAssembler* assembler = GetAssembler();
DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
- __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+ __ Vrintm(F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}
void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
@@ -3277,33 +2978,27 @@ void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
- IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ IntrinsicVisitor::IntegerValueOfInfo info =
+ IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
LocationSummary* locations = invoke->GetLocations();
ArmVIXLAssembler* const assembler = GetAssembler();
vixl32::Register out = RegisterFrom(locations->Out());
UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
- InvokeRuntimeCallingConventionARMVIXL calling_convention;
- vixl32::Register argument = calling_convention.GetRegisterAt(0);
if (invoke->InputAt(0)->IsConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
- if (value >= info.low && value <= info.high) {
+ if (static_cast<uint32_t>(value - info.low) < info.length) {
// Just embed the j.l.Integer in the code.
- ScopedObjectAccess soa(Thread::Current());
- mirror::Object* boxed = info.cache->Get(value + (-info.low));
- DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
- __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+ DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
} else {
+ DCHECK(locations->CanCall());
// Allocate and initialize a new j.l.Integer.
// TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
// JIT object table.
- uint32_t address =
- dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ Mov(temp, value);
assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
// `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
@@ -3311,25 +3006,22 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
} else {
+ DCHECK(locations->CanCall());
vixl32::Register in = RegisterFrom(locations->InAt(0));
// Check bounds of our cache.
__ Add(out, in, -info.low);
- __ Cmp(out, info.high - info.low + 1);
+ __ Cmp(out, info.length);
vixl32::Label allocate, done;
- __ B(hs, &allocate, /* is_far_target */ false);
+ __ B(hs, &allocate, /* is_far_target= */ false);
// If the value is within the bounds, load the j.l.Integer directly from the array.
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
- __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+ codegen_->LoadBootImageAddress(temp, info.array_data_boot_image_reference);
codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
assembler->MaybeUnpoisonHeapReference(out);
__ B(&done);
__ Bind(&allocate);
// Otherwise allocate and initialize a new j.l.Integer.
- address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
// `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
// one.
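
Both the constant path and the Add/Cmp pair in the register path use the folded bounds check: subtracting `info.low` and comparing unsigned against `info.length` covers both ends of the cache range at once, because any value below `low` wraps to a huge unsigned number. In C:

    #include <cstdint>

    // Equivalent to (value >= low && value - low < length) with one compare.
    static bool InIntegerCache(int32_t value, int32_t low, uint32_t length) {
      uint32_t index = static_cast<uint32_t>(value) - static_cast<uint32_t>(low);
      return index < length;  // Values below `low` wrap past any plausible length.
    }
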
@@ -3353,7 +3045,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
vixl32::Register temp = temps.Acquire();
vixl32::Label done;
vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
- __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
+ __ CompareAndBranchIfZero(out, final_label, /* is_far_target= */ false);
__ Dmb(vixl32::ISH);
__ Mov(temp, 0);
assembler->StoreToOffset(kStoreWord, temp, tr, offset);
@@ -3375,6 +3067,9 @@ UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 9c02d0a4ad..1fea776f0d 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -27,14 +27,14 @@ namespace arm {
class ArmVIXLAssembler;
class CodeGeneratorARMVIXL;
-class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor {
+class IntrinsicLocationsBuilderARMVIXL final : public IntrinsicVisitor {
public:
explicit IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen);
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
@@ -54,14 +54,14 @@ class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor {
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARMVIXL);
};
-class IntrinsicCodeGeneratorARMVIXL FINAL : public IntrinsicVisitor {
+class IntrinsicCodeGeneratorARMVIXL final : public IntrinsicVisitor {
public:
explicit IntrinsicCodeGeneratorARMVIXL(CodeGeneratorARMVIXL* codegen) : codegen_(codegen) {}
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index b7936b9c8e..3da0e578bf 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -58,6 +58,10 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const {
return codegen_->GetInstructionSetFeatures().Is32BitFloatingPoint();
}
+inline bool IntrinsicCodeGeneratorMIPS::HasMsa() const {
+ return codegen_->GetInstructionSetFeatures().HasMsa();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -104,7 +108,7 @@ class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS {
public:
explicit IntrinsicSlowPathMIPS(HInvoke* invoke) : SlowPathCodeMIPS(invoke), invoke_(invoke) { }
- void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen_in) override {
CodeGeneratorMIPS* codegen = down_cast<CodeGeneratorMIPS*>(codegen_in);
__ Bind(GetEntryLabel());
@@ -133,7 +137,7 @@ class IntrinsicSlowPathMIPS : public SlowPathCodeMIPS {
__ B(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS"; }
+ const char* GetDescription() const override { return "IntrinsicSlowPathMIPS"; }
private:
// The instruction where this slow path is happening.
@@ -181,7 +185,7 @@ void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invo
}
void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
// int java.lang.Float.floatToRawIntBits(float)
@@ -190,7 +194,7 @@ void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke)
}
void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -222,7 +226,7 @@ void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke)
}
void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
// float java.lang.Float.intBitsToFloat(int)
@@ -231,7 +235,7 @@ void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
static void CreateIntToIntLocations(ArenaAllocator* allocator,
@@ -407,7 +411,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) {
DataType::Type::kInt32,
IsR2OrNewer(),
IsR6(),
- /* reverseBits */ false,
+ /* reverseBits= */ false,
GetAssembler());
}
@@ -421,7 +425,7 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) {
DataType::Type::kInt64,
IsR2OrNewer(),
IsR6(),
- /* reverseBits */ false,
+ /* reverseBits= */ false,
GetAssembler());
}
@@ -435,7 +439,7 @@ void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) {
DataType::Type::kInt16,
IsR2OrNewer(),
IsR6(),
- /* reverseBits */ false,
+ /* reverseBits= */ false,
GetAssembler());
}
@@ -475,7 +479,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler());
+ GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ false, IsR6(), GetAssembler());
}
// int java.lang.Long.numberOfLeadingZeros(long i)
@@ -484,7 +488,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invok
}
void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler());
+ GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ true, IsR6(), GetAssembler());
}
static void GenNumberOfTrailingZeroes(LocationSummary* locations,
@@ -562,7 +566,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* i
}
void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, IsR6(), GetAssembler());
+ GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ false, IsR6(), GetAssembler());
}
// int java.lang.Long.numberOfTrailingZeros(long i)
@@ -571,7 +575,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invo
}
void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, IsR6(), GetAssembler());
+ GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ true, IsR6(), GetAssembler());
}
// int java.lang.Integer.reverse(int)
@@ -584,7 +588,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) {
DataType::Type::kInt32,
IsR2OrNewer(),
IsR6(),
- /* reverseBits */ true,
+ /* reverseBits= */ true,
GetAssembler());
}
@@ -598,7 +602,7 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) {
DataType::Type::kInt64,
IsR2OrNewer(),
IsR6(),
- /* reverseBits */ true,
+ /* reverseBits= */ true,
GetAssembler());
}
@@ -612,6 +616,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
static void GenBitCount(LocationSummary* locations,
DataType::Type type,
bool isR6,
+ bool hasMsa,
MipsAssembler* assembler) {
Register out = locations->Out().AsRegister<Register>();
@@ -637,85 +642,102 @@ static void GenBitCount(LocationSummary* locations,
// instructions compared to a loop-based algorithm which required 47
// instructions.
- if (type == DataType::Type::kInt32) {
- Register in = locations->InAt(0).AsRegister<Register>();
-
- __ Srl(TMP, in, 1);
- __ LoadConst32(AT, 0x55555555);
- __ And(TMP, TMP, AT);
- __ Subu(TMP, in, TMP);
- __ LoadConst32(AT, 0x33333333);
- __ And(out, TMP, AT);
- __ Srl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Addu(TMP, out, TMP);
- __ Srl(out, TMP, 4);
- __ Addu(out, out, TMP);
- __ LoadConst32(AT, 0x0F0F0F0F);
- __ And(out, out, AT);
- __ LoadConst32(TMP, 0x01010101);
- if (isR6) {
- __ MulR6(out, out, TMP);
+ if (hasMsa) {
+ if (type == DataType::Type::kInt32) {
+ Register in = locations->InAt(0).AsRegister<Register>();
+ __ Mtc1(in, FTMP);
+ __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
} else {
- __ MulR2(out, out, TMP);
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ __ Mtc1(in_lo, FTMP);
+ __ Mthc1(in_hi, FTMP);
+ __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
}
- __ Srl(out, out, 24);
} else {
- DCHECK_EQ(type, DataType::Type::kInt64);
- Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
- Register out_hi = locations->GetTemp(1).AsRegister<Register>();
- Register tmp_lo = TMP;
- Register out_lo = out;
+ if (type == DataType::Type::kInt32) {
+ Register in = locations->InAt(0).AsRegister<Register>();
- __ Srl(tmp_lo, in_lo, 1);
- __ Srl(tmp_hi, in_hi, 1);
+ __ Srl(TMP, in, 1);
+ __ LoadConst32(AT, 0x55555555);
+ __ And(TMP, TMP, AT);
+ __ Subu(TMP, in, TMP);
+ __ LoadConst32(AT, 0x33333333);
+ __ And(out, TMP, AT);
+ __ Srl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Addu(TMP, out, TMP);
+ __ Srl(out, TMP, 4);
+ __ Addu(out, out, TMP);
+ __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out, out, AT);
+ __ LoadConst32(TMP, 0x01010101);
+ if (isR6) {
+ __ MulR6(out, out, TMP);
+ } else {
+ __ MulR2(out, out, TMP);
+ }
+ __ Srl(out, out, 24);
+ } else {
+ DCHECK_EQ(type, DataType::Type::kInt64);
+ Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
+ Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
+ Register tmp_hi = locations->GetTemp(0).AsRegister<Register>();
+ Register out_hi = locations->GetTemp(1).AsRegister<Register>();
+ Register tmp_lo = TMP;
+ Register out_lo = out;
- __ LoadConst32(AT, 0x55555555);
+ __ Srl(tmp_lo, in_lo, 1);
+ __ Srl(tmp_hi, in_hi, 1);
- __ And(tmp_lo, tmp_lo, AT);
- __ Subu(tmp_lo, in_lo, tmp_lo);
+ __ LoadConst32(AT, 0x55555555);
- __ And(tmp_hi, tmp_hi, AT);
- __ Subu(tmp_hi, in_hi, tmp_hi);
+ __ And(tmp_lo, tmp_lo, AT);
+ __ Subu(tmp_lo, in_lo, tmp_lo);
- __ LoadConst32(AT, 0x33333333);
+ __ And(tmp_hi, tmp_hi, AT);
+ __ Subu(tmp_hi, in_hi, tmp_hi);
- __ And(out_lo, tmp_lo, AT);
- __ Srl(tmp_lo, tmp_lo, 2);
- __ And(tmp_lo, tmp_lo, AT);
- __ Addu(tmp_lo, out_lo, tmp_lo);
+ __ LoadConst32(AT, 0x33333333);
- __ And(out_hi, tmp_hi, AT);
- __ Srl(tmp_hi, tmp_hi, 2);
- __ And(tmp_hi, tmp_hi, AT);
- __ Addu(tmp_hi, out_hi, tmp_hi);
+ __ And(out_lo, tmp_lo, AT);
+ __ Srl(tmp_lo, tmp_lo, 2);
+ __ And(tmp_lo, tmp_lo, AT);
+ __ Addu(tmp_lo, out_lo, tmp_lo);
- // Here we deviate from the original algorithm a bit. We've reached
- // the stage where the bitfields holding the subtotals are large
- // enough to hold the combined subtotals for both the low word, and
- // the high word. This means that we can add the subtotals for the
- // the high, and low words into a single word, and compute the final
- // result for both the high, and low words using fewer instructions.
- __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out_hi, tmp_hi, AT);
+ __ Srl(tmp_hi, tmp_hi, 2);
+ __ And(tmp_hi, tmp_hi, AT);
+ __ Addu(tmp_hi, out_hi, tmp_hi);
- __ Addu(TMP, tmp_hi, tmp_lo);
+ // Here we deviate from the original algorithm a bit. We've reached
+ // the stage where the bitfields holding the subtotals are large
+ // enough to hold the combined subtotals for both the low word, and
+ // the high word. This means that we can add the subtotals for
+ // the high and low words into a single word, and compute the final
+ // result for both the high and low words using fewer instructions.
+ __ LoadConst32(AT, 0x0F0F0F0F);
- __ Srl(out, TMP, 4);
- __ And(out, out, AT);
- __ And(TMP, TMP, AT);
- __ Addu(out, out, TMP);
+ __ Addu(TMP, tmp_hi, tmp_lo);
- __ LoadConst32(AT, 0x01010101);
+ __ Srl(out, TMP, 4);
+ __ And(out, out, AT);
+ __ And(TMP, TMP, AT);
+ __ Addu(out, out, TMP);
- if (isR6) {
- __ MulR6(out, out, AT);
- } else {
- __ MulR2(out, out, AT);
- }
+ __ LoadConst32(AT, 0x01010101);
- __ Srl(out, out, 24);
+ if (isR6) {
+ __ MulR6(out, out, AT);
+ } else {
+ __ MulR2(out, out, AT);
+ }
+
+ __ Srl(out, out, 24);
+ }
}
}
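
The magic constants loaded above (0x55555555, 0x33333333, 0x0F0F0F0F, 0x01010101) implement the classic divide-and-conquer population count. A minimal C++ sketch of the same algorithm, as an illustration only (PopCount32 is a hypothetical name, not part of the patch):

    #include <cstdint>

    uint32_t PopCount32(uint32_t x) {
      x = x - ((x >> 1) & 0x55555555u);                  // 2-bit subtotals
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit subtotals
      x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit subtotals
      return (x * 0x01010101u) >> 24;                    // sum all bytes into the top byte
    }

On MSA-capable cores the whole ladder collapses into a single PcntW/PcntD vector instruction, which is exactly what the new fast path above emits.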
@@ -725,7 +747,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), HasMsa(), GetAssembler());
}
// int java.lang.Long.bitCount(long)
@@ -739,575 +761,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler());
-}
-
-static void MathAbsFP(LocationSummary* locations,
- bool is64bit,
- bool isR2OrNewer,
- bool isR6,
- MipsAssembler* assembler) {
- FRegister in = locations->InAt(0).AsFpuRegister<FRegister>();
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
-
- // Note, as a "quality of implementation", rather than pure "spec compliance", we require that
- // Math.abs() clears the sign bit (but changes nothing else) for all numbers, including NaN
- // (signaling NaN may become quiet though).
- //
- // The ABS.fmt instructions (abs.s and abs.d) do exactly that when NAN2008=1 (R6). For this case,
- // both regular floating point numbers and NAN values are treated alike, only the sign bit is
- // affected by this instruction.
- // But when NAN2008=0 (R2 and before), the ABS.fmt instructions can't be used. For this case, any
- // NaN operand signals invalid operation. This means that other bits (not just sign bit) might be
- // changed when doing abs(NaN). Because of that, we clear sign bit in a different way.
- if (isR6) {
- if (is64bit) {
- __ AbsD(out, in);
- } else {
- __ AbsS(out, in);
- }
- } else {
- if (is64bit) {
- if (in != out) {
- __ MovD(out, in);
- }
- __ MoveFromFpuHigh(TMP, in);
- // ins instruction is not available for R1.
- if (isR2OrNewer) {
- __ Ins(TMP, ZERO, 31, 1);
- } else {
- __ Sll(TMP, TMP, 1);
- __ Srl(TMP, TMP, 1);
- }
- __ MoveToFpuHigh(TMP, out);
- } else {
- __ Mfc1(TMP, in);
- // ins instruction is not available for R1.
- if (isR2OrNewer) {
- __ Ins(TMP, ZERO, 31, 1);
- } else {
- __ Sll(TMP, TMP, 1);
- __ Srl(TMP, TMP, 1);
- }
- __ Mtc1(TMP, out);
- }
- }
-}
-
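
The comment in the removed MathAbsFP explains why pre-R6 code clears the sign bit manually rather than using abs.fmt: with NAN2008=0 a NaN operand could be modified beyond its sign. The bit manipulation amounts to this C++ sketch (AbsBits is a hypothetical name, shown only as an illustration):

    #include <cstdint>
    #include <cstring>

    float AbsBits(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));  // reinterpret the float as raw bits
      bits = (bits << 1) >> 1;               // shift the sign bit out, refill with zero
      std::memcpy(&f, &bits, sizeof(f));     // leaves any NaN payload untouched
      return f;
    }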
-// double java.lang.Math.abs(double)
-void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ true, IsR2OrNewer(), IsR6(), GetAssembler());
-}
-
-// float java.lang.Math.abs(float)
-void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ false, IsR2OrNewer(), IsR6(), GetAssembler());
-}
-
-static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssembler* assembler) {
- if (is64bit) {
- Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- // The comments in this section show the analogous operations which would
- // be performed if we had 64-bit registers "in", and "out".
- // __ Dsra32(AT, in, 31);
- __ Sra(AT, in_hi, 31);
- // __ Xor(out, in, AT);
- __ Xor(TMP, in_lo, AT);
- __ Xor(out_hi, in_hi, AT);
- // __ Dsubu(out, out, AT);
- __ Subu(out_lo, TMP, AT);
- __ Sltu(TMP, out_lo, TMP);
- __ Addu(out_hi, out_hi, TMP);
- } else {
- Register in = locations->InAt(0).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- __ Sra(AT, in, 31);
- __ Xor(out, in, AT);
- __ Subu(out, out, AT);
- }
-}
-
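
The removed GenAbsInteger uses the standard branchless absolute value (the 64-bit register-pair variant additionally propagates a borrow via Sltu). A C++ sketch of the 32-bit case, assuming arithmetic right shift for signed values as on MIPS (hypothetical helper, illustration only):

    #include <cstdint>

    int32_t AbsBranchless(int32_t x) {
      int32_t mask = x >> 31;    // 0 for x >= 0, -1 for x < 0
      return (x ^ mask) - mask;  // identity for x >= 0, two's-complement negation otherwise
    }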
-// int java.lang.Math.abs(int)
-void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-// long java.lang.Math.abs(long)
-void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- bool is_R6,
- MipsAssembler* assembler) {
- FRegister out = locations->Out().AsFpuRegister<FRegister>();
- FRegister a = locations->InAt(0).AsFpuRegister<FRegister>();
- FRegister b = locations->InAt(1).AsFpuRegister<FRegister>();
-
- if (is_R6) {
- MipsLabel noNaNs;
- MipsLabel done;
- FRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
-
- // When Java computes min/max it prefers a NaN to a number; the
- // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
- // the inputs is a NaN and the other is a valid number, the MIPS
- // instruction will return the number; Java wants the NaN value
- // returned. This is why there is extra logic preceding the use of
- // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
- // NaN, return the NaN, otherwise return the min/max.
- if (type == DataType::Type::kFloat64) {
- __ CmpUnD(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqD(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelD(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovD(out, ftmp);
- }
-
- __ B(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinD(out, a, b);
- } else {
- __ MaxD(out, a, b);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CmpUnS(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqS(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelS(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovS(out, ftmp);
- }
-
- __ B(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinS(out, a, b);
- } else {
- __ MaxS(out, a, b);
- }
- }
-
- __ Bind(&done);
- } else {
- MipsLabel ordered;
- MipsLabel compare;
- MipsLabel select;
- MipsLabel done;
-
- if (type == DataType::Type::kFloat64) {
- __ CunD(a, b);
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CunS(a, b);
- }
- __ Bc1f(&ordered);
-
- // a or b (or both) is a NaN. Return one, which is a NaN.
- if (type == DataType::Type::kFloat64) {
- __ CeqD(b, b);
- } else {
- __ CeqS(b, b);
- }
- __ B(&select);
-
- __ Bind(&ordered);
-
- // Neither is a NaN.
- // a == b? (-0.0 compares equal with +0.0)
- // If equal, handle zeroes, else compare further.
- if (type == DataType::Type::kFloat64) {
- __ CeqD(a, b);
- } else {
- __ CeqS(a, b);
- }
- __ Bc1f(&compare);
-
- // a == b either bit for bit or one is -0.0 and the other is +0.0.
- if (type == DataType::Type::kFloat64) {
- __ MoveFromFpuHigh(TMP, a);
- __ MoveFromFpuHigh(AT, b);
- } else {
- __ Mfc1(TMP, a);
- __ Mfc1(AT, b);
- }
-
- if (is_min) {
- // -0.0 prevails over +0.0.
- __ Or(TMP, TMP, AT);
- } else {
- // +0.0 prevails over -0.0.
- __ And(TMP, TMP, AT);
- }
-
- if (type == DataType::Type::kFloat64) {
- __ Mfc1(AT, a);
- __ Mtc1(AT, out);
- __ MoveToFpuHigh(TMP, out);
- } else {
- __ Mtc1(TMP, out);
- }
- __ B(&done);
-
- __ Bind(&compare);
-
- if (type == DataType::Type::kFloat64) {
- if (is_min) {
- // return (a <= b) ? a : b;
- __ ColeD(a, b);
- } else {
- // return (a >= b) ? a : b;
- __ ColeD(b, a); // b <= a
- }
- } else {
- if (is_min) {
- // return (a <= b) ? a : b;
- __ ColeS(a, b);
- } else {
- // return (a >= b) ? a : b;
- __ ColeS(b, a); // b <= a
- }
- }
-
- __ Bind(&select);
-
- if (type == DataType::Type::kFloat64) {
- __ MovtD(out, a);
- __ MovfD(out, b);
- } else {
- __ MovtS(out, a);
- __ MovfS(out, b);
- }
-
- __ Bind(&done);
- }
-}
-
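
The removed comments spell out why min.fmt/max.fmt alone are not enough: Java prefers a NaN over any number and orders -0.0 below +0.0, while the MIPS instructions prefer numbers. A C++ sketch of the semantics the emitted code preserves (JavaMinDouble is a hypothetical name, illustration only):

    #include <cmath>
    #include <limits>

    double JavaMinDouble(double a, double b) {
      if (std::isnan(a) || std::isnan(b)) {
        return std::numeric_limits<double>::quiet_NaN();  // NaN wins over any number
      }
      if (a == 0.0 && b == 0.0) {
        return std::signbit(a) ? a : b;  // -0.0 prevails over +0.0 for min
      }
      return (a <= b) ? a : b;
    }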
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap);
-}
-
-// double java.lang.Math.min(double, double)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kFloat64,
- IsR6(),
- GetAssembler());
-}
-
-// float java.lang.Math.min(float, float)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kFloat32,
- IsR6(),
- GetAssembler());
-}
-
-// double java.lang.Math.max(double, double)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kFloat64,
- IsR6(),
- GetAssembler());
-}
-
-// float java.lang.Math.max(float, float)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kFloat32,
- IsR6(),
- GetAssembler());
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- bool is_R6,
- MipsAssembler* assembler) {
- if (is_R6) {
- // Some architectures, such as ARM and MIPS (prior to r6), have a
- // conditional move instruction which only changes the target
- // (output) register if the condition is true (MIPS prior to r6 had
- // MOVF, MOVT, MOVN, and MOVZ). The SELEQZ and SELNEZ instructions
- // always change the target (output) register. If the condition is
- // true the output register gets the contents of the "rs" register;
- // otherwise, the output register is set to zero. One consequence
- // of this is that to implement something like "rd = c==0 ? rs : rt"
- // MIPS64r6 needs to use a pair of SELEQZ/SELNEZ instructions.
- // After executing this pair of instructions one of the output
- // registers from the pair will necessarily contain zero. Then the
- // code ORs the output registers from the SELEQZ/SELNEZ instructions
- // to get the final result.
- //
- // The initial test to see if the output register is same as the
- // first input register is needed to make sure that value in the
- // first input register isn't clobbered before we've finished
- // computing the output value. The logic in the corresponding else
- // clause performs the same task but makes sure the second input
- // register isn't clobbered in the event that it's the same register
- // as the output register; the else clause also handles the case
- // where the output register is distinct from both the first, and the
- // second input registers.
- if (type == DataType::Type::kInt64) {
- Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
- Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- MipsLabel compare_done;
-
- if (a_lo == b_lo) {
- if (out_lo != a_lo) {
- __ Move(out_lo, a_lo);
- __ Move(out_hi, a_hi);
- }
- } else {
- __ Slt(TMP, b_hi, a_hi);
- __ Bne(b_hi, a_hi, &compare_done);
-
- __ Sltu(TMP, b_lo, a_lo);
-
- __ Bind(&compare_done);
-
- if (is_min) {
- __ Seleqz(AT, a_lo, TMP);
- __ Selnez(out_lo, b_lo, TMP); // Safe even if out_lo == a_lo/b_lo
- // because at this point we're
- // done using a_lo/b_lo.
- } else {
- __ Selnez(AT, a_lo, TMP);
- __ Seleqz(out_lo, b_lo, TMP); // ditto
- }
- __ Or(out_lo, out_lo, AT);
- if (is_min) {
- __ Seleqz(AT, a_hi, TMP);
- __ Selnez(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
- } else {
- __ Selnez(AT, a_hi, TMP);
- __ Seleqz(out_hi, b_hi, TMP); // ditto but for out_hi & a_hi/b_hi
- }
- __ Or(out_hi, out_hi, AT);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kInt32);
- Register a = locations->InAt(0).AsRegister<Register>();
- Register b = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- if (a == b) {
- if (out != a) {
- __ Move(out, a);
- }
- } else {
- __ Slt(AT, b, a);
- if (is_min) {
- __ Seleqz(TMP, a, AT);
- __ Selnez(AT, b, AT);
- } else {
- __ Selnez(TMP, a, AT);
- __ Seleqz(AT, b, AT);
- }
- __ Or(out, TMP, AT);
- }
- }
- } else {
- if (type == DataType::Type::kInt64) {
- Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>();
- Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
- Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>();
- Register b_hi = locations->InAt(1).AsRegisterPairHigh<Register>();
- Register out_lo = locations->Out().AsRegisterPairLow<Register>();
- Register out_hi = locations->Out().AsRegisterPairHigh<Register>();
-
- MipsLabel compare_done;
-
- if (a_lo == b_lo) {
- if (out_lo != a_lo) {
- __ Move(out_lo, a_lo);
- __ Move(out_hi, a_hi);
- }
- } else {
- __ Slt(TMP, a_hi, b_hi);
- __ Bne(a_hi, b_hi, &compare_done);
-
- __ Sltu(TMP, a_lo, b_lo);
-
- __ Bind(&compare_done);
-
- if (is_min) {
- if (out_lo != a_lo) {
- __ Movn(out_hi, a_hi, TMP);
- __ Movn(out_lo, a_lo, TMP);
- }
- if (out_lo != b_lo) {
- __ Movz(out_hi, b_hi, TMP);
- __ Movz(out_lo, b_lo, TMP);
- }
- } else {
- if (out_lo != a_lo) {
- __ Movz(out_hi, a_hi, TMP);
- __ Movz(out_lo, a_lo, TMP);
- }
- if (out_lo != b_lo) {
- __ Movn(out_hi, b_hi, TMP);
- __ Movn(out_lo, b_lo, TMP);
- }
- }
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kInt32);
- Register a = locations->InAt(0).AsRegister<Register>();
- Register b = locations->InAt(1).AsRegister<Register>();
- Register out = locations->Out().AsRegister<Register>();
-
- if (a == b) {
- if (out != a) {
- __ Move(out, a);
- }
- } else {
- __ Slt(AT, a, b);
- if (is_min) {
- if (out != a) {
- __ Movn(out, a, AT);
- }
- if (out != b) {
- __ Movz(out, b, AT);
- }
- } else {
- if (out != a) {
- __ Movz(out, a, AT);
- }
- if (out != b) {
- __ Movn(out, b, AT);
- }
- }
- }
- }
- }
-}
-
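
The long comment in the removed GenMinMax describes the R6 branchless-select idiom: SELEQZ/SELNEZ each force their output to zero unless the condition matches, so OR-ing the pair yields a full conditional move, whereas the pre-R6 path falls back to MOVN/MOVZ, which write their destination only when the condition holds. The idiom in C++ form (SelectR6 is a hypothetical helper, illustration only):

    #include <cstdint>

    uint32_t SelectR6(uint32_t cond, uint32_t a, uint32_t b) {
      uint32_t t1 = (cond == 0) ? a : 0;  // seleqz t1, a, cond
      uint32_t t2 = (cond != 0) ? b : 0;  // selnez t2, b, cond
      return t1 | t2;  // one term is always zero, so the OR keeps the selected value
    }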
-// int java.lang.Math.min(int, int)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kInt32,
- IsR6(),
- GetAssembler());
-}
-
-// long java.lang.Math.min(long, long)
-void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ true,
- DataType::Type::kInt64,
- IsR6(),
- GetAssembler());
-}
-
-// int java.lang.Math.max(int, int)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kInt32,
- IsR6(),
- GetAssembler());
-}
-
-// long java.lang.Math.max(long, long)
-void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(),
- /* is_min */ false,
- DataType::Type::kInt64,
- IsR6(),
- GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), HasMsa(), GetAssembler());
}
// double java.lang.Math.sqrt(double)
@@ -1601,11 +1055,11 @@ static void GenUnsafeGet(HInvoke* invoke,
codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
trg_loc,
base,
- /* offset */ 0U,
- /* index */ offset_loc,
+ /* offset= */ 0U,
+ /* index= */ offset_loc,
TIMES_1,
temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
if (is_volatile) {
__ Sync(0);
}
@@ -1623,8 +1077,8 @@ static void GenUnsafeGet(HInvoke* invoke,
trg_loc,
trg_loc,
base_loc,
- /* offset */ 0U,
- /* index */ offset_loc);
+ /* offset= */ 0U,
+ /* index= */ offset_loc);
}
} else {
if (is_R6) {
@@ -1653,7 +1107,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, IsR6(), codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, IsR6(), codegen_);
}
// int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
@@ -1662,7 +1116,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, IsR6(), codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, IsR6(), codegen_);
}
// long sun.misc.Unsafe.getLong(Object o, long offset)
@@ -1671,7 +1125,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, IsR6(), codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, IsR6(), codegen_);
}
// Object sun.misc.Unsafe.getObject(Object o, long offset)
@@ -1680,7 +1134,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, IsR6(), codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, IsR6(), codegen_);
}
// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
@@ -1689,7 +1143,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke
}
void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, IsR6(), codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, IsR6(), codegen_);
}
static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -1771,8 +1225,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePut(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS::VisitUnsafePut(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt32,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
IsR6(),
codegen_);
}
@@ -1785,8 +1239,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutOrdered(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS::VisitUnsafePutOrdered(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt32,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
IsR6(),
codegen_);
}
@@ -1799,8 +1253,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutVolatile(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS::VisitUnsafePutVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt32,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
IsR6(),
codegen_);
}
@@ -1813,8 +1267,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObject(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObject(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kReference,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
IsR6(),
codegen_);
}
@@ -1827,8 +1281,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke)
void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kReference,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
IsR6(),
codegen_);
}
@@ -1841,8 +1295,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke
void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kReference,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
IsR6(),
codegen_);
}
@@ -1855,8 +1309,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLong(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLong(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt64,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
IsR6(),
codegen_);
}
@@ -1869,8 +1323,8 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt64,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
IsR6(),
codegen_);
}
@@ -1934,12 +1388,12 @@ static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS* code
invoke,
out_loc, // Unused, used only as a "temporary" within the read barrier.
base,
- /* offset */ 0u,
- /* index */ offset_loc,
+ /* offset= */ 0u,
+ /* index= */ offset_loc,
ScaleFactor::TIMES_1,
temp,
- /* needs_null_check */ false,
- /* always_update_field */ true);
+ /* needs_null_check= */ false,
+ /* always_update_field= */ true);
}
}
@@ -2062,13 +1516,6 @@ void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) {
// boolean java.lang.String.equals(Object anObject)
void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier &&
- !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
- !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
- // No support for this odd case (String class is moveable, not in the boot image).
- return;
- }
-
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
@@ -2128,8 +1575,16 @@ void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) {
// All string objects must have the same type since String cannot be subclassed.
// Receiver must be a string object, so its class field is equal to all strings' class fields.
// If the argument is a string object, its class field must be equal to receiver's class field.
+ //
+ // As the String class is expected to be non-movable, we can read the class
+ // field from String.equals' arguments without read barriers.
+ AssertNonMovableStringClass();
+ // /* HeapReference<Class> */ temp1 = str->klass_
__ Lw(temp1, str, class_offset);
+ // /* HeapReference<Class> */ temp2 = arg->klass_
__ Lw(temp2, arg, class_offset);
+ // Also, because we use the previously loaded class references only in the
+ // following comparison, we don't need to unpoison them.
__ Bne(temp1, temp2, &return_false);
}
@@ -2259,7 +1714,7 @@ void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) {
- GenerateStringIndexOf(invoke, /* start_at_zero */ true, GetAssembler(), codegen_);
+ GenerateStringIndexOf(invoke, /* start_at_zero= */ true, GetAssembler(), codegen_);
}
// int java.lang.String.indexOf(int ch, int fromIndex)
@@ -2280,7 +1735,7 @@ void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) {
- GenerateStringIndexOf(invoke, /* start_at_zero */ false, GetAssembler(), codegen_);
+ GenerateStringIndexOf(invoke, /* start_at_zero= */ false, GetAssembler(), codegen_);
}
// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
@@ -3147,59 +2602,50 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) {
- IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ IntrinsicVisitor::IntegerValueOfInfo info =
+ IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
LocationSummary* locations = invoke->GetLocations();
MipsAssembler* assembler = GetAssembler();
InstructionCodeGeneratorMIPS* icodegen =
down_cast<InstructionCodeGeneratorMIPS*>(codegen_->GetInstructionVisitor());
Register out = locations->Out().AsRegister<Register>();
- InvokeRuntimeCallingConvention calling_convention;
if (invoke->InputAt(0)->IsConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
- if (value >= info.low && value <= info.high) {
+ if (static_cast<uint32_t>(value - info.low) < info.length) {
// Just embed the j.l.Integer in the code.
- ScopedObjectAccess soa(Thread::Current());
- mirror::Object* boxed = info.cache->Get(value + (-info.low));
- DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
- __ LoadConst32(out, address);
+ DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
} else {
+ DCHECK(locations->CanCall());
// Allocate and initialize a new j.l.Integer.
// TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
// JIT object table.
- uint32_t address =
- dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ LoadConst32(calling_convention.GetRegisterAt(0), address);
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP);
// `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
// one.
icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
} else {
+ DCHECK(locations->CanCall());
Register in = locations->InAt(0).AsRegister<Register>();
MipsLabel allocate, done;
- int32_t count = static_cast<uint32_t>(info.high) - info.low + 1;
- // Is (info.low <= in) && (in <= info.high)?
__ Addiu32(out, in, -info.low);
- // As unsigned quantities is out < (info.high - info.low + 1)?
- if (IsInt<16>(count)) {
- __ Sltiu(AT, out, count);
+ // As unsigned quantities, is out < info.length?
+ if (IsUint<15>(info.length)) {
+ __ Sltiu(AT, out, info.length);
} else {
- __ LoadConst32(AT, count);
+ __ LoadConst32(AT, info.length);
__ Sltu(AT, out, AT);
}
- // Branch if out >= (info.high - info.low + 1).
- // This means that "in" is outside of the range [info.low, info.high].
+ // Branch if out >= info.length. This means that "in" is outside of the valid range.
__ Beqz(AT, &allocate);
// If the value is within the bounds, load the j.l.Integer directly from the array.
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
- __ LoadConst32(TMP, data_offset + address);
+ codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference);
__ ShiftAndAdd(out, out, TMP, TIMES_4);
__ Lw(out, out, 0);
__ MaybeUnpoisonHeapReference(out);
@@ -3207,10 +2653,8 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) {
__ Bind(&allocate);
// Otherwise allocate and initialize a new j.l.Integer.
- address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ LoadConst32(calling_convention.GetRegisterAt(0), address);
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ StoreToOffset(kStoreWord, in, out, info.value_offset);
// `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
// one.
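
The rewritten bounds test uses a common unsigned-compare trick: after subtracting info.low, any value outside [info.low, info.low + info.length) wraps around to a large unsigned number, so a single comparison replaces the old signed pair. Sketched in C++ (InCacheRange is a hypothetical helper, illustration only):

    #include <cstdint>

    bool InCacheRange(int32_t value, int32_t low, uint32_t length) {
      // Equivalent to (value >= low && value < low + length): values below
      // `low` wrap around and compare greater than or equal to `length`.
      return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < length;
    }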
@@ -3260,6 +2704,10 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(MIPS, SystemArrayCopy)
+UNIMPLEMENTED_INTRINSIC(MIPS, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(MIPS, CRC32UpdateByteBuffer)
+
UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter);
UNIMPLEMENTED_INTRINSIC(MIPS, StringBufferAppend);
diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h
index 13397f11d4..08d4e82139 100644
--- a/compiler/optimizing/intrinsics_mips.h
+++ b/compiler/optimizing/intrinsics_mips.h
@@ -30,14 +30,14 @@ namespace mips {
class CodeGeneratorMIPS;
class MipsAssembler;
-class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor {
+class IntrinsicLocationsBuilderMIPS final : public IntrinsicVisitor {
public:
explicit IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen);
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
@@ -55,14 +55,14 @@ class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor {
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS);
};
-class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor {
+class IntrinsicCodeGeneratorMIPS final : public IntrinsicVisitor {
public:
explicit IntrinsicCodeGeneratorMIPS(CodeGeneratorMIPS* codegen) : codegen_(codegen) {}
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
@@ -71,6 +71,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor {
bool IsR2OrNewer() const;
bool IsR6() const;
bool Is32BitFPU() const;
+ bool HasMsa() const;
private:
MipsAssembler* GetAssembler();
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 4668c561ed..3e687652d3 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -46,6 +46,10 @@ ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() {
return codegen_->GetGraph()->GetAllocator();
}
+inline bool IntrinsicCodeGeneratorMIPS64::HasMsa() const {
+ return codegen_->GetInstructionSetFeatures().HasMsa();
+}
+
#define __ codegen->GetAssembler()->
static void MoveFromReturnRegister(Location trg,
@@ -93,7 +97,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 {
explicit IntrinsicSlowPathMIPS64(HInvoke* invoke)
: SlowPathCodeMIPS64(invoke), invoke_(invoke) { }
- void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen_in) override {
CodeGeneratorMIPS64* codegen = down_cast<CodeGeneratorMIPS64*>(codegen_in);
__ Bind(GetEntryLabel());
@@ -122,7 +126,7 @@ class IntrinsicSlowPathMIPS64 : public SlowPathCodeMIPS64 {
__ Bc(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPathMIPS64"; }
+ const char* GetDescription() const override { return "IntrinsicSlowPathMIPS64"; }
private:
// The instruction where this slow path is happening.
@@ -165,7 +169,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* in
}
void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
// int java.lang.Float.floatToRawIntBits(float)
@@ -174,7 +178,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invok
}
void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -201,7 +205,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invok
}
void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
// float java.lang.Float.intBitsToFloat(int)
@@ -210,7 +214,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke)
}
void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -291,7 +295,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke*
}
void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
// int java.lang.Long.numberOfLeadingZeros(long i)
@@ -300,7 +304,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* inv
}
void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ GenNumberOfLeadingZeroes(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
static void GenNumberOfTrailingZeroes(LocationSummary* locations,
@@ -328,7 +332,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke*
}
void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
// int java.lang.Long.numberOfTrailingZeros(long i)
@@ -337,7 +341,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ GenNumberOfTrailingZeroes(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
static void GenReverse(LocationSummary* locations,
@@ -386,6 +390,7 @@ static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
static void GenBitCount(LocationSummary* locations,
const DataType::Type type,
+ const bool hasMsa,
Mips64Assembler* assembler) {
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
@@ -414,41 +419,52 @@ static void GenBitCount(LocationSummary* locations,
// bits are set but the algorithm here attempts to minimize the total
// number of instructions executed even when a large number of bits
// are set.
-
- if (type == DataType::Type::kInt32) {
- __ Srl(TMP, in, 1);
- __ LoadConst32(AT, 0x55555555);
- __ And(TMP, TMP, AT);
- __ Subu(TMP, in, TMP);
- __ LoadConst32(AT, 0x33333333);
- __ And(out, TMP, AT);
- __ Srl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Addu(TMP, out, TMP);
- __ Srl(out, TMP, 4);
- __ Addu(out, out, TMP);
- __ LoadConst32(AT, 0x0F0F0F0F);
- __ And(out, out, AT);
- __ LoadConst32(TMP, 0x01010101);
- __ MulR6(out, out, TMP);
- __ Srl(out, out, 24);
- } else if (type == DataType::Type::kInt64) {
- __ Dsrl(TMP, in, 1);
- __ LoadConst64(AT, 0x5555555555555555L);
- __ And(TMP, TMP, AT);
- __ Dsubu(TMP, in, TMP);
- __ LoadConst64(AT, 0x3333333333333333L);
- __ And(out, TMP, AT);
- __ Dsrl(TMP, TMP, 2);
- __ And(TMP, TMP, AT);
- __ Daddu(TMP, out, TMP);
- __ Dsrl(out, TMP, 4);
- __ Daddu(out, out, TMP);
- __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
- __ And(out, out, AT);
- __ LoadConst64(TMP, 0x0101010101010101L);
- __ Dmul(out, out, TMP);
- __ Dsrl32(out, out, 24);
+ if (hasMsa) {
+ if (type == DataType::Type::kInt32) {
+ __ Mtc1(in, FTMP);
+ __ PcntW(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Mfc1(out, FTMP);
+ } else {
+ __ Dmtc1(in, FTMP);
+ __ PcntD(static_cast<VectorRegister>(FTMP), static_cast<VectorRegister>(FTMP));
+ __ Dmfc1(out, FTMP);
+ }
+ } else {
+ if (type == DataType::Type::kInt32) {
+ __ Srl(TMP, in, 1);
+ __ LoadConst32(AT, 0x55555555);
+ __ And(TMP, TMP, AT);
+ __ Subu(TMP, in, TMP);
+ __ LoadConst32(AT, 0x33333333);
+ __ And(out, TMP, AT);
+ __ Srl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Addu(TMP, out, TMP);
+ __ Srl(out, TMP, 4);
+ __ Addu(out, out, TMP);
+ __ LoadConst32(AT, 0x0F0F0F0F);
+ __ And(out, out, AT);
+ __ LoadConst32(TMP, 0x01010101);
+ __ MulR6(out, out, TMP);
+ __ Srl(out, out, 24);
+ } else {
+ __ Dsrl(TMP, in, 1);
+ __ LoadConst64(AT, 0x5555555555555555L);
+ __ And(TMP, TMP, AT);
+ __ Dsubu(TMP, in, TMP);
+ __ LoadConst64(AT, 0x3333333333333333L);
+ __ And(out, TMP, AT);
+ __ Dsrl(TMP, TMP, 2);
+ __ And(TMP, TMP, AT);
+ __ Daddu(TMP, out, TMP);
+ __ Dsrl(out, TMP, 4);
+ __ Daddu(out, out, TMP);
+ __ LoadConst64(AT, 0x0F0F0F0F0F0F0F0FL);
+ __ And(out, out, AT);
+ __ LoadConst64(TMP, 0x0101010101010101L);
+ __ Dmul(out, out, TMP);
+ __ Dsrl32(out, out, 24);
+ }
}
}
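
The 64-bit ladder above is the same divide-and-conquer popcount as the 32-bit MIPS sequence with the constants widened; the final Dsrl32(out, out, 24) shifts by 32 + 24 = 56 bits so the byte sum lands in the low byte. As a C++ sketch (PopCount64 is a hypothetical name, illustration only):

    #include <cstdint>

    uint64_t PopCount64(uint64_t x) {
      x = x - ((x >> 1) & 0x5555555555555555ull);
      x = (x & 0x3333333333333333ull) + ((x >> 2) & 0x3333333333333333ull);
      x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0Full;
      return (x * 0x0101010101010101ull) >> 56;  // byte sums accumulate in the top byte
    }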
@@ -458,7 +474,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, HasMsa(), GetAssembler());
}
// int java.lang.Long.bitCount(long)
@@ -467,291 +483,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
-}
-
-static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
- FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>();
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
-
- if (is64bit) {
- __ AbsD(out, in);
- } else {
- __ AbsS(out, in);
- }
-}
-
-// double java.lang.Math.abs(double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-// float java.lang.Math.abs(float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-static void CreateIntToInt(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) {
- GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
- if (is64bit) {
- __ Dsra32(AT, in, 31);
- __ Xor(out, in, AT);
- __ Dsubu(out, out, AT);
- } else {
- __ Sra(AT, in, 31);
- __ Xor(out, in, AT);
- __ Subu(out, out, AT);
- }
-}
-
-// int java.lang.Math.abs(int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToInt(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-// long java.lang.Math.abs(long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToInt(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- DataType::Type type,
- Mips64Assembler* assembler) {
- FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>();
- FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>();
- FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
-
- Mips64Label noNaNs;
- Mips64Label done;
- FpuRegister ftmp = ((out != a) && (out != b)) ? out : FTMP;
-
- // When Java computes min/max it prefers a NaN to a number; the
- // behavior of MIPSR6 is to prefer numbers to NaNs, i.e., if one of
- // the inputs is a NaN and the other is a valid number, the MIPS
- // instruction will return the number; Java wants the NaN value
- // returned. This is why there is extra logic preceding the use of
- // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a
- // NaN, return the NaN, otherwise return the min/max.
- if (type == DataType::Type::kFloat64) {
- __ CmpUnD(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqD(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelD(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovD(out, ftmp);
- }
-
- __ Bc(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinD(out, a, b);
- } else {
- __ MaxD(out, a, b);
- }
- } else {
- DCHECK_EQ(type, DataType::Type::kFloat32);
- __ CmpUnS(FTMP, a, b);
- __ Bc1eqz(FTMP, &noNaNs);
-
- // One of the inputs is a NaN
- __ CmpEqS(ftmp, a, a);
- // If a == a then b is the NaN, otherwise a is the NaN.
- __ SelS(ftmp, a, b);
-
- if (ftmp != out) {
- __ MovS(out, ftmp);
- }
-
- __ Bc(&done);
-
- __ Bind(&noNaNs);
-
- if (is_min) {
- __ MinS(out, a, b);
- } else {
- __ MaxS(out, a, b);
- }
- }
-
- __ Bind(&done);
-}
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
-}
-
-// double java.lang.Math.min(double, double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler());
-}
-
-// float java.lang.Math.min(float, float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler());
-}
-
-// double java.lang.Math.max(double, double)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler());
-}
-
-// float java.lang.Math.max(float, float)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler());
-}
-
-static void GenMinMax(LocationSummary* locations,
- bool is_min,
- Mips64Assembler* assembler) {
- GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>();
- GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>();
- GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
- if (lhs == rhs) {
- if (out != lhs) {
- __ Move(out, lhs);
- }
- } else {
- // Some architectures, such as ARM and MIPS (prior to r6), have a
- // conditional move instruction which only changes the target
- // (output) register if the condition is true (MIPS prior to r6 had
- // MOVF, MOVT, and MOVZ). The SELEQZ and SELNEZ instructions always
- // change the target (output) register. If the condition is true the
- // output register gets the contents of the "rs" register; otherwise,
- // the output register is set to zero. One consequence of this is
- // that to implement something like "rd = c==0 ? rs : rt" MIPS64r6
- // needs to use a pair of SELEQZ/SELNEZ instructions. After
- // executing this pair of instructions one of the output registers
- // from the pair will necessarily contain zero. Then the code ORs the
- // output registers from the SELEQZ/SELNEZ instructions to get the
- // final result.
- //
- // The initial test to see if the output register is the same as the
- // first input register is needed to make sure that value in the
- // first input register isn't clobbered before we've finished
- // computing the output value. The logic in the corresponding else
- // clause performs the same task but makes sure the second input
- // register isn't clobbered in the event that it's the same register
- // as the output register; the else clause also handles the case
- // where the output register is distinct from both the first, and the
- // second input registers.
- if (out == lhs) {
- __ Slt(AT, rhs, lhs);
- if (is_min) {
- __ Seleqz(out, lhs, AT);
- __ Selnez(AT, rhs, AT);
- } else {
- __ Selnez(out, lhs, AT);
- __ Seleqz(AT, rhs, AT);
- }
- } else {
- __ Slt(AT, lhs, rhs);
- if (is_min) {
- __ Seleqz(out, rhs, AT);
- __ Selnez(AT, lhs, AT);
- } else {
- __ Selnez(out, rhs, AT);
- __ Seleqz(AT, lhs, AT);
- }
- }
- __ Or(out, out, AT);
- }
-}
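The SELEQZ/SELNEZ dance the comment above describes reduces to two zeroing selects plus an OR. A minimal C++ model of the r6 semantics (register names illustrative):

    #include <cstdint>

    // MIPS64r6 dropped MOVZ/MOVN, so "rd = cond ? rs : rt" becomes:
    //   SELNEZ d1, rs, cond   // d1 = (cond != 0) ? rs : 0
    //   SELEQZ d2, rt, cond   // d2 = (cond == 0) ? rt : 0
    //   OR     rd, d1, d2     // exactly one select produces a nonzero pick
    uint64_t R6ConditionalSelect(uint64_t cond, uint64_t rs, uint64_t rt) {
      uint64_t d1 = (cond != 0) ? rs : 0;  // SELNEZ
      uint64_t d2 = (cond == 0) ? rt : 0;  // SELEQZ
      return d1 | d2;                      // OR merges the pair
    }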
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-// int java.lang.Math.min(int, int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
-}
-
-// long java.lang.Math.min(long, long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, GetAssembler());
-}
-
-// int java.lang.Math.max(int, int)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
-}
-
-// long java.lang.Math.max(long, long)
-void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, GetAssembler());
+ GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, HasMsa(), GetAssembler());
}
// double java.lang.Math.sqrt(double)
@@ -1179,11 +911,11 @@ static void GenUnsafeGet(HInvoke* invoke,
codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
trg_loc,
base,
- /* offset */ 0U,
- /* index */ offset_loc,
+ /* offset= */ 0U,
+ /* index= */ offset_loc,
TIMES_1,
temp,
- /* needs_null_check */ false);
+ /* needs_null_check= */ false);
if (is_volatile) {
__ Sync(0);
}
@@ -1196,8 +928,8 @@ static void GenUnsafeGet(HInvoke* invoke,
trg_loc,
trg_loc,
base_loc,
- /* offset */ 0U,
- /* index */ offset_loc);
+ /* offset= */ 0U,
+ /* index= */ offset_loc);
}
} else {
__ Lwu(trg, TMP, 0);
@@ -1220,7 +952,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
// int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
@@ -1229,7 +961,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
// long sun.misc.Unsafe.getLong(Object o, long offset)
@@ -1238,7 +970,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
@@ -1247,7 +979,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
// Object sun.misc.Unsafe.getObject(Object o, long offset)
@@ -1256,7 +988,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
// Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
@@ -1265,7 +997,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invo
}
void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}
static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -1335,8 +1067,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePut(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt32,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
codegen_);
}
@@ -1348,8 +1080,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt32,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
codegen_);
}
@@ -1361,8 +1093,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt32,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
codegen_);
}
@@ -1374,8 +1106,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObject(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kReference,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
codegen_);
}
@@ -1387,8 +1119,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invok
void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kReference,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
codegen_);
}
@@ -1400,8 +1132,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invo
void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kReference,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
codegen_);
}
@@ -1413,8 +1145,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLong(HInvoke* invoke) {
void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt64,
- /* is_volatile */ false,
- /* is_ordered */ false,
+ /* is_volatile= */ false,
+ /* is_ordered= */ false,
codegen_);
}
@@ -1426,8 +1158,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke)
void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt64,
- /* is_volatile */ false,
- /* is_ordered */ true,
+ /* is_volatile= */ false,
+ /* is_ordered= */ true,
codegen_);
}
@@ -1439,8 +1171,8 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke
void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
GenUnsafePut(invoke->GetLocations(),
DataType::Type::kInt64,
- /* is_volatile */ true,
- /* is_ordered */ false,
+ /* is_volatile= */ true,
+ /* is_ordered= */ false,
codegen_);
}
@@ -1502,12 +1234,12 @@ static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS64* co
invoke,
out_loc, // Unused, used only as a "temporary" within the read barrier.
base,
- /* offset */ 0u,
- /* index */ offset_loc,
+ /* offset= */ 0u,
+ /* index= */ offset_loc,
ScaleFactor::TIMES_1,
temp,
- /* needs_null_check */ false,
- /* always_update_field */ true);
+ /* needs_null_check= */ false,
+ /* always_update_field= */ true);
}
}
@@ -1637,13 +1369,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) {
// boolean java.lang.String.equals(Object anObject)
void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier &&
- !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
- !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
- // No support for this odd case (String class is moveable, not in the boot image).
- return;
- }
-
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
@@ -1704,8 +1429,16 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) {
// All string objects must have the same type since String cannot be subclassed.
// Receiver must be a string object, so its class field is equal to all strings' class fields.
// If the argument is a string object, its class field must be equal to receiver's class field.
+ //
+ // As the String class is expected to be non-movable, we can read the class
+ // field from String.equals' arguments without read barriers.
+ AssertNonMovableStringClass();
+ // /* HeapReference<Class> */ temp1 = str->klass_
__ Lw(temp1, str, class_offset);
+ // /* HeapReference<Class> */ temp2 = arg->klass_
__ Lw(temp2, arg, class_offset);
+ // Also, because we use the previously loaded class references only in the
+ // following comparison, we don't need to unpoison them.
__ Bnec(temp1, temp2, &return_false);
}
@@ -1823,7 +1556,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) {
- GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true);
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
}
// int java.lang.String.indexOf(int ch, int fromIndex)
@@ -1841,7 +1574,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) {
- GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false);
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
}
// java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount)
@@ -1942,7 +1675,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitFloatIsInfinite(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitFloatIsInfinite(HInvoke* invoke) {
- GenIsInfinite(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ GenIsInfinite(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
// boolean java.lang.Double.isInfinite(double)
@@ -1951,7 +1684,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) {
- GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ GenIsInfinite(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
// void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
@@ -2535,54 +2268,45 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) {
- IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ IntrinsicVisitor::IntegerValueOfInfo info =
+ IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
LocationSummary* locations = invoke->GetLocations();
Mips64Assembler* assembler = GetAssembler();
InstructionCodeGeneratorMIPS64* icodegen =
down_cast<InstructionCodeGeneratorMIPS64*>(codegen_->GetInstructionVisitor());
GpuRegister out = locations->Out().AsRegister<GpuRegister>();
- InvokeRuntimeCallingConvention calling_convention;
if (invoke->InputAt(0)->IsConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
- if (value >= info.low && value <= info.high) {
+ if (static_cast<uint32_t>(value - info.low) < info.length) {
// Just embed the j.l.Integer in the code.
- ScopedObjectAccess soa(Thread::Current());
- mirror::Object* boxed = info.cache->Get(value + (-info.low));
- DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
- __ LoadConst64(out, address);
+ DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
} else {
+ DCHECK(locations->CanCall());
// Allocate and initialize a new j.l.Integer.
// TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
// JIT object table.
- uint32_t address =
- dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ LoadConst64(calling_convention.GetRegisterAt(0), address);
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ StoreConstToOffset(kStoreWord, value, out, info.value_offset, TMP);
// `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
// one.
icodegen->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
}
} else {
+ DCHECK(locations->CanCall());
GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>();
Mips64Label allocate, done;
- int32_t count = static_cast<uint32_t>(info.high) - info.low + 1;
- // Is (info.low <= in) && (in <= info.high)?
__ Addiu32(out, in, -info.low);
- // As unsigned quantities is out < (info.high - info.low + 1)?
- __ LoadConst32(AT, count);
- // Branch if out >= (info.high - info.low + 1).
- // This means that "in" is outside of the range [info.low, info.high].
+ // As unsigned quantities, is out < info.length?
+ __ LoadConst32(AT, info.length);
+ // Branch if out >= info.length. This means that "in" is outside of the valid range.
__ Bgeuc(out, AT, &allocate);
// If the value is within the bounds, load the j.l.Integer directly from the array.
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
- __ LoadConst64(TMP, data_offset + address);
+ codegen_->LoadBootImageAddress(TMP, info.array_data_boot_image_reference);
__ Dlsa(out, out, TMP, TIMES_4);
__ Lwu(out, out, 0);
__ MaybeUnpoisonHeapReference(out);
@@ -2590,10 +2314,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) {
__ Bind(&allocate);
// Otherwise allocate and initialize a new j.l.Integer.
- address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ LoadConst64(calling_convention.GetRegisterAt(0), address);
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ StoreToOffset(kStoreWord, in, out, info.value_offset);
// `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
// one.
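Both paths of the rewritten VisitIntegerValueOf lean on the same unsigned range-check idiom: one compare of value - low against length replaces a pair of signed bounds checks. A quick C++ illustration (function name hypothetical):

    #include <cstdint>

    // Equivalent to (low <= value && value < low + length): unsigned
    // subtraction wraps values below `low` around to huge numbers, so a
    // single unsigned compare rejects both out-of-range sides at once.
    bool InCacheRange(int32_t value, int32_t low, uint32_t length) {
      return static_cast<uint32_t>(value) - static_cast<uint32_t>(low) < length;
    }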
@@ -2632,6 +2354,9 @@ void IntrinsicCodeGeneratorMIPS64::VisitReachabilityFence(HInvoke* invoke ATTRIB
UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy)
+UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(MIPS64, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h
index 6f40d90ddb..ca8bc8f55a 100644
--- a/compiler/optimizing/intrinsics_mips64.h
+++ b/compiler/optimizing/intrinsics_mips64.h
@@ -30,14 +30,14 @@ namespace mips64 {
class CodeGeneratorMIPS64;
class Mips64Assembler;
-class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor {
+class IntrinsicLocationsBuilderMIPS64 final : public IntrinsicVisitor {
public:
explicit IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen);
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
@@ -55,19 +55,21 @@ class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor {
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64);
};
-class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor {
+class IntrinsicCodeGeneratorMIPS64 final : public IntrinsicVisitor {
public:
explicit IntrinsicCodeGeneratorMIPS64(CodeGeneratorMIPS64* codegen) : codegen_(codegen) {}
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
#undef OPTIMIZING_INTRINSICS
+ bool HasMsa() const;
+
private:
Mips64Assembler* GetAssembler();
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index 8c69d9b643..41947f1ccd 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -47,7 +47,7 @@ class IntrinsicSlowPath : public SlowPathCode {
return calling_convention_visitor.GetMethodLocation();
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
Assembler* assembler = codegen->GetAssembler();
assembler->Bind(GetEntryLabel());
@@ -73,7 +73,7 @@ class IntrinsicSlowPath : public SlowPathCode {
assembler->Jump(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }
+ const char* GetDescription() const override { return "IntrinsicSlowPath"; }
private:
// The instruction where this slow path is happening.
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 0763ef2352..de697f0f96 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -40,11 +40,6 @@ namespace art {
namespace x86 {
-static constexpr int kDoubleNaNHigh = 0x7FF80000;
-static constexpr int kDoubleNaNLow = 0x00000000;
-static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
-static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);
-
IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
: allocator_(codegen->GetGraph()->GetAllocator()),
codegen_(codegen) {
@@ -87,7 +82,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
DCHECK(kUseBakerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
@@ -165,7 +160,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }
+ const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }
private:
DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
@@ -228,31 +223,31 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler*
}
void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
- CreateFPToIntLocations(allocator_, invoke, /* is64bit */ true);
+ CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
- CreateIntToFPLocations(allocator_, invoke, /* is64bit */ true);
+ CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
}
void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
- CreateFPToIntLocations(allocator_, invoke, /* is64bit */ false);
+ CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
- CreateIntToFPLocations(allocator_, invoke, /* is64bit */ false);
+ CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
}
void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -333,432 +328,6 @@ void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}
-
-// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
-// need is 64b.
-
-static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) {
- // TODO: Enable memory operations when the assembler supports them.
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(static_or_direct != nullptr);
- if (static_or_direct->HasSpecialInput() &&
- invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
- // We need addressability for the constant area.
- locations->SetInAt(1, Location::RequiresRegister());
- // We need a temporary to hold the constant.
- locations->AddTemp(Location::RequiresFpuRegister());
- }
-}
-
-static void MathAbsFP(HInvoke* invoke,
- bool is64bit,
- X86Assembler* assembler,
- CodeGeneratorX86* codegen) {
- LocationSummary* locations = invoke->GetLocations();
- Location output = locations->Out();
-
- DCHECK(output.IsFpuRegister());
- if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
- HX86ComputeBaseMethodAddress* method_address =
- invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
- DCHECK(locations->InAt(1).IsRegister());
- // We also have a constant area pointer.
- Register constant_area = locations->InAt(1).AsRegister<Register>();
- XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
- if (is64bit) {
- __ movsd(temp, codegen->LiteralInt64Address(
- INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
- __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
- } else {
- __ movss(temp, codegen->LiteralInt32Address(
- INT32_C(0x7FFFFFFF), method_address, constant_area));
- __ andps(output.AsFpuRegister<XmmRegister>(), temp);
- }
- } else {
- // Create the right constant on an aligned stack.
- if (is64bit) {
- __ subl(ESP, Immediate(8));
- __ pushl(Immediate(0x7FFFFFFF));
- __ pushl(Immediate(0xFFFFFFFF));
- __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
- } else {
- __ subl(ESP, Immediate(12));
- __ pushl(Immediate(0x7FFFFFFF));
- __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
- }
- __ addl(ESP, Immediate(16));
- }
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFloatToFloat(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFloatToFloat(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
-}
-
-static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RegisterLocation(EAX));
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RegisterLocation(EDX));
-}
-
-static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
- Location output = locations->Out();
- Register out = output.AsRegister<Register>();
- DCHECK_EQ(out, EAX);
- Register temp = locations->GetTemp(0).AsRegister<Register>();
- DCHECK_EQ(temp, EDX);
-
- // Sign extend EAX into EDX.
- __ cdq();
-
- // XOR EAX with sign.
- __ xorl(EAX, EDX);
-
- // Subtract out sign to correct.
- __ subl(EAX, EDX);
-
- // The result is in EAX.
-}
-
-static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
- locations->AddTemp(Location::RequiresRegister());
-}
-
-static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
- Location input = locations->InAt(0);
- Register input_lo = input.AsRegisterPairLow<Register>();
- Register input_hi = input.AsRegisterPairHigh<Register>();
- Location output = locations->Out();
- Register output_lo = output.AsRegisterPairLow<Register>();
- Register output_hi = output.AsRegisterPairHigh<Register>();
- Register temp = locations->GetTemp(0).AsRegister<Register>();
-
- // Compute the sign into the temporary.
- __ movl(temp, input_hi);
- __ sarl(temp, Immediate(31));
-
- // Store the sign into the output.
- __ movl(output_lo, temp);
- __ movl(output_hi, temp);
-
- // XOR the input to the output.
- __ xorl(output_lo, input_lo);
- __ xorl(output_hi, input_hi);
-
- // Subtract the sign.
- __ subl(output_lo, temp);
- __ sbbl(output_hi, temp);
-}
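GenAbsInteger and GenAbsLong above are the classic branch-free absolute value: broadcast the sign bit (cdq, or movl+sarl for the register pair), XOR, then subtract the sign. The idiom in C++ (a sketch of the trick, not the emitted code):

    #include <cstdint>

    // sign is 0 for non-negative x and -1 (all ones) for negative x.
    // x ^ sign conditionally complements; subtracting sign adds the +1
    // that completes two's-complement negation. INT32_MIN maps to itself,
    // matching Math.abs.
    int32_t BranchFreeAbs(int32_t x) {
      int32_t sign = x >> 31;  // arithmetic shift on the targets ART supports
      return (x ^ sign) - sign;
    }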
-
-void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
- CreateAbsIntLocation(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
- CreateAbsLongLocation(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsLong(invoke->GetLocations(), GetAssembler());
-}
-
-static void GenMinMaxFP(HInvoke* invoke,
- bool is_min,
- bool is_double,
- X86Assembler* assembler,
- CodeGeneratorX86* codegen) {
- LocationSummary* locations = invoke->GetLocations();
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
- Location out_loc = locations->Out();
- XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc));
- return;
- }
-
- // (out := op1)
- // out <=? op2
- // if Nan jmp Nan_label
- // if out is min jmp done
- // if op2 is min jmp op2_label
- // handle -0/+0
- // jmp done
- // Nan_label:
- // out := NaN
- // op2_label:
- // out := op2
- // done:
- //
- // This removes one jmp, but needs to copy one input (op1) to out.
- //
- // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
-
- XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
-
- NearLabel nan, done, op2_label;
- if (is_double) {
- __ ucomisd(out, op2);
- } else {
- __ ucomiss(out, op2);
- }
-
- __ j(Condition::kParityEven, &nan);
-
- __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
- __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
-
- // Handle 0.0/-0.0.
- if (is_min) {
- if (is_double) {
- __ orpd(out, op2);
- } else {
- __ orps(out, op2);
- }
- } else {
- if (is_double) {
- __ andpd(out, op2);
- } else {
- __ andps(out, op2);
- }
- }
- __ jmp(&done);
-
- // NaN handling.
- __ Bind(&nan);
- // Do we have a constant area pointer?
- if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
- HX86ComputeBaseMethodAddress* method_address =
- invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
- DCHECK(locations->InAt(2).IsRegister());
- Register constant_area = locations->InAt(2).AsRegister<Register>();
- if (is_double) {
- __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
- } else {
- __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
- }
- } else {
- if (is_double) {
- __ pushl(Immediate(kDoubleNaNHigh));
- __ pushl(Immediate(kDoubleNaNLow));
- __ movsd(out, Address(ESP, 0));
- __ addl(ESP, Immediate(8));
- } else {
- __ pushl(Immediate(kFloatNaN));
- __ movss(out, Address(ESP, 0));
- __ addl(ESP, Immediate(4));
- }
- }
- __ jmp(&done);
-
- // out := op2;
- __ Bind(&op2_label);
- if (is_double) {
- __ movsd(out, op2);
- } else {
- __ movss(out, op2);
- }
-
- // Done.
- __ Bind(&done);
-}
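The orpd/andpd lines are the subtle part of the block above: when the ucomi compare reports equality the operands may be +0.0 and -0.0, which compare equal yet must yield -0.0 for min and +0.0 for max. OR-ing the raw bit patterns keeps a sign bit if either operand has one; AND-ing keeps it only if both do. A C++ model of just that equal-zeros branch (illustrative only):

    #include <cstdint>
    #include <cstring>

    // Only meaningful when a and b compare equal (i.e. both are +/-0.0):
    // min wants the sign bits OR-ed (any -0.0 wins); max would AND them.
    double MinOfEqualZeros(double a, double b) {
      uint64_t ua, ub;
      std::memcpy(&ua, &a, sizeof(a));
      std::memcpy(&ub, &b, sizeof(b));
      uint64_t bits = ua | ub;  // the orpd in the generated code
      double out;
      std::memcpy(&out, &bits, sizeof(out));
      return out;
    }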
-
-static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- // The following is sub-optimal, but all we can do for now. It would be fine to also accept
- // the second input to be the output (we can simply swap inputs).
- locations->SetOut(Location::SameAsFirstInput());
- HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
- DCHECK(static_or_direct != nullptr);
- if (static_or_direct->HasSpecialInput() &&
- invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
- locations->SetInAt(2, Location::RequiresRegister());
- }
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ true,
- /* is_double */ true,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ true,
- /* is_double */ false,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ false,
- /* is_double */ true,
- GetAssembler(),
- codegen_);
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFPLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(invoke,
- /* is_min */ false,
- /* is_double */ false,
- GetAssembler(),
- codegen_);
-}
-
-static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
- X86Assembler* assembler) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- // Can return immediately, as op1_loc == out_loc.
- // Note: if we ever support separate registers, e.g., output into memory, we need to check for
- // a copy here.
- DCHECK(locations->Out().Equals(op1_loc));
- return;
- }
-
- if (is_long) {
- // Need to perform a subtract to get the sign right.
- // op1 is already in the same location as the output.
- Location output = locations->Out();
- Register output_lo = output.AsRegisterPairLow<Register>();
- Register output_hi = output.AsRegisterPairHigh<Register>();
-
- Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
- Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
-
- // Spare register to compute the subtraction to set condition code.
- Register temp = locations->GetTemp(0).AsRegister<Register>();
-
- // Subtract off op2_lo.
- __ movl(temp, output_lo);
- __ subl(temp, op2_lo);
-
- // Now use the same temp and the borrow to finish the subtraction of op2_hi.
- __ movl(temp, output_hi);
- __ sbbl(temp, op2_hi);
-
- // Now the condition code is correct.
- Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
- __ cmovl(cond, output_lo, op2_lo);
- __ cmovl(cond, output_hi, op2_hi);
- } else {
- Register out = locations->Out().AsRegister<Register>();
- Register op2 = op2_loc.AsRegister<Register>();
-
- // (out := op1)
- // out <=? op2
- // if out is min jmp done
- // out := op2
- // done:
-
- __ cmpl(out, op2);
- Condition cond = is_min ? Condition::kGreater : Condition::kLess;
- __ cmovl(cond, out, op2);
- }
-}
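The long path above cannot use a single cmpl, so it materializes the comparison as a discarded 64-bit subtraction: subl sets the borrow, sbbl folds it into the high words, and the resulting flags drive the cmovl pair. The same flag math in C++ (names invented; widening to int64_t stands in for the sign/overflow flags of the 32-bit sbb):

    #include <cstdint>

    // True when the 64-bit value (lhs_hi:lhs_lo) < (rhs_hi:rhs_lo), signed.
    bool Less64Via32(uint32_t lhs_lo, int32_t lhs_hi,
                     uint32_t rhs_lo, int32_t rhs_hi) {
      uint32_t borrow = (lhs_lo < rhs_lo) ? 1u : 0u;  // subl's carry flag
      int64_t hi = static_cast<int64_t>(lhs_hi)
                 - static_cast<int64_t>(rhs_hi)
                 - static_cast<int64_t>(borrow);      // sbbl
      return hi < 0;                                  // signed less-than
    }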
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- // Register to use to perform a long subtract to set cc.
- locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateLongLongToLongLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
@@ -976,6 +545,96 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry
__ cfi().AdjustCFAOffset(-16);
}
+static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
+ LocationSummary* locations =
+ new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
+ if (is_long) {
+ locations->SetInAt(0, Location::RequiresRegister());
+ } else {
+ locations->SetInAt(0, Location::Any());
+ }
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+static void GenLowestOneBit(X86Assembler* assembler,
+ CodeGeneratorX86* codegen,
+ bool is_long,
+ HInvoke* invoke) {
+ LocationSummary* locations = invoke->GetLocations();
+ Location src = locations->InAt(0);
+ Location out_loc = locations->Out();
+
+ if (invoke->InputAt(0)->IsConstant()) {
+ // Evaluate this at compile time.
+ int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
+ if (value == 0) {
+ if (is_long) {
+ __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
+ __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
+ } else {
+ __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
+ }
+ return;
+ }
+ // Nonzero value.
+ value = is_long ? CTZ(static_cast<uint64_t>(value))
+ : CTZ(static_cast<uint32_t>(value));
+ if (is_long) {
+ if (value >= 32) {
+ int shift = value - 32;
+ codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
+ codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
+ } else {
+ codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
+ codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
+ }
+ } else {
+ codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
+ }
+ return;
+ }
+ // Handle the non-constant case.
+ if (is_long) {
+ DCHECK(src.IsRegisterPair());
+ Register src_lo = src.AsRegisterPairLow<Register>();
+ Register src_hi = src.AsRegisterPairHigh<Register>();
+
+ Register out_lo = out_loc.AsRegisterPairLow<Register>();
+ Register out_hi = out_loc.AsRegisterPairHigh<Register>();
+
+ __ movl(out_lo, src_lo);
+ __ movl(out_hi, src_hi);
+
+ __ negl(out_lo);
+ __ adcl(out_hi, Immediate(0));
+ __ negl(out_hi);
+
+ __ andl(out_lo, src_lo);
+ __ andl(out_hi, src_hi);
+ } else {
+ if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
+ Register out = out_loc.AsRegister<Register>();
+ __ blsi(out, src.AsRegister<Register>());
+ } else {
+ Register out = out_loc.AsRegister<Register>();
+ // Compute out = src & -src.
+ if (src.IsRegister()) {
+ __ movl(out, src.AsRegister<Register>());
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ movl(out, Address(ESP, src.GetStackIndex()));
+ }
+ __ negl(out);
+
+ if (src.IsRegister()) {
+ __ andl(out, src.AsRegister<Register>());
+ } else {
+ __ andl(out, Address(ESP, src.GetStackIndex()));
+ }
+ }
+ }
+}
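All three paths in GenLowestOneBit compute the same thing: x & -x isolates the lowest set bit, because negating flips every bit above it and preserves the lowest one. For reference (BLSI is the single-instruction BMI1 form, gated here behind the AVX2 feature check):

    #include <cstdint>

    // Returns the lowest set bit of x, or 0 when x == 0.
    uint64_t LowestOneBit(uint64_t x) {
      return x & (~x + 1);  // ~x + 1 == -x in two's complement
    }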
+
void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
CreateFPToFPCallLocations(allocator_, invoke);
}
@@ -1088,6 +747,21 @@ void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
GenFPToFPCall(invoke, codegen_, kQuickTanh);
}
+void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
+}
+void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
+}
+
+void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
+ CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
+}
+
+void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
+ GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
+}
+
static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
@@ -1353,13 +1027,6 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier &&
- !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
- !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
- // No support for this odd case (String class is moveable, not in the boot image).
- return;
- }
-
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
@@ -1405,7 +1072,15 @@ void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
// All string objects must have the same type since String cannot be subclassed.
// Receiver must be a string object, so its class field is equal to all strings' class fields.
// If the argument is a string object, its class field must be equal to receiver's class field.
+ //
+ // As the String class is expected to be non-movable, we can read the class
+ // field from String.equals' arguments without read barriers.
+ AssertNonMovableStringClass();
+ // Also, because we use the loaded class references only to compare them, we
+ // don't need to unpoison them.
+ // /* HeapReference<Class> */ ecx = str->klass_
__ movl(ecx, Address(str, class_offset));
+ // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
__ cmpl(ecx, Address(arg, class_offset));
__ j(kNotEqual, &return_false);
}
@@ -1650,19 +1325,19 @@ static void GenerateStringIndexOf(HInvoke* invoke,
}
void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
- CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true);
+ CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
}
void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
- GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true);
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
}
void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
- CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false);
+ CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
}
void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
- GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false);
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
}
void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -2024,7 +1699,7 @@ static void GenUnsafeGet(HInvoke* invoke,
if (kUseBakerReadBarrier) {
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, output_loc, base, src, /* needs_null_check */ false);
+ invoke, output_loc, base, src, /* needs_null_check= */ false);
} else {
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
codegen->GenerateReadBarrierSlow(
@@ -2095,45 +1770,45 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
CreateIntIntIntToIntLocations(
- allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ false);
+ allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ true);
+ CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile= */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
CreateIntIntIntToIntLocations(
- allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ false);
+ allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ true);
+ CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile= */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
CreateIntIntIntToIntLocations(
- allocator_, invoke, DataType::Type::kReference, /* is_volatile */ false);
+ allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
CreateIntIntIntToIntLocations(
- allocator_, invoke, DataType::Type::kReference, /* is_volatile */ true);
+ allocator_, invoke, DataType::Type::kReference, /* is_volatile= */ true);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}
@@ -2160,39 +1835,39 @@ static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator
void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
- allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false);
+ allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
- allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false);
+ allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
- allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ true);
+ allocator_, DataType::Type::kInt32, invoke, /* is_volatile= */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
- allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false);
+ allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
- allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false);
+ allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
- allocator_, DataType::Type::kReference, invoke, /* is_volatile */ true);
+ allocator_, DataType::Type::kReference, invoke, /* is_volatile= */ true);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
- allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false);
+ allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
- allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false);
+ allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ false);
}
void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
CreateIntIntIntIntToVoidPlusTempsLocations(
- allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ true);
+ allocator_, DataType::Type::kInt64, invoke, /* is_volatile= */ true);
}
// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
@@ -2244,34 +1919,34 @@ static void GenUnsafePut(LocationSummary* locations,
}
void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
GenUnsafePut(
- invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_);
+ invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
GenUnsafePut(
- invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_);
+ invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
GenUnsafePut(
- invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_);
+ invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
@@ -2368,8 +2043,8 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codeg
temp1_loc, // Unused, used only as a "temporary" within the read barrier.
base,
field_addr,
- /* needs_null_check */ false,
- /* always_update_field */ true,
+ /* needs_null_check= */ false,
+ /* always_update_field= */ true,
&temp2);
}
@@ -2600,19 +2275,19 @@ static void GenBitCount(X86Assembler* assembler,
}
void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
- CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ false);
+ CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
}
void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
+ GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}
void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
- CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ true);
+ CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
}
void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
+ GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}
static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
@@ -2704,19 +2379,19 @@ static void GenLeadingZeros(X86Assembler* assembler,
}
void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ false);
+ CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
}
void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
+ GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}
void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ true);
+ CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
}
void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
+ GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}
static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
@@ -2795,19 +2470,19 @@ static void GenTrailingZeros(X86Assembler* assembler,
}
void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ false);
+ CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
}
void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
+ GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}
void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ true);
+ CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
}
void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
+ GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}
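GenTrailingZeros itself is outside this hunk; only its call sites change here. On x86 such a helper is typically built around bsf (or tzcnt where available), which needs an explicit zero check since bsf leaves the result undefined for input 0. A portable reference version, as an illustration only:

    #include <cstdint>

    // Portable equivalent of a trailing-zero count with the zero input
    // handled explicitly, mirroring the special case bsf-based code needs.
    int Ctz32(uint32_t x) {
      if (x == 0) return 32;   // bsf sets ZF for 0; the hardware result is undefined
      int n = 0;
      while ((x & 1u) == 0) {  // shift out zeros until the lowest set bit
        x >>= 1;
        ++n;
      }
      return n;
    }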
static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
@@ -3015,11 +2690,11 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
// Bail out if the source is not a non-primitive array.
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
__ testl(temp1, temp1);
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
@@ -3052,7 +2727,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// /* HeapReference<Class> */ temp1 = dest->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
// Bail out if the destination is not a non-primitive array.
@@ -3064,7 +2739,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// temporaries such as `temp1`.
// /* HeapReference<Class> */ temp2 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
__ testl(temp2, temp2);
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp2` has been unpoisoned
@@ -3077,7 +2752,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
// read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
// /* HeapReference<Class> */ temp2 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
// Note: if heap poisoning is on, we are comparing two unpoisoned references here.
__ cmpl(temp1, temp2);
@@ -3086,7 +2761,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kEqual, &do_copy);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
// We do not need to emit a read barrier for the following
// heap reference load, as `temp1` is only used in a
// comparison with null below, and this reference is not
@@ -3140,10 +2815,10 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
__ testl(temp1, temp1);
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `temp1` has been unpoisoned
@@ -3212,7 +2887,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kEqual, &done);
// Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
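Only the name changes in this hunk: the former "white" state is now "non-gray", and the two static_asserts pin the encoding (non-gray == 0, gray == 1) that lets the generated code test a single bit of the lock word. The byte/bit split above works out as follows, assuming a shift of 28 purely for illustration:

    #include <cstdint>

    // Illustrative value; the real constant is LockWord::kReadBarrierStateShift.
    constexpr uint32_t kShift = 28;
    constexpr uint32_t kBitsPerByteSketch = 8;
    constexpr uint32_t gray_byte = kShift / kBitsPerByteSketch;  // byte 3 of the lock word
    constexpr uint32_t gray_bit  = kShift % kBitsPerByteSketch;  // bit 4 within that byte
    // The emitted check is then a one-byte test of the object's lock word:
    //   testb $(1 << gray_bit), monitor_offset + gray_byte(obj)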
@@ -3276,22 +2951,36 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
+ DCHECK(invoke->IsInvokeStaticOrDirect());
InvokeRuntimeCallingConvention calling_convention;
IntrinsicVisitor::ComputeIntegerValueOfLocations(
invoke,
codegen_,
Location::RegisterLocation(EAX),
Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+
+ LocationSummary* locations = invoke->GetLocations();
+ if (locations != nullptr) {
+ HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
+ if (invoke_static_or_direct->HasSpecialInput() &&
+ invoke->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
+ ->IsX86ComputeBaseMethodAddress()) {
+ locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
+ Location::RequiresRegister());
+ }
+ }
}
void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
- IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ DCHECK(invoke->IsInvokeStaticOrDirect());
+ IntrinsicVisitor::IntegerValueOfInfo info =
+ IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
LocationSummary* locations = invoke->GetLocations();
X86Assembler* assembler = GetAssembler();
@@ -3299,42 +2988,58 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
InvokeRuntimeCallingConvention calling_convention;
if (invoke->InputAt(0)->IsConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
- if (value >= info.low && value <= info.high) {
+ if (static_cast<uint32_t>(value - info.low) < info.length) {
// Just embed the j.l.Integer in the code.
- ScopedObjectAccess soa(Thread::Current());
- mirror::Object* boxed = info.cache->Get(value + (-info.low));
- DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
- __ movl(out, Immediate(address));
+ DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ codegen_->LoadBootImageAddress(
+ out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
} else {
+ DCHECK(locations->CanCall());
// Allocate and initialize a new j.l.Integer.
// TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
// JIT object table.
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ movl(Address(out, info.value_offset), Immediate(value));
}
} else {
+ DCHECK(locations->CanCall());
Register in = locations->InAt(0).AsRegister<Register>();
// Check bounds of our cache.
__ leal(out, Address(in, -info.low));
- __ cmpl(out, Immediate(info.high - info.low + 1));
+ __ cmpl(out, Immediate(info.length));
NearLabel allocate, done;
__ j(kAboveEqual, &allocate);
// If the value is within the bounds, load the j.l.Integer directly from the array.
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
- __ movl(out, Address(out, TIMES_4, data_offset + address));
+ constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
+ static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
+ "Check heap reference size.");
+ if (codegen_->GetCompilerOptions().IsBootImage()) {
+ DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+ size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
+ HX86ComputeBaseMethodAddress* method_address =
+ invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
+ DCHECK(method_address != nullptr);
+ Register method_address_reg =
+ invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
+ __ movl(out, Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kDummy32BitOffset));
+ codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
+ } else {
+ // Note: We're about to clobber the index in `out`, so we need to use `in` and
+ // adjust the offset accordingly.
+ uint32_t mid_array_boot_image_offset =
+ info.array_data_boot_image_reference - info.low * kElementSize;
+ codegen_->LoadBootImageAddress(
+ out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
+ DCHECK_NE(out, in);
+ __ movl(out, Address(out, in, TIMES_4, 0));
+ }
__ MaybeUnpoisonHeapReference(out);
__ jmp(&done);
__ Bind(&allocate);
// Otherwise allocate and initialize a new j.l.Integer.
- address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ movl(Address(out, info.value_offset), in);
__ Bind(&done);
}
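Note the bounds test above: the old pair of signed comparisons (value >= info.low && value <= info.high) becomes a single unsigned comparison against info.length. Spelled out:

    #include <cstdint>

    // One comparison replaces two: if value < low, the subtraction wraps to a
    // large uint32_t, so the single '<' rejects both out-of-range directions.
    bool InIntegerCache(int32_t value, int32_t low, uint32_t length) {
      return static_cast<uint32_t>(value - low) < length;
    }
    // With the usual cache (low = -128, length = 256):
    //   InIntegerCache(-128, -128, 256) -> 0   < 256 -> true
    //   InIntegerCache( 127, -128, 256) -> 255 < 256 -> true
    //   InIntegerCache( 128, -128, 256) -> 256 < 256 -> false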
@@ -3373,8 +3078,9 @@ UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
-UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(X86, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(X86, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h
index e3555e78fc..ae150dad43 100644
--- a/compiler/optimizing/intrinsics_x86.h
+++ b/compiler/optimizing/intrinsics_x86.h
@@ -30,14 +30,14 @@ namespace x86 {
class CodeGeneratorX86;
class X86Assembler;
-class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor {
+class IntrinsicLocationsBuilderX86 final : public IntrinsicVisitor {
public:
explicit IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen);
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
@@ -55,14 +55,14 @@ class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor {
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86);
};
-class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor {
+class IntrinsicCodeGeneratorX86 final : public IntrinsicVisitor {
public:
explicit IntrinsicCodeGeneratorX86(CodeGeneratorX86* codegen) : codegen_(codegen) {}
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
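The header diffs are a pure keyword migration: FINAL and OVERRIDE give way to final and override. As far as this patch shows, the macros were straight aliases for the C++11 keywords, kept from the pre-C++11 toolchain era (an assumption; their definitions are not visible here), so the replacement is mechanical:

    // Presumed former definitions: #define FINAL final, #define OVERRIDE override.
    struct Base {
      virtual void Visit() = 0;
      virtual ~Base() = default;
    };
    struct Example final : Base {  // was: struct Example FINAL : Base
      void Visit() override {}     // was: void Visit() OVERRIDE {}
    };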
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 91a505ede1..e79c0c9adf 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -80,7 +80,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
DCHECK(kUseBakerReadBarrier);
}
- void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+ void EmitNativeCode(CodeGenerator* codegen) override {
CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
LocationSummary* locations = instruction_->GetLocations();
DCHECK(locations->CanCall());
@@ -118,7 +118,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
__ jmp(GetExitLabel());
}
- const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
+ const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
private:
DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
@@ -162,10 +162,10 @@ void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invok
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
}
void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
@@ -176,10 +176,10 @@ void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke)
}
void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
- MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
- MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+ MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
}
static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -236,304 +236,6 @@ void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
}
-
-// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
-// need is 64b.
-
-static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) {
- // TODO: Enable memory operations when the assembler supports them.
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask.
-}
-
-static void MathAbsFP(LocationSummary* locations,
- bool is64bit,
- X86_64Assembler* assembler,
- CodeGeneratorX86_64* codegen) {
- Location output = locations->Out();
-
- DCHECK(output.IsFpuRegister());
- XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
-
- // TODO: Can mask directly with constant area using pand if we can guarantee
- // that the literal is aligned on a 16 byte boundary. This will avoid a
- // temporary.
- if (is64bit) {
- __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
- __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
- } else {
- __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
- __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
- }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
- CreateFloatToFloatPlusTemps(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
- CreateFloatToFloatPlusTemps(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
- MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_);
-}
-
-static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
- locations->AddTemp(Location::RequiresRegister());
-}
-
-static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
- Location output = locations->Out();
- CpuRegister out = output.AsRegister<CpuRegister>();
- CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
-
- if (is64bit) {
- // Create mask.
- __ movq(mask, out);
- __ sarq(mask, Immediate(63));
- // Add mask.
- __ addq(out, mask);
- __ xorq(out, mask);
- } else {
- // Create mask.
- __ movl(mask, out);
- __ sarl(mask, Immediate(31));
- // Add mask.
- __ addl(out, mask);
- __ xorl(out, mask);
- }
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
- CreateIntToIntPlusTemp(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
- CreateIntToIntPlusTemp(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
- GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
-}
-
-static void GenMinMaxFP(LocationSummary* locations,
- bool is_min,
- bool is_double,
- X86_64Assembler* assembler,
- CodeGeneratorX86_64* codegen) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
- Location out_loc = locations->Out();
- XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- DCHECK(out_loc.Equals(op1_loc));
- return;
- }
-
- // (out := op1)
- // out <=? op2
- // if Nan jmp Nan_label
- // if out is min jmp done
- // if op2 is min jmp op2_label
- // handle -0/+0
- // jmp done
- // Nan_label:
- // out := NaN
- // op2_label:
- // out := op2
- // done:
- //
- // This removes one jmp, but needs to copy one input (op1) to out.
- //
- // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
-
- XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
-
- NearLabel nan, done, op2_label;
- if (is_double) {
- __ ucomisd(out, op2);
- } else {
- __ ucomiss(out, op2);
- }
-
- __ j(Condition::kParityEven, &nan);
-
- __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
- __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
-
- // Handle 0.0/-0.0.
- if (is_min) {
- if (is_double) {
- __ orpd(out, op2);
- } else {
- __ orps(out, op2);
- }
- } else {
- if (is_double) {
- __ andpd(out, op2);
- } else {
- __ andps(out, op2);
- }
- }
- __ jmp(&done);
-
- // NaN handling.
- __ Bind(&nan);
- if (is_double) {
- __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
- } else {
- __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
- }
- __ jmp(&done);
-
- // out := op2;
- __ Bind(&op2_label);
- if (is_double) {
- __ movsd(out, op2);
- } else {
- __ movss(out, op2);
- }
-
- // Done.
- __ Bind(&done);
-}
-
-static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresFpuRegister());
- locations->SetInAt(1, Location::RequiresFpuRegister());
- // The following is sub-optimal, but all we can do for now. It would be fine to also accept
- // the second input to be the output (we can simply swap inputs).
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ true, /* is_double */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ true, /* is_double */ false, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ true, GetAssembler(), codegen_);
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- CreateFPFPToFP(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
- GenMinMaxFP(
- invoke->GetLocations(), /* is_min */ false, /* is_double */ false, GetAssembler(), codegen_);
-}
-
-static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
- X86_64Assembler* assembler) {
- Location op1_loc = locations->InAt(0);
- Location op2_loc = locations->InAt(1);
-
- // Shortcut for same input locations.
- if (op1_loc.Equals(op2_loc)) {
- // Can return immediately, as op1_loc == out_loc.
- // Note: if we ever support separate registers, e.g., output into memory, we need to check for
- // a copy here.
- DCHECK(locations->Out().Equals(op1_loc));
- return;
- }
-
- CpuRegister out = locations->Out().AsRegister<CpuRegister>();
- CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
-
- // (out := op1)
- // out <=? op2
- // if out is min jmp done
- // out := op2
- // done:
-
- if (is_long) {
- __ cmpq(out, op2);
- } else {
- __ cmpl(out, op2);
- }
-
- __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
-}
-
-static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
- LocationSummary* locations =
- new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
- locations->SetInAt(0, Location::RequiresRegister());
- locations->SetInAt(1, Location::RequiresRegister());
- locations->SetOut(Location::SameAsFirstInput());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
-}
-
-void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
- CreateIntIntToIntLocations(allocator_, invoke);
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
- GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
-}
-
static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
LocationSummary* locations =
new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
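The roughly 300 removed lines drop the hand-written Math.abs/min/max intrinsics for x86-64; presumably these operations are now covered by shared HIR nodes rather than per-architecture intrinsics (an inference, as the replacement is not part of this hunk). The deleted integer-abs sequence is the classic branch-free mask trick and is worth spelling out:

    #include <cstdint>

    // What the removed GenAbsInteger emitted, in portable form:
    //   movl mask, x ; sarl mask, 31 ; addl x, mask ; xorl x, mask
    int32_t AbsViaMask(int32_t x) {
      int32_t mask = x >> 31;      // 0 for x >= 0, -1 (all ones) for x < 0
      return (x + mask) ^ mask;    // identity for x >= 0; -x for x < 0
    }
    // x = -5: mask = -1, (-5 + -1) ^ -1 = -6 ^ -1 = 5;  x = 7: mask = 0 -> 7
    // Like Math.abs, INT32_MIN maps to itself.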
@@ -728,12 +430,12 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
// direct x86 instruction, since NaN should map to 0 and large positive
// values need to be clipped to the extreme value.
codegen_->Load64BitValue(out, kPrimLongMax);
- __ cvtsi2sd(t2, out, /* is64bit */ true);
+ __ cvtsi2sd(t2, out, /* is64bit= */ true);
__ comisd(t1, t2);
__ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
__ movl(out, Immediate(0)); // does not change flags, implicit zero extension to 64-bit
__ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
- __ cvttsd2si(out, t1, /* is64bit */ true);
+ __ cvttsd2si(out, t1, /* is64bit= */ true);
__ Bind(&done);
}
@@ -1277,7 +979,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = dest->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
// Register `temp1` is not trashed by the read barrier emitted
// by GenerateFieldLoadWithBakerReadBarrier below, as that
// method produces a call to a ReadBarrierMarkRegX entry point,
@@ -1285,7 +987,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// temporaries such as `temp1`.
// /* HeapReference<Class> */ temp2 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
// If heap poisoning is enabled, `temp1` and `temp2` have been
// unpoisoned by the previous calls to
// GenerateFieldLoadWithBakerReadBarrier.
@@ -1309,7 +1011,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ TMP = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
__ testl(CpuRegister(TMP), CpuRegister(TMP));
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `TMP` has been unpoisoned by
@@ -1332,7 +1034,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
// read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
// /* HeapReference<Class> */ TMP = temp2->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, TMP_loc, temp2, component_offset, /* needs_null_check */ false);
+ invoke, TMP_loc, temp2, component_offset, /* needs_null_check= */ false);
__ testl(CpuRegister(TMP), CpuRegister(TMP));
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
// If heap poisoning is enabled, `TMP` has been unpoisoned by
@@ -1356,7 +1058,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
// We do not need to emit a read barrier for the following
// heap reference load, as `temp1` is only used in a
// comparison with null below, and this reference is not
@@ -1384,10 +1086,10 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
// /* HeapReference<Class> */ temp1 = src->klass_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
+ invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
// /* HeapReference<Class> */ TMP = temp1->component_type_
codegen_->GenerateFieldLoadWithBakerReadBarrier(
- invoke, TMP_loc, temp1, component_offset, /* needs_null_check */ false);
+ invoke, TMP_loc, temp1, component_offset, /* needs_null_check= */ false);
__ testl(CpuRegister(TMP), CpuRegister(TMP));
__ j(kEqual, intrinsic_slow_path->GetEntryLabel());
} else {
@@ -1441,7 +1143,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
__ j(kEqual, &done);
// Given the numeric representation, it's enough to check the low bit of the rb_state.
- static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+ static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
@@ -1496,7 +1198,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
}
// We only need one card marking on the destination array.
- codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false);
+ codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null= */ false);
__ Bind(intrinsic_slow_path->GetExitLabel());
}
@@ -1528,13 +1230,6 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
}
void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
- if (kEmitCompilerReadBarrier &&
- !StringEqualsOptimizations(invoke).GetArgumentIsString() &&
- !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) {
- // No support for this odd case (String class is moveable, not in the boot image).
- return;
- }
-
LocationSummary* locations =
new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
locations->SetInAt(0, Location::RequiresRegister());
@@ -1580,7 +1275,15 @@ void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
// All string objects must have the same type since String cannot be subclassed.
// Receiver must be a string object, so its class field is equal to all strings' class fields.
// If the argument is a string object, its class field must be equal to receiver's class field.
+ //
+ // As the String class is expected to be non-movable, we can read the class
+ // field from String.equals' arguments without read barriers.
+ AssertNonMovableStringClass();
+ // Also, because we use the loaded class references only to compare them, we
+ // don't need to unpoison them.
+ // /* HeapReference<Class> */ rcx = str->klass_
__ movl(rcx, Address(str, class_offset));
+ // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
__ cmpl(rcx, Address(arg, class_offset));
__ j(kNotEqual, &return_false);
}
@@ -1749,7 +1452,7 @@ static void GenerateStringIndexOf(HInvoke* invoke,
// Ensure we have a start index >= 0.
__ xorl(counter, counter);
__ cmpl(start_index, Immediate(0));
- __ cmov(kGreater, counter, start_index, /* is64bit */ false); // 32-bit copy is enough.
+ __ cmov(kGreater, counter, start_index, /* is64bit= */ false); // 32-bit copy is enough.
if (mirror::kUseStringCompression) {
NearLabel modify_counter, offset_uncompressed_label;
@@ -1811,19 +1514,19 @@ static void GenerateStringIndexOf(HInvoke* invoke,
}
void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
- CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true);
+ CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
}
void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
- GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true);
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
}
void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
- CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false);
+ CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
}
void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
- GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false);
+ GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
}
void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
@@ -2137,7 +1840,7 @@ void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke)
void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
- /* no_rip */ true));
+ /* no_rip= */ true));
}
static void GenUnsafeGet(HInvoke* invoke,
@@ -2163,7 +1866,7 @@ static void GenUnsafeGet(HInvoke* invoke,
if (kUseBakerReadBarrier) {
Address src(base, offset, ScaleFactor::TIMES_1, 0);
codegen->GenerateReferenceLoadWithBakerReadBarrier(
- invoke, output_loc, base, src, /* needs_null_check */ false);
+ invoke, output_loc, base, src, /* needs_null_check= */ false);
} else {
__ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
codegen->GenerateReadBarrierSlow(
@@ -2227,22 +1930,22 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invo
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
- GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
+ GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}
@@ -2325,34 +2028,34 @@ static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool i
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
GenUnsafePut(
- invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_);
+ invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
GenUnsafePut(
- invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_);
+ invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
GenUnsafePut(
- invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_);
+ invoke->GetLocations(), DataType::Type::kReference, /* is_volatile= */ true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
- GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_);
+ GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile= */ true, codegen_);
}
static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
@@ -2437,8 +2140,8 @@ static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* co
out_loc, // Unused, used only as a "temporary" within the read barrier.
base,
field_addr,
- /* needs_null_check */ false,
- /* always_update_field */ true,
+ /* needs_null_check= */ false,
+ /* always_update_field= */ true,
&temp1,
&temp2);
}
@@ -2666,7 +2369,7 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
- GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
+ GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}
void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
@@ -2674,7 +2377,7 @@ void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
- GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
+ GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}
static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
@@ -2718,93 +2421,98 @@ static void GenOneBit(X86_64Assembler* assembler,
}
// Handle the non-constant cases.
- CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
- if (is_high) {
- // Use architectural support: basically 1 << bsr.
- if (src.IsRegister()) {
+ if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
+ src.IsRegister()) {
+ __ blsi(out, src.AsRegister<CpuRegister>());
+ } else {
+ CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
+ if (is_high) {
+ // Use architectural support: basically 1 << bsr.
+ if (src.IsRegister()) {
+ if (is_long) {
+ __ bsrq(tmp, src.AsRegister<CpuRegister>());
+ } else {
+ __ bsrl(tmp, src.AsRegister<CpuRegister>());
+ }
+ } else if (is_long) {
+ DCHECK(src.IsDoubleStackSlot());
+ __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ }
+ // BSR sets ZF if the input was zero.
+ NearLabel is_zero, done;
+ __ j(kEqual, &is_zero);
+ __ movl(out, Immediate(1)); // Clears upper bits too.
if (is_long) {
- __ bsrq(tmp, src.AsRegister<CpuRegister>());
+ __ shlq(out, tmp);
} else {
- __ bsrl(tmp, src.AsRegister<CpuRegister>());
+ __ shll(out, tmp);
}
- } else if (is_long) {
- DCHECK(src.IsDoubleStackSlot());
- __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- } else {
- DCHECK(src.IsStackSlot());
- __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- }
- // BSR sets ZF if the input was zero.
- NearLabel is_zero, done;
- __ j(kEqual, &is_zero);
- __ movl(out, Immediate(1)); // Clears upper bits too.
- if (is_long) {
- __ shlq(out, tmp);
- } else {
- __ shll(out, tmp);
- }
- __ jmp(&done);
- __ Bind(&is_zero);
- __ xorl(out, out); // Clears upper bits too.
- __ Bind(&done);
- } else {
- // Copy input into temporary.
- if (src.IsRegister()) {
+ __ jmp(&done);
+ __ Bind(&is_zero);
+ __ xorl(out, out); // Clears upper bits too.
+ __ Bind(&done);
+ } else {
+ // Copy input into temporary.
+ if (src.IsRegister()) {
+ if (is_long) {
+ __ movq(tmp, src.AsRegister<CpuRegister>());
+ } else {
+ __ movl(tmp, src.AsRegister<CpuRegister>());
+ }
+ } else if (is_long) {
+ DCHECK(src.IsDoubleStackSlot());
+ __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ } else {
+ DCHECK(src.IsStackSlot());
+ __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
+ }
+ // Do the bit twiddling: basically tmp & -tmp;
if (is_long) {
- __ movq(tmp, src.AsRegister<CpuRegister>());
+ __ movq(out, tmp);
+ __ negq(tmp);
+ __ andq(out, tmp);
} else {
- __ movl(tmp, src.AsRegister<CpuRegister>());
+ __ movl(out, tmp);
+ __ negl(tmp);
+ __ andl(out, tmp);
}
- } else if (is_long) {
- DCHECK(src.IsDoubleStackSlot());
- __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- } else {
- DCHECK(src.IsStackSlot());
- __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
- }
- // Do the bit twiddling: basically tmp & -tmp;
- if (is_long) {
- __ movq(out, tmp);
- __ negq(tmp);
- __ andq(out, tmp);
- } else {
- __ movl(out, tmp);
- __ negl(tmp);
- __ andl(out, tmp);
}
}
}
void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
- CreateOneBitLocations(allocator_, invoke, /* is_high */ true);
+ CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
- GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ false);
+ GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
}
void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
- CreateOneBitLocations(allocator_, invoke, /* is_high */ true);
+ CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
}
void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
- GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ true, /* is_long */ true);
+ GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
}
void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
- CreateOneBitLocations(allocator_, invoke, /* is_high */ false);
+ CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
- GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ false);
+ GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
}
void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
- CreateOneBitLocations(allocator_, invoke, /* is_high */ false);
+ CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
}
void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
- GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true);
+ GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
}
static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
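The reshaped GenOneBit above adds a single-instruction fast path for the lowest-one-bit case: when the feature check passes and the source is in a register, blsi computes the result directly. blsi (a BMI1 instruction, here gated behind the AVX2 feature bit) evaluates src & -src, the same identity the fallback path spells out with mov/neg/and:

    #include <cstdint>

    // The two's-complement identity behind both paths: negation flips every
    // bit above the lowest set bit, so AND isolates exactly that bit.
    uint64_t LowestOneBit(uint64_t x) {
      return x & (~x + 1);   // == x & -x; one blsi on BMI1 hardware
    }
    // x = 0b101100 -> 0b000100;  x = 0 -> 0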
@@ -2869,7 +2577,7 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke*
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
- GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
+ GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}
void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -2877,7 +2585,7 @@ void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* inv
}
void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
- GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
+ GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}
static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
@@ -2937,7 +2645,7 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke*
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
- GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
+ GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
}
void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -2945,7 +2653,7 @@ void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* in
}
void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
- GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
+ GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
}
void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
@@ -2958,58 +2666,49 @@ void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
}
void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
- IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+ IntrinsicVisitor::IntegerValueOfInfo info =
+ IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
LocationSummary* locations = invoke->GetLocations();
X86_64Assembler* assembler = GetAssembler();
CpuRegister out = locations->Out().AsRegister<CpuRegister>();
InvokeRuntimeCallingConvention calling_convention;
- if (invoke->InputAt(0)->IsConstant()) {
+ CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
+ if (invoke->InputAt(0)->IsIntConstant()) {
int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
- if (value >= info.low && value <= info.high) {
+ if (static_cast<uint32_t>(value - info.low) < info.length) {
// Just embed the j.l.Integer in the code.
- ScopedObjectAccess soa(Thread::Current());
- mirror::Object* boxed = info.cache->Get(value + (-info.low));
- DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
- __ movl(out, Immediate(static_cast<int32_t>(address)));
+ DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
+ codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
} else {
+ DCHECK(locations->CanCall());
// Allocate and initialize a new j.l.Integer.
// TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
// JIT object table.
- CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ movl(argument, Immediate(static_cast<int32_t>(address)));
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ movl(Address(out, info.value_offset), Immediate(value));
}
} else {
+ DCHECK(locations->CanCall());
CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
// Check bounds of our cache.
__ leal(out, Address(in, -info.low));
- __ cmpl(out, Immediate(info.high - info.low + 1));
+ __ cmpl(out, Immediate(info.length));
NearLabel allocate, done;
__ j(kAboveEqual, &allocate);
// If the value is within the bounds, load the j.l.Integer directly from the array.
- uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
- uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
- if (data_offset + address <= std::numeric_limits<int32_t>::max()) {
- __ movl(out, Address(out, TIMES_4, data_offset + address));
- } else {
- CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0));
- __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address)));
- __ movl(out, Address(temp, out, TIMES_4, 0));
- }
+ DCHECK_NE(out.AsRegister(), argument.AsRegister());
+ codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
+ static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
+ "Check heap reference size.");
+ __ movl(out, Address(argument, out, TIMES_4, 0));
__ MaybeUnpoisonHeapReference(out);
__ jmp(&done);
__ Bind(&allocate);
// Otherwise allocate and initialize a new j.l.Integer.
- CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
- address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
- __ movl(argument, Immediate(static_cast<int32_t>(address)));
- codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
- CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+ codegen_->AllocateInstanceForIntrinsic(invoke->AsInvokeStaticOrDirect(),
+ info.integer_boot_image_offset);
__ movl(Address(out, info.value_offset), in);
__ Bind(&done);
}
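Compared with the removed code, the cache lookup no longer bakes a raw runtime pointer into the instruction stream: LoadBootImageAddress materializes a boot-image reference for the array data, and AllocateInstanceForIntrinsic takes over the runtime call in the allocation path, which keeps the generated code valid regardless of where the boot image lands (an inference from the visible calls). The scaled load itself is simple index arithmetic:

    #include <cstdint>

    // Shape of the emitted cache load (TIMES_4 scale == 4-byte heap references):
    uint32_t LoadCacheRef(const uint32_t* array_data, int32_t value, int32_t low) {
      uint32_t index = static_cast<uint32_t>(value - low);  // leal out, [in - low]
      return array_data[index];                             // movl out, [base + out*4]
    }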
@@ -3025,7 +2724,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
X86_64Assembler* assembler = GetAssembler();
CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
Address address = Address::Absolute
- (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip */ true);
+ (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
NearLabel done;
__ gs()->movl(out, address);
__ testl(out, out);
@@ -3046,6 +2745,9 @@ void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIB
UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
+UNIMPLEMENTED_INTRINSIC(X86_64, CRC32Update)
+UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateBytes)
+UNIMPLEMENTED_INTRINSIC(X86_64, CRC32UpdateByteBuffer)
UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h
index 5cb601edfe..199cfede1a 100644
--- a/compiler/optimizing/intrinsics_x86_64.h
+++ b/compiler/optimizing/intrinsics_x86_64.h
@@ -30,14 +30,14 @@ namespace x86_64 {
class CodeGeneratorX86_64;
class X86_64Assembler;
-class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
+class IntrinsicLocationsBuilderX86_64 final : public IntrinsicVisitor {
public:
explicit IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen);
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
@@ -55,14 +55,14 @@ class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor {
DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64);
};
-class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor {
+class IntrinsicCodeGeneratorX86_64 final : public IntrinsicVisitor {
public:
explicit IntrinsicCodeGeneratorX86_64(CodeGeneratorX86_64* codegen) : codegen_(codegen) {}
// Define visitor methods.
#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
- void Visit ## Name(HInvoke* invoke) OVERRIDE;
+ void Visit ## Name(HInvoke* invoke) override;
#include "intrinsics_list.h"
INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
#undef INTRINSICS_LIST
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index d3a0376e9c..0edb23b857 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -78,7 +78,8 @@ static void UpdateLoopPhisIn(HEnvironment* environment, HLoopInformation* info)
}
}
-void LICM::Run() {
+bool LICM::Run() {
+ bool didLICM = false;
DCHECK(side_effects_.HasRun());
// Only used during debug.
@@ -157,6 +158,7 @@ void LICM::Run() {
}
instruction->MoveBefore(pre_header->GetLastInstruction());
MaybeRecordStat(stats_, MethodCompilationStat::kLoopInvariantMoved);
+ didLICM = true;
}
if (!can_move && (instruction->CanThrow() || instruction->DoesAnyWrite())) {
@@ -167,6 +169,7 @@ void LICM::Run() {
}
}
}
+ return didLICM;
}
} // namespace art
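The licm.cc and licm.h hunks convert the pass entry point from void Run() to bool Run(): LICM now reports whether it hoisted anything, accumulating the result in didLICM. A standalone sketch of the protocol (names illustrative, not ART's):

    #include <vector>

    struct Pass {
      virtual ~Pass() = default;
      virtual bool Run() = 0;  // true iff the pass changed anything
    };

    // Toy "hoister": moves even values out of the loop body, reporting change.
    struct ToyHoister final : Pass {
      std::vector<int>* loop_body;
      std::vector<int>* pre_header;
      bool Run() override {
        bool changed = false;  // mirrors LICM's didLICM accumulator
        for (auto it = loop_body->begin(); it != loop_body->end();) {
          if (*it % 2 == 0) {
            pre_header->push_back(*it);
            it = loop_body->erase(it);
            changed = true;
          } else {
            ++it;
          }
        }
        return changed;  // lets the pass driver know whether anything moved
      }
    };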
diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h
index ee567aeb20..9cafddb05a 100644
--- a/compiler/optimizing/licm.h
+++ b/compiler/optimizing/licm.h
@@ -33,7 +33,7 @@ class LICM : public HOptimization {
: HOptimization(graph, name, stats),
side_effects_(side_effects) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kLoopInvariantCodeMotionPassName = "licm";
diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc
index 9fa5b74c62..50bfe843b5 100644
--- a/compiler/optimizing/linearize_test.cc
+++ b/compiler/optimizing/linearize_test.cc
@@ -16,11 +16,9 @@
#include <fstream>
-#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
-#include "code_generator_x86.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
#include "driver/compiler_options.h"
@@ -43,10 +41,8 @@ template <size_t number_of_blocks>
void LinearizeTest::TestCode(const std::vector<uint16_t>& data,
const uint32_t (&expected_order)[number_of_blocks]) {
HGraph* graph = CreateCFG(data);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
+ std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_);
+ SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator());
liveness.Analyze();
ASSERT_EQ(graph->GetLinearOrder().size(), number_of_blocks);
diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc
index 66660662e4..60f513ca48 100644
--- a/compiler/optimizing/live_ranges_test.cc
+++ b/compiler/optimizing/live_ranges_test.cc
@@ -14,11 +14,9 @@
* limitations under the License.
*/
-#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
-#include "code_generator_x86.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
#include "driver/compiler_options.h"
@@ -40,7 +38,7 @@ HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) {
// on how instructions are ordered.
RemoveSuspendChecks(graph);
// `Inline` conditions into ifs.
- PrepareForRegisterAllocation(graph).Run();
+ PrepareForRegisterAllocation(graph, *compiler_options_).Run();
return graph;
}
@@ -63,10 +61,8 @@ TEST_F(LiveRangesTest, CFG1) {
HGraph* graph = BuildGraph(data);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
+ std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_);
+ SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator());
liveness.Analyze();
LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
@@ -109,10 +105,8 @@ TEST_F(LiveRangesTest, CFG2) {
Instruction::RETURN | 0 << 8);
HGraph* graph = BuildGraph(data);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
+ std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_);
+ SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator());
liveness.Analyze();
LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval();
@@ -158,10 +152,8 @@ TEST_F(LiveRangesTest, CFG3) {
Instruction::RETURN | 0 << 8);
HGraph* graph = BuildGraph(data);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
+ std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_);
+ SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator());
liveness.Analyze();
// Test for the 4 constant.
@@ -235,10 +227,8 @@ TEST_F(LiveRangesTest, Loop1) {
HGraph* graph = BuildGraph(data);
RemoveSuspendChecks(graph);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
+ std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_);
+ SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator());
liveness.Analyze();
// Test for the 0 constant.
@@ -312,10 +302,8 @@ TEST_F(LiveRangesTest, Loop2) {
Instruction::RETURN | 0 << 8);
HGraph* graph = BuildGraph(data);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
+ std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_);
+ SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator());
liveness.Analyze();
// Test for the 0 constant.
@@ -388,10 +376,8 @@ TEST_F(LiveRangesTest, CFG4) {
Instruction::RETURN);
HGraph* graph = BuildGraph(data);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
+ std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_);
+ SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator());
liveness.Analyze();
// Test for the 0 constant.
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index 6621a03568..f11f7a9779 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -14,11 +14,9 @@
* limitations under the License.
*/
-#include "arch/x86/instruction_set_features_x86.h"
#include "base/arena_allocator.h"
#include "builder.h"
#include "code_generator.h"
-#include "code_generator_x86.h"
#include "dex/dex_file.h"
#include "dex/dex_instruction.h"
#include "driver/compiler_options.h"
@@ -49,11 +47,9 @@ static void DumpBitVector(BitVector* vector,
void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expected) {
HGraph* graph = CreateCFG(data);
// `Inline` conditions into ifs.
- PrepareForRegisterAllocation(graph).Run();
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
- SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
+ PrepareForRegisterAllocation(graph, *compiler_options_).Run();
+ std::unique_ptr<CodeGenerator> codegen = CodeGenerator::Create(graph, *compiler_options_);
+ SsaLivenessAnalysis liveness(graph, codegen.get(), GetScopedAllocator());
liveness.Analyze();
std::ostringstream buffer;
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
index 8b1812a6de..7d7bb94933 100644
--- a/compiler/optimizing/load_store_analysis.cc
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -152,7 +152,7 @@ bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1,
return true;
}
-void LoadStoreAnalysis::Run() {
+bool LoadStoreAnalysis::Run() {
for (HBasicBlock* block : graph_->GetReversePostOrder()) {
heap_location_collector_.VisitBasicBlock(block);
}
@@ -160,22 +160,23 @@ void LoadStoreAnalysis::Run() {
if (heap_location_collector_.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
// Bail out if there are too many heap locations to deal with.
heap_location_collector_.CleanUp();
- return;
+ return false;
}
if (!heap_location_collector_.HasHeapStores()) {
// Without heap stores, this pass would act mostly as GVN on heap accesses.
heap_location_collector_.CleanUp();
- return;
+ return false;
}
if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) {
// Don't do load/store elimination if the method has volatile field accesses or
// monitor operations, for now.
// TODO: do it right.
heap_location_collector_.CleanUp();
- return;
+ return false;
}
heap_location_collector_.BuildAliasingMatrix();
+ return true;
}
} // namespace art
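
LoadStoreAnalysis::Run() now reports whether it produced usable heap-location data, so a client can gate dependent work on the result. Hypothetical wiring, not the actual ART pass list (constructor arguments abbreviated):

    // Sketch only: run the analysis, then run elimination only if the analysis
    // built heap locations and an aliasing matrix rather than bailing out.
    LoadStoreAnalysis lsa(graph);
    if (lsa.Run()) {
      LoadStoreElimination(graph, side_effects, lsa, stats).Run();
    }
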
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
index 437e6be418..08d9309a3e 100644
--- a/compiler/optimizing/load_store_analysis.h
+++ b/compiler/optimizing/load_store_analysis.h
@@ -94,11 +94,13 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> {
static constexpr int16_t kDeclaringClassDefIndexForArrays = -1;
HeapLocation(ReferenceInfo* ref_info,
+ DataType::Type type,
size_t offset,
HInstruction* index,
size_t vector_length,
int16_t declaring_class_def_index)
: ref_info_(ref_info),
+ type_(DataType::ToSigned(type)),
offset_(offset),
index_(index),
vector_length_(vector_length),
@@ -116,6 +118,7 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> {
}
ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
+ DataType::Type GetType() const { return type_; }
size_t GetOffset() const { return offset_; }
HInstruction* GetIndex() const { return index_; }
size_t GetVectorLength() const { return vector_length_; }
@@ -149,6 +152,10 @@ class HeapLocation : public ArenaObject<kArenaAllocLSA> {
private:
// Reference for instance/static field, array element or vector data.
ReferenceInfo* const ref_info_;
+  // Type of the data residing at this HeapLocation (always signed for integral
+  // data, since e.g. a[i] and a[i] & 0xff may be represented by types of different
+  // signedness; char vs short accesses are disambiguated through the reference).
+ const DataType::Type type_;
// Offset of static/instance field.
// Invalid when this HeapLocation is not field.
const size_t offset_;
@@ -237,19 +244,31 @@ class HeapLocationCollector : public HGraphVisitor {
DCHECK(object != nullptr);
DCHECK(field != nullptr);
return FindHeapLocationIndex(FindReferenceInfoOf(HuntForOriginalReference(object)),
+ field->GetFieldType(),
field->GetFieldOffset().SizeValue(),
nullptr,
HeapLocation::kScalar,
field->GetDeclaringClassDefIndex());
}
- size_t GetArrayHeapLocation(HInstruction* array,
- HInstruction* index,
- size_t vector_length = HeapLocation::kScalar) const {
- DCHECK(array != nullptr);
- DCHECK(index != nullptr);
- DCHECK_GE(vector_length, HeapLocation::kScalar);
+ size_t GetArrayHeapLocation(HInstruction* instruction) const {
+ DCHECK(instruction != nullptr);
+ HInstruction* array = instruction->InputAt(0);
+ HInstruction* index = instruction->InputAt(1);
+ DataType::Type type = instruction->GetType();
+ size_t vector_length = HeapLocation::kScalar;
+ if (instruction->IsArraySet()) {
+ type = instruction->AsArraySet()->GetComponentType();
+ } else if (instruction->IsVecStore() ||
+ instruction->IsVecLoad()) {
+ HVecOperation* vec_op = instruction->AsVecOperation();
+ type = vec_op->GetPackedType();
+ vector_length = vec_op->GetVectorLength();
+ } else {
+ DCHECK(instruction->IsArrayGet());
+ }
return FindHeapLocationIndex(FindReferenceInfoOf(HuntForOriginalReference(array)),
+ type,
HeapLocation::kInvalidFieldOffset,
index,
vector_length,
@@ -279,13 +298,16 @@ class HeapLocationCollector : public HGraphVisitor {
// In later analysis, ComputeMayAlias() and MayAlias() compute and tell whether
// these indexes alias.
size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
+ DataType::Type type,
size_t offset,
HInstruction* index,
size_t vector_length,
int16_t declaring_class_def_index) const {
+ DataType::Type lookup_type = DataType::ToSigned(type);
for (size_t i = 0; i < heap_locations_.size(); i++) {
HeapLocation* loc = heap_locations_[i];
if (loc->GetReferenceInfo() == ref_info &&
+ loc->GetType() == lookup_type &&
loc->GetOffset() == offset &&
loc->GetIndex() == index &&
loc->GetVectorLength() == vector_length &&
@@ -425,6 +447,7 @@ class HeapLocationCollector : public HGraphVisitor {
}
HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
+ DataType::Type type,
size_t offset,
HInstruction* index,
size_t vector_length,
@@ -432,10 +455,10 @@ class HeapLocationCollector : public HGraphVisitor {
HInstruction* original_ref = HuntForOriginalReference(ref);
ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref);
size_t heap_location_idx = FindHeapLocationIndex(
- ref_info, offset, index, vector_length, declaring_class_def_index);
+ ref_info, type, offset, index, vector_length, declaring_class_def_index);
if (heap_location_idx == kHeapLocationNotFound) {
HeapLocation* heap_loc = new (GetGraph()->GetAllocator())
- HeapLocation(ref_info, offset, index, vector_length, declaring_class_def_index);
+ HeapLocation(ref_info, type, offset, index, vector_length, declaring_class_def_index);
heap_locations_.push_back(heap_loc);
return heap_loc;
}
@@ -446,29 +469,35 @@ class HeapLocationCollector : public HGraphVisitor {
if (field_info.IsVolatile()) {
has_volatile_ = true;
}
+ DataType::Type type = field_info.GetFieldType();
const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
const size_t offset = field_info.GetFieldOffset().SizeValue();
return GetOrCreateHeapLocation(ref,
+ type,
offset,
nullptr,
HeapLocation::kScalar,
declaring_class_def_index);
}
- void VisitArrayAccess(HInstruction* array, HInstruction* index, size_t vector_length) {
+ void VisitArrayAccess(HInstruction* array,
+ HInstruction* index,
+ DataType::Type type,
+ size_t vector_length) {
GetOrCreateHeapLocation(array,
+ type,
HeapLocation::kInvalidFieldOffset,
index,
vector_length,
HeapLocation::kDeclaringClassDefIndexForArrays);
}
- void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
+ void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
CreateReferenceInfoForReferenceType(instruction);
}
- void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
+ void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
has_heap_stores_ = true;
if (location->GetReferenceInfo()->IsSingleton()) {
@@ -494,12 +523,12 @@ class HeapLocationCollector : public HGraphVisitor {
}
}
- void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
+ void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
CreateReferenceInfoForReferenceType(instruction);
}
- void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+ void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
has_heap_stores_ = true;
}
@@ -507,35 +536,39 @@ class HeapLocationCollector : public HGraphVisitor {
// We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses
// since we cannot accurately track the fields.
- void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
+ void VisitArrayGet(HArrayGet* instruction) override {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
- VisitArrayAccess(array, index, HeapLocation::kScalar);
+ DataType::Type type = instruction->GetType();
+ VisitArrayAccess(array, index, type, HeapLocation::kScalar);
CreateReferenceInfoForReferenceType(instruction);
}
- void VisitArraySet(HArraySet* instruction) OVERRIDE {
+ void VisitArraySet(HArraySet* instruction) override {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
- VisitArrayAccess(array, index, HeapLocation::kScalar);
+ DataType::Type type = instruction->GetComponentType();
+ VisitArrayAccess(array, index, type, HeapLocation::kScalar);
has_heap_stores_ = true;
}
- void VisitVecLoad(HVecLoad* instruction) OVERRIDE {
+ void VisitVecLoad(HVecLoad* instruction) override {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
- VisitArrayAccess(array, index, instruction->GetVectorLength());
+ DataType::Type type = instruction->GetPackedType();
+ VisitArrayAccess(array, index, type, instruction->GetVectorLength());
CreateReferenceInfoForReferenceType(instruction);
}
- void VisitVecStore(HVecStore* instruction) OVERRIDE {
+ void VisitVecStore(HVecStore* instruction) override {
HInstruction* array = instruction->InputAt(0);
HInstruction* index = instruction->InputAt(1);
- VisitArrayAccess(array, index, instruction->GetVectorLength());
+ DataType::Type type = instruction->GetPackedType();
+ VisitArrayAccess(array, index, type, instruction->GetVectorLength());
has_heap_stores_ = true;
}
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
// Any new-instance or new-array cannot alias with references that
// pre-exist the new-instance/new-array. We append entries into
// ref_info_array_ which keeps track of the order of creation
@@ -547,7 +580,7 @@ class HeapLocationCollector : public HGraphVisitor {
CreateReferenceInfoForReferenceType(instruction);
}
- void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
+ void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) override {
has_monitor_operations_ = true;
}
@@ -572,7 +605,7 @@ class LoadStoreAnalysis : public HOptimization {
return heap_location_collector_;
}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kLoadStoreAnalysisPassName = "load_store_analysis";
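
Keying heap locations on DataType::ToSigned(type) means two accesses that differ only in integral signedness (e.g. an array load typed kInt8 versus the same load masked to kUint8) resolve to the same location. A standalone sketch of that canonicalization, using an illustrative enum rather than the real DataType:

    enum class Type { kInt8, kUint8, kInt16, kUint16, kInt32 };

    // Mirrors the role of DataType::ToSigned(): map unsigned integral types to
    // their signed counterparts so both spellings of an access produce the same
    // heap-location lookup key.
    Type ToSigned(Type type) {
      switch (type) {
        case Type::kUint8:  return Type::kInt8;
        case Type::kUint16: return Type::kInt16;
        default:            return type;
      }
    }

This is also why load_store_elimination.cc below can drop its type-mismatch special case: once locations are typed, a matched load and heap value can no longer disagree in kind.
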
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
index 56361a8c90..bfe7a4f72f 100644
--- a/compiler/optimizing/load_store_analysis_test.cc
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -78,12 +78,16 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) {
// Test queries on HeapLocationCollector's ref info and index records.
ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array);
+ DataType::Type type = DataType::Type::kInt32;
size_t field = HeapLocation::kInvalidFieldOffset;
size_t vec = HeapLocation::kScalar;
size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays;
- size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field, c1, vec, class_def);
- size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field, c2, vec, class_def);
- size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field, index, vec, class_def);
+ size_t loc1 = heap_location_collector.FindHeapLocationIndex(
+ ref, type, field, c1, vec, class_def);
+ size_t loc2 = heap_location_collector.FindHeapLocationIndex(
+ ref, type, field, c2, vec, class_def);
+ size_t loc3 = heap_location_collector.FindHeapLocationIndex(
+ ref, type, field, index, vec, class_def);
// must find this reference info for array in HeapLocationCollector.
ASSERT_TRUE(ref != nullptr);
// must find these heap locations;
@@ -246,28 +250,28 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) {
size_t loc2 = HeapLocationCollector::kHeapLocationNotFound;
// Test alias: array[0] and array[1]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, c0);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, c1);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set1);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set2);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+0] and array[i-0]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, add0);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, sub0);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set3);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set5);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+1] and array[i-1]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, add1);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, sub1);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set6);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+1] and array[1-i]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, add1);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, rev_sub1);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set7);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+1] and array[i-(-1)]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, add1);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_neg1);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set4);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set8);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
}
@@ -409,70 +413,75 @@ TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) {
size_t loc1, loc2;
// Test alias: array[0] and array[0,1,2,3]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, c0);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_0);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
+ // Test alias: array[0] and array[1,2,3,4]
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_1);
+ ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+
// Test alias: array[0] and array[8,9,10,11]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, c0);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_8);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[1] and array[8,9,10,11]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, c1);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_1);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_8);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[1] and array[0,1,2,3]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, c1);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_1);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_0);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[0,1,2,3] and array[8,9,10,11]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, c0, 4);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(vstore_0);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_8);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[0,1,2,3] and array[1,2,3,4]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, c1, 4);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(vstore_0);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_1);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[0] and array[i,i+1,i+2,i+3]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, c0);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i] and array[0,1,2,3]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, index);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_0);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i] and array[i,i+1,i+2,i+3]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, index);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i] and array[i+8,i+9,i+10,i+11]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, index);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i_add8);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+6,i+7,i+8,i+9] and array[i+8,i+9,i+10,i+11]
// Test partial overlap.
- loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 4);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(vstore_i_add6);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i_add8);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+6,i+7] and array[i,i+1,i+2,i+3]
// Test different vector lengths.
- loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 2);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(vstore_i_add6_vlen2);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+6,i+7] and array[i+8,i+9,i+10,i+11]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 2);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4);
+ loc1 = heap_location_collector.GetArrayHeapLocation(vstore_i_add6_vlen2);
+ loc2 = heap_location_collector.GetArrayHeapLocation(vstore_i_add8);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
}
@@ -563,33 +572,33 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexCalculationOverflowTest) {
size_t loc2 = HeapLocationCollector::kHeapLocationNotFound;
// Test alias: array[i+0x80000000] and array[i-0x80000000]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x80000000);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_1);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_2);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+0x10] and array[i-0xFFFFFFF0]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x10);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0xFFFFFFF0);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_3);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_4);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+0x7FFFFFFF] and array[i-0x80000001]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x7FFFFFFF);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000001);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_5);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_6);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Test alias: array[i+0] and array[i-0]
- loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_7);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_8);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
// Should not alias:
- loc1 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000001);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_2);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_6);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
// Should not alias:
- loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0);
- loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_7);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_2);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
}
@@ -647,10 +656,10 @@ TEST_F(LoadStoreAnalysisTest, TestHuntOriginalRef) {
// times the original reference has been transformed by BoundType,
// NullCheck, IntermediateAddress, etc.
ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 1U);
- size_t loc1 = heap_location_collector.GetArrayHeapLocation(array, c1);
- size_t loc2 = heap_location_collector.GetArrayHeapLocation(bound_type, c1);
- size_t loc3 = heap_location_collector.GetArrayHeapLocation(null_check, c1);
- size_t loc4 = heap_location_collector.GetArrayHeapLocation(inter_addr, c1);
+ size_t loc1 = heap_location_collector.GetArrayHeapLocation(array_get1);
+ size_t loc2 = heap_location_collector.GetArrayHeapLocation(array_get2);
+ size_t loc3 = heap_location_collector.GetArrayHeapLocation(array_get3);
+ size_t loc4 = heap_location_collector.GetArrayHeapLocation(array_get4);
ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound);
ASSERT_EQ(loc1, loc2);
ASSERT_EQ(loc1, loc3);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 237ecd3c10..b33d0f488e 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -107,7 +107,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) {
}
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ void VisitBasicBlock(HBasicBlock* block) override {
// Populate the heap_values array for this block.
// TODO: try to reuse the heap_values array from one predecessor if possible.
if (block->IsLoopHeader()) {
@@ -160,7 +160,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
// Scan the list of removed loads to see if we can reuse `type_conversion`, if
// the other removed load has the same substitute and type and is dominated
- // by `type_conversioni`.
+ // by `type_conversion`.
void TryToReuseTypeConversion(HInstruction* type_conversion, size_t index) {
size_t size = removed_loads_.size();
HInstruction* load = removed_loads_[index];
@@ -458,8 +458,13 @@ class LSEVisitor : public HGraphDelegateVisitor {
}
if (from_all_predecessors) {
if (ref_info->IsSingletonAndRemovable() &&
- block->IsSingleReturnOrReturnVoidAllowingPhis()) {
- // Values in the singleton are not needed anymore.
+ (block->IsSingleReturnOrReturnVoidAllowingPhis() ||
+ (block->EndsWithReturn() && (merged_value != kUnknownHeapValue ||
+ merged_store_value != kUnknownHeapValue)))) {
+ // Values in the singleton are not needed anymore:
+ // (1) if this block consists of a sole return, or
+ // (2) if this block returns and a usable merged value is obtained
+ // (loads prior to the return will always use that value).
} else if (!IsStore(merged_value)) {
// We don't track merged value as a store anymore. We have to
// hold the stores in predecessors live here.
@@ -542,16 +547,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
}
}
- void VisitGetLocation(HInstruction* instruction,
- HInstruction* ref,
- size_t offset,
- HInstruction* index,
- size_t vector_length,
- int16_t declaring_class_def_index) {
- HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
- ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
- size_t idx = heap_location_collector_.FindHeapLocationIndex(
- ref_info, offset, index, vector_length, declaring_class_def_index);
+ void VisitGetLocation(HInstruction* instruction, size_t idx) {
DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
ScopedArenaVector<HInstruction*>& heap_values =
heap_values_for_[instruction->GetBlock()->GetBlockId()];
@@ -569,23 +565,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
heap_values[idx] = instruction;
KeepStoresIfAliasedToLocation(heap_values, idx);
} else {
- if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) {
- // The only situation where the same heap location has different type is when
- // we do an array get on an instruction that originates from the null constant
- // (the null could be behind a field access, an array access, a null check or
- // a bound type).
- // In order to stay properly typed on primitive types, we do not eliminate
- // the array gets.
- if (kIsDebugBuild) {
- DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName();
- DCHECK(instruction->IsArrayGet()) << instruction->DebugName();
- }
- // Load isn't eliminated. Put the load as the value into the HeapLocation.
- // This acts like GVN but with better aliasing analysis.
- heap_values[idx] = instruction;
- KeepStoresIfAliasedToLocation(heap_values, idx);
- return;
- }
+ // Load is eliminated.
AddRemovedLoad(instruction, heap_value);
TryRemovingNullCheck(instruction);
}
@@ -610,21 +590,11 @@ class LSEVisitor : public HGraphDelegateVisitor {
return false;
}
- void VisitSetLocation(HInstruction* instruction,
- HInstruction* ref,
- size_t offset,
- HInstruction* index,
- size_t vector_length,
- int16_t declaring_class_def_index,
- HInstruction* value) {
+ void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
+ DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
DCHECK(!IsStore(value)) << value->DebugName();
// value may already have a substitute.
value = FindSubstitute(value);
- HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
- ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
- size_t idx = heap_location_collector_.FindHeapLocationIndex(
- ref_info, offset, index, vector_length, declaring_class_def_index);
- DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
ScopedArenaVector<HInstruction*>& heap_values =
heap_values_for_[instruction->GetBlock()->GetBlockId()];
HInstruction* heap_value = heap_values[idx];
@@ -644,7 +614,8 @@ class LSEVisitor : public HGraphDelegateVisitor {
} else if (!loop_info->IsIrreducible()) {
// instruction is a store in the loop so the loop must do write.
DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
- if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) {
+ ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
+ if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(ref_info->GetReference())) {
// original_ref is created inside the loop. Value stored to it isn't needed at
// the loop header. This is true for outer loops also.
possibly_redundant = true;
@@ -685,83 +656,43 @@ class LSEVisitor : public HGraphDelegateVisitor {
}
}
- void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
- HInstruction* obj = instruction->InputAt(0);
- size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue();
- int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex();
- VisitGetLocation(instruction,
- obj,
- offset,
- nullptr,
- HeapLocation::kScalar,
- declaring_class_def_index);
+ void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
+ HInstruction* object = instruction->InputAt(0);
+ const FieldInfo& field = instruction->GetFieldInfo();
+ VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
}
- void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
- HInstruction* obj = instruction->InputAt(0);
- size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue();
- int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex();
+ void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
+ HInstruction* object = instruction->InputAt(0);
+ const FieldInfo& field = instruction->GetFieldInfo();
HInstruction* value = instruction->InputAt(1);
- VisitSetLocation(instruction,
- obj,
- offset,
- nullptr,
- HeapLocation::kScalar,
- declaring_class_def_index,
- value);
+ size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
+ VisitSetLocation(instruction, idx, value);
}
- void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
+ void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
HInstruction* cls = instruction->InputAt(0);
- size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue();
- int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex();
- VisitGetLocation(instruction,
- cls,
- offset,
- nullptr,
- HeapLocation::kScalar,
- declaring_class_def_index);
+ const FieldInfo& field = instruction->GetFieldInfo();
+ VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
}
- void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+ void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
HInstruction* cls = instruction->InputAt(0);
- size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue();
- int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex();
- HInstruction* value = instruction->InputAt(1);
- VisitSetLocation(instruction,
- cls,
- offset,
- nullptr,
- HeapLocation::kScalar,
- declaring_class_def_index,
- value);
- }
-
- void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
- HInstruction* array = instruction->InputAt(0);
- HInstruction* index = instruction->InputAt(1);
- VisitGetLocation(instruction,
- array,
- HeapLocation::kInvalidFieldOffset,
- index,
- HeapLocation::kScalar,
- HeapLocation::kDeclaringClassDefIndexForArrays);
- }
-
- void VisitArraySet(HArraySet* instruction) OVERRIDE {
- HInstruction* array = instruction->InputAt(0);
- HInstruction* index = instruction->InputAt(1);
- HInstruction* value = instruction->InputAt(2);
- VisitSetLocation(instruction,
- array,
- HeapLocation::kInvalidFieldOffset,
- index,
- HeapLocation::kScalar,
- HeapLocation::kDeclaringClassDefIndexForArrays,
- value);
- }
-
- void VisitDeoptimize(HDeoptimize* instruction) {
+ const FieldInfo& field = instruction->GetFieldInfo();
+ size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
+ VisitSetLocation(instruction, idx, instruction->InputAt(1));
+ }
+
+ void VisitArrayGet(HArrayGet* instruction) override {
+ VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
+ }
+
+ void VisitArraySet(HArraySet* instruction) override {
+ size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
+ VisitSetLocation(instruction, idx, instruction->InputAt(2));
+ }
+
+ void VisitDeoptimize(HDeoptimize* instruction) override {
const ScopedArenaVector<HInstruction*>& heap_values =
heap_values_for_[instruction->GetBlock()->GetBlockId()];
for (HInstruction* heap_value : heap_values) {
@@ -812,15 +743,15 @@ class LSEVisitor : public HGraphDelegateVisitor {
}
}
- void VisitReturn(HReturn* instruction) OVERRIDE {
+ void VisitReturn(HReturn* instruction) override {
HandleExit(instruction->GetBlock());
}
- void VisitReturnVoid(HReturnVoid* return_void) OVERRIDE {
+ void VisitReturnVoid(HReturnVoid* return_void) override {
HandleExit(return_void->GetBlock());
}
- void VisitThrow(HThrow* throw_instruction) OVERRIDE {
+ void VisitThrow(HThrow* throw_instruction) override {
HandleExit(throw_instruction->GetBlock());
}
@@ -846,35 +777,35 @@ class LSEVisitor : public HGraphDelegateVisitor {
}
}
- void VisitInvoke(HInvoke* invoke) OVERRIDE {
+ void VisitInvoke(HInvoke* invoke) override {
HandleInvoke(invoke);
}
- void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE {
+ void VisitClinitCheck(HClinitCheck* clinit) override {
HandleInvoke(clinit);
}
- void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE {
+ void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
// Conservatively treat it as an invocation.
HandleInvoke(instruction);
}
- void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE {
+ void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
// Conservatively treat it as an invocation.
HandleInvoke(instruction);
}
- void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE {
+ void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
// Conservatively treat it as an invocation.
HandleInvoke(instruction);
}
- void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE {
+ void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
// Conservatively treat it as an invocation.
HandleInvoke(instruction);
}
- void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
+ void VisitNewInstance(HNewInstance* new_instance) override {
ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
if (ref_info == nullptr) {
// new_instance isn't used for field accesses. No need to process it.
@@ -898,7 +829,7 @@ class LSEVisitor : public HGraphDelegateVisitor {
}
}
- void VisitNewArray(HNewArray* new_array) OVERRIDE {
+ void VisitNewArray(HNewArray* new_array) override {
ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
if (ref_info == nullptr) {
// new_array isn't used for array accesses. No need to process it.
@@ -948,22 +879,22 @@ class LSEVisitor : public HGraphDelegateVisitor {
DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};
-void LoadStoreElimination::Run() {
+bool LoadStoreElimination::Run() {
if (graph_->IsDebuggable() || graph_->HasTryCatch()) {
// Debugger may set heap values or trigger deoptimization of callers.
// Try/catch support not implemented yet.
// Skip this optimization.
- return;
+ return false;
}
const HeapLocationCollector& heap_location_collector = lsa_.GetHeapLocationCollector();
if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
// No HeapLocation information from LSA, skip this optimization.
- return;
+ return false;
}
// TODO: analyze VecLoad/VecStore better.
if (graph_->HasSIMD()) {
- return;
+ return false;
}
LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_);
@@ -971,6 +902,8 @@ void LoadStoreElimination::Run() {
lse_visitor.VisitBasicBlock(block);
}
lse_visitor.RemoveInstructions();
+
+ return true;
}
} // namespace art
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index 7153541baf..f7ba41a1af 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -35,7 +35,7 @@ class LoadStoreElimination : public HOptimization {
side_effects_(side_effects),
lsa_(lsa) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kLoadStoreEliminationPassName = "load_store_elimination";
diff --git a/compiler/optimizing/loop_analysis.cc b/compiler/optimizing/loop_analysis.cc
new file mode 100644
index 0000000000..2ae3683ffa
--- /dev/null
+++ b/compiler/optimizing/loop_analysis.cc
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "loop_analysis.h"
+
+#include "base/bit_vector-inl.h"
+#include "induction_var_range.h"
+
+namespace art {
+
+void LoopAnalysis::CalculateLoopBasicProperties(HLoopInformation* loop_info,
+ LoopAnalysisInfo* analysis_results,
+ int64_t trip_count) {
+ analysis_results->trip_count_ = trip_count;
+
+ for (HBlocksInLoopIterator block_it(*loop_info);
+ !block_it.Done();
+ block_it.Advance()) {
+ HBasicBlock* block = block_it.Current();
+
+    // Check whether one of the successors is a loop exit.
+ for (HBasicBlock* successor : block->GetSuccessors()) {
+ if (!loop_info->Contains(*successor)) {
+ analysis_results->exits_num_++;
+
+        // We track the number of invariant loop exits which correspond to an HIf
+        // instruction and can be eliminated by loop peeling; other control flow
+        // instructions are ignored and will not trigger loop peeling, as they
+        // either cannot be inside a loop, by definition cannot be loop exits
+        // (unconditional instructions), or are not beneficial for the optimization.
+ HIf* hif = block->GetLastInstruction()->AsIf();
+ if (hif != nullptr && !loop_info->Contains(*hif->InputAt(0)->GetBlock())) {
+ analysis_results->invariant_exits_num_++;
+ }
+ }
+ }
+
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instruction = it.Current();
+      if (instruction->GetType() == DataType::Type::kInt64) {
+ analysis_results->has_long_type_instructions_ = true;
+ }
+ if (MakesScalarPeelingUnrollingNonBeneficial(instruction)) {
+ analysis_results->has_instructions_preventing_scalar_peeling_ = true;
+ analysis_results->has_instructions_preventing_scalar_unrolling_ = true;
+ }
+ analysis_results->instr_num_++;
+ }
+ analysis_results->bb_num_++;
+ }
+}
+
+int64_t LoopAnalysis::GetLoopTripCount(HLoopInformation* loop_info,
+ const InductionVarRange* induction_range) {
+ int64_t trip_count;
+ if (!induction_range->HasKnownTripCount(loop_info, &trip_count)) {
+ trip_count = LoopAnalysisInfo::kUnknownTripCount;
+ }
+ return trip_count;
+}
+
+// Default implementation of loop helper; used for all targets unless a custom implementation
+// is provided. Enables scalar loop peeling and unrolling with the most conservative heuristics.
+class ArchDefaultLoopHelper : public ArchNoOptsLoopHelper {
+ public:
+ // Scalar loop unrolling parameters and heuristics.
+ //
+ // Maximum possible unrolling factor.
+ static constexpr uint32_t kScalarMaxUnrollFactor = 2;
+ // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kScalarHeuristicMaxBodySizeInstr = 17;
+ // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kScalarHeuristicMaxBodySizeBlocks = 6;
+ // Maximum number of instructions to be created as a result of full unrolling.
+ static constexpr uint32_t kScalarHeuristicFullyUnrolledMaxInstrThreshold = 35;
+
+ bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* analysis_info) const override {
+ return analysis_info->HasLongTypeInstructions() ||
+ IsLoopTooBig(analysis_info,
+ kScalarHeuristicMaxBodySizeInstr,
+ kScalarHeuristicMaxBodySizeBlocks);
+ }
+
+ uint32_t GetScalarUnrollingFactor(const LoopAnalysisInfo* analysis_info) const override {
+ int64_t trip_count = analysis_info->GetTripCount();
+ // Unroll only loops with known trip count.
+ if (trip_count == LoopAnalysisInfo::kUnknownTripCount) {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
+ }
+ uint32_t desired_unrolling_factor = kScalarMaxUnrollFactor;
+ if (trip_count < desired_unrolling_factor || trip_count % desired_unrolling_factor != 0) {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
+ }
+
+ return desired_unrolling_factor;
+ }
+
+ bool IsLoopPeelingEnabled() const override { return true; }
+
+ bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info) const override {
+ int64_t trip_count = analysis_info->GetTripCount();
+ // We assume that trip count is known.
+ DCHECK_NE(trip_count, LoopAnalysisInfo::kUnknownTripCount);
+ size_t instr_num = analysis_info->GetNumberOfInstructions();
+ return (trip_count * instr_num < kScalarHeuristicFullyUnrolledMaxInstrThreshold);
+ }
+
+ protected:
+ bool IsLoopTooBig(LoopAnalysisInfo* loop_analysis_info,
+ size_t instr_threshold,
+ size_t bb_threshold) const {
+ size_t instr_num = loop_analysis_info->GetNumberOfInstructions();
+ size_t bb_num = loop_analysis_info->GetNumberOfBasicBlocks();
+ return (instr_num >= instr_threshold || bb_num >= bb_threshold);
+ }
+};
+
+// Custom implementation of loop helper for arm64 target. Enables heuristics for scalar loop
+// peeling and unrolling and supports SIMD loop unrolling.
+class Arm64LoopHelper : public ArchDefaultLoopHelper {
+ public:
+ // SIMD loop unrolling parameters and heuristics.
+ //
+ // Maximum possible unrolling factor.
+ static constexpr uint32_t kArm64SimdMaxUnrollFactor = 8;
+ // Loop's maximum instruction count. Loops with higher count will not be unrolled.
+ static constexpr uint32_t kArm64SimdHeuristicMaxBodySizeInstr = 50;
+
+ // Loop's maximum instruction count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeInstr = 40;
+ // Loop's maximum basic block count. Loops with higher count will not be peeled/unrolled.
+ static constexpr uint32_t kArm64ScalarHeuristicMaxBodySizeBlocks = 8;
+
+ bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* loop_analysis_info) const override {
+ return IsLoopTooBig(loop_analysis_info,
+ kArm64ScalarHeuristicMaxBodySizeInstr,
+ kArm64ScalarHeuristicMaxBodySizeBlocks);
+ }
+
+ uint32_t GetSIMDUnrollingFactor(HBasicBlock* block,
+ int64_t trip_count,
+ uint32_t max_peel,
+ uint32_t vector_length) const override {
+ // Don't unroll with insufficient iterations.
+ // TODO: Unroll loops with unknown trip count.
+ DCHECK_NE(vector_length, 0u);
+ if (trip_count < (2 * vector_length + max_peel)) {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
+ }
+ // Don't unroll for large loop body size.
+ uint32_t instruction_count = block->GetInstructions().CountSize();
+ if (instruction_count >= kArm64SimdHeuristicMaxBodySizeInstr) {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
+ }
+ // Find a beneficial unroll factor with the following restrictions:
+ // - At least one iteration of the transformed loop should be executed.
+ // - The loop body shouldn't be "too big" (heuristic).
+
+ uint32_t uf1 = kArm64SimdHeuristicMaxBodySizeInstr / instruction_count;
+ uint32_t uf2 = (trip_count - max_peel) / vector_length;
+ uint32_t unroll_factor =
+ TruncToPowerOfTwo(std::min({uf1, uf2, kArm64SimdMaxUnrollFactor}));
+ DCHECK_GE(unroll_factor, 1u);
+ return unroll_factor;
+ }
+};
+
+ArchNoOptsLoopHelper* ArchNoOptsLoopHelper::Create(InstructionSet isa,
+ ArenaAllocator* allocator) {
+ switch (isa) {
+ case InstructionSet::kArm64: {
+ return new (allocator) Arm64LoopHelper;
+ }
+ default: {
+ return new (allocator) ArchDefaultLoopHelper;
+ }
+ }
+}
+
+} // namespace art
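
To make the arm64 SIMD unrolling heuristic concrete, here is a worked example with assumed inputs (all numbers are illustrative, and TruncToPowerOfTwo is taken to return the largest power of two not exceeding its argument):

    // Assume trip_count = 100, max_peel = 4, vector_length = 4 and a loop body
    // of instruction_count = 10 instructions.
    //   Guard 1: 100 >= 2 * 4 + 4, so there are enough iterations to unroll.
    //   Guard 2: 10 < kArm64SimdHeuristicMaxBodySizeInstr (50), so the body fits.
    //   uf1 = 50 / 10 = 5          (body-size budget)
    //   uf2 = (100 - 4) / 4 = 24   (iteration budget)
    //   unroll_factor = TruncToPowerOfTwo(min(5, 24, 8)) = TruncToPowerOfTwo(5) = 4
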
diff --git a/compiler/optimizing/loop_analysis.h b/compiler/optimizing/loop_analysis.h
new file mode 100644
index 0000000000..57509ee410
--- /dev/null
+++ b/compiler/optimizing/loop_analysis.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
+
+#include "nodes.h"
+
+namespace art {
+
+class InductionVarRange;
+class LoopAnalysis;
+
+// Class to hold cached information on properties of the loop.
+class LoopAnalysisInfo : public ValueObject {
+ public:
+ // No loop unrolling factor (just one copy of the loop-body).
+ static constexpr uint32_t kNoUnrollingFactor = 1;
+ // Used for unknown and non-constant trip counts (see InductionVarRange::HasKnownTripCount).
+ static constexpr int64_t kUnknownTripCount = -1;
+
+ explicit LoopAnalysisInfo(HLoopInformation* loop_info)
+ : trip_count_(kUnknownTripCount),
+ bb_num_(0),
+ instr_num_(0),
+ exits_num_(0),
+ invariant_exits_num_(0),
+ has_instructions_preventing_scalar_peeling_(false),
+ has_instructions_preventing_scalar_unrolling_(false),
+ has_long_type_instructions_(false),
+ loop_info_(loop_info) {}
+
+ int64_t GetTripCount() const { return trip_count_; }
+ size_t GetNumberOfBasicBlocks() const { return bb_num_; }
+ size_t GetNumberOfInstructions() const { return instr_num_; }
+ size_t GetNumberOfExits() const { return exits_num_; }
+ size_t GetNumberOfInvariantExits() const { return invariant_exits_num_; }
+
+ bool HasInstructionsPreventingScalarPeeling() const {
+ return has_instructions_preventing_scalar_peeling_;
+ }
+
+ bool HasInstructionsPreventingScalarUnrolling() const {
+ return has_instructions_preventing_scalar_unrolling_;
+ }
+
+ bool HasInstructionsPreventingScalarOpts() const {
+ return HasInstructionsPreventingScalarPeeling() || HasInstructionsPreventingScalarUnrolling();
+ }
+
+ bool HasLongTypeInstructions() const {
+ return has_long_type_instructions_;
+ }
+
+ HLoopInformation* GetLoopInfo() const { return loop_info_; }
+
+ private:
+ // Trip count of the loop if known, kUnknownTripCount otherwise.
+ int64_t trip_count_;
+ // Number of basic blocks in the loop body.
+ size_t bb_num_;
+ // Number of instructions in the loop body.
+ size_t instr_num_;
+  // Number of loop exits.
+ size_t exits_num_;
+  // Number of "if" loop exits (ending with an HIf instruction) whose condition is loop-invariant.
+ size_t invariant_exits_num_;
+ // Whether the loop has instructions which make scalar loop peeling non-beneficial.
+ bool has_instructions_preventing_scalar_peeling_;
+ // Whether the loop has instructions which make scalar loop unrolling non-beneficial.
+ bool has_instructions_preventing_scalar_unrolling_;
+  // Whether the loop has instructions of primitive long type; unrolling such loops will
+ // likely introduce spill/fills on 32-bit targets.
+ bool has_long_type_instructions_;
+
+ // Corresponding HLoopInformation.
+ HLoopInformation* loop_info_;
+
+ friend class LoopAnalysis;
+};
+
+// Placeholder class for methods and routines used to analyse loops and calculate
+// loop properties and characteristics.
+class LoopAnalysis : public ValueObject {
+ public:
+  // Calculates a loop's basic properties (body size, number of exits, etc.) and
+  // fills 'analysis_results' with this information.
+ static void CalculateLoopBasicProperties(HLoopInformation* loop_info,
+ LoopAnalysisInfo* analysis_results,
+ int64_t trip_count);
+
+ // Returns the trip count of the loop if it is known and kUnknownTripCount otherwise.
+ static int64_t GetLoopTripCount(HLoopInformation* loop_info,
+ const InductionVarRange* induction_range);
+
+ private:
+ // Returns whether an instruction makes scalar loop peeling/unrolling non-beneficial.
+ //
+  // If the loop body contains a dex/runtime call, its cost will probably dominate
+  // the overall loop performance, so peeling/unrolling will not bring any noticeable
+  // improvement and will only increase the code size.
+ static bool MakesScalarPeelingUnrollingNonBeneficial(HInstruction* instruction) {
+ return (instruction->IsNewArray() ||
+ instruction->IsNewInstance() ||
+ instruction->IsUnresolvedInstanceFieldGet() ||
+ instruction->IsUnresolvedInstanceFieldSet() ||
+ instruction->IsUnresolvedStaticFieldGet() ||
+ instruction->IsUnresolvedStaticFieldSet() ||
+ // TODO: Support loops with intrinsified invokes.
+ instruction->IsInvoke());
+ }
+};
+
+//
+// Helper class which holds target-dependent methods and constants needed for loop optimizations.
+//
+// To support peeling/unrolling for a new architecture, create a new helper class,
+// inherit it from this one and implement the following methods.
+//
+class ArchNoOptsLoopHelper : public ArenaObject<kArenaAllocOptimization> {
+ public:
+ virtual ~ArchNoOptsLoopHelper() {}
+
+  // Creates an instance of a specialised helper for the target, or of the default
+  // helper if the target doesn't support loop peeling and unrolling.
+ static ArchNoOptsLoopHelper* Create(InstructionSet isa, ArenaAllocator* allocator);
+
+ // Returns whether the loop is not beneficial for loop peeling/unrolling.
+ //
+  // For example, if the loop body has too many instructions, then peeling/unrolling
+  // will not bring any noticeable performance improvement but will increase the code size.
+ //
+  // Returns 'true' by default; should be overridden by a target-specific loop helper.
+ virtual bool IsLoopNonBeneficialForScalarOpts(
+ LoopAnalysisInfo* loop_analysis_info ATTRIBUTE_UNUSED) const { return true; }
+
+  // Returns the optimal scalar unrolling factor for the loop.
+  //
+  // Returns kNoUnrollingFactor by default; should be overridden by a target-specific loop helper.
+ virtual uint32_t GetScalarUnrollingFactor(
+ const LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
+ }
+
+  // Returns whether scalar loop peeling is enabled.
+  //
+  // Returns 'false' by default; should be overridden by a target-specific loop helper.
+ virtual bool IsLoopPeelingEnabled() const { return false; }
+
+ // Returns whether it is beneficial to fully unroll the loop.
+ //
+  // Returns 'false' by default; should be overridden by a target-specific loop helper.
+ virtual bool IsFullUnrollingBeneficial(LoopAnalysisInfo* analysis_info ATTRIBUTE_UNUSED) const {
+ return false;
+ }
+
+  // Returns the optimal SIMD unrolling factor for the loop.
+  //
+  // Returns kNoUnrollingFactor by default; should be overridden by a target-specific loop helper.
+ virtual uint32_t GetSIMDUnrollingFactor(HBasicBlock* block ATTRIBUTE_UNUSED,
+ int64_t trip_count ATTRIBUTE_UNUSED,
+ uint32_t max_peel ATTRIBUTE_UNUSED,
+ uint32_t vector_length ATTRIBUTE_UNUSED) const {
+ return LoopAnalysisInfo::kNoUnrollingFactor;
+ }
+};
+
+} // namespace art
+
+#endif // ART_COMPILER_OPTIMIZING_LOOP_ANALYSIS_H_
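
As the comment above ArchNoOptsLoopHelper says, enabling peeling/unrolling on a new target amounts to subclassing the helper and overriding its hooks. A hedged sketch of such a subclass (Riscv64LoopHelper and its thresholds are invented for illustration; the real helpers live in loop_analysis.cc):

    // Hypothetical target helper, for illustration only.
    class Riscv64LoopHelper : public ArchDefaultLoopHelper {
     public:
      static constexpr uint32_t kRiscv64MaxBodySizeInstr = 30;
      static constexpr uint32_t kRiscv64MaxBodySizeBlocks = 8;

      bool IsLoopNonBeneficialForScalarOpts(LoopAnalysisInfo* info) const override {
        return IsLoopTooBig(info, kRiscv64MaxBodySizeInstr, kRiscv64MaxBodySizeBlocks);
      }
    };
    // A matching case in ArchNoOptsLoopHelper::Create() would then return
    // new (allocator) Riscv64LoopHelper for the new InstructionSet value.
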
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 9f278a9f4e..6c76ab858b 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -23,7 +23,7 @@
#include "arch/mips64/instruction_set_features_mips64.h"
#include "arch/x86/instruction_set_features_x86.h"
#include "arch/x86_64/instruction_set_features_x86_64.h"
-#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
#include "linear_order.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"
@@ -33,9 +33,6 @@ namespace art {
// Enables vectorization (SIMDization) in the loop optimizer.
static constexpr bool kEnableVectorization = true;
-// No loop unrolling factor (just one copy of the loop-body).
-static constexpr uint32_t kNoUnrollingFactor = 1;
-
//
// Static helpers.
//
@@ -227,6 +224,7 @@ static bool IsNarrowerOperands(HInstruction* a,
/*out*/ HInstruction** r,
/*out*/ HInstruction** s,
/*out*/ bool* is_unsigned) {
+ DCHECK(a != nullptr && b != nullptr);
// Look for a matching sign extension.
DataType::Type stype = HVecOperation::ToSignedType(type);
if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) {
@@ -247,6 +245,7 @@ static bool IsNarrowerOperand(HInstruction* a,
DataType::Type type,
/*out*/ HInstruction** r,
/*out*/ bool* is_unsigned) {
+ DCHECK(a != nullptr);
// Look for a matching sign extension.
DataType::Type stype = HVecOperation::ToSignedType(type);
if (IsSignExtensionAndGet(a, stype, r)) {
@@ -270,20 +269,28 @@ static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type
return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type));
}
-// Detect up to two instructions a and b, and an acccumulated constant c.
-static bool IsAddConstHelper(HInstruction* instruction,
- /*out*/ HInstruction** a,
- /*out*/ HInstruction** b,
- /*out*/ int64_t* c,
- int32_t depth) {
- static constexpr int32_t kMaxDepth = 8; // don't search too deep
+// Detect up to two added operands a and b and an accumulated constant c.
+static bool IsAddConst(HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c,
+ int32_t depth = 8) { // don't search too deep
int64_t value = 0;
+ // Enter add/sub while still within reasonable depth.
+ if (depth > 0) {
+ if (instruction->IsAdd()) {
+ return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1) &&
+ IsAddConst(instruction->InputAt(1), a, b, c, depth - 1);
+ } else if (instruction->IsSub() &&
+ IsInt64AndGet(instruction->InputAt(1), &value)) {
+ *c -= value;
+ return IsAddConst(instruction->InputAt(0), a, b, c, depth - 1);
+ }
+ }
+ // Otherwise, deal with leaf nodes.
if (IsInt64AndGet(instruction, &value)) {
*c += value;
return true;
- } else if (instruction->IsAdd() && depth <= kMaxDepth) {
- return IsAddConstHelper(instruction->InputAt(0), a, b, c, depth + 1) &&
- IsAddConstHelper(instruction->InputAt(1), a, b, c, depth + 1);
} else if (*a == nullptr) {
*a = instruction;
return true;
@@ -291,42 +298,40 @@ static bool IsAddConstHelper(HInstruction* instruction,
*b = instruction;
return true;
}
- return false; // too many non-const operands
+ return false; // too many operands
}
-// Detect a + b + c for an optional constant c.
-static bool IsAddConst(HInstruction* instruction,
- /*out*/ HInstruction** a,
- /*out*/ HInstruction** b,
- /*out*/ int64_t* c) {
- if (instruction->IsAdd()) {
- // Try to find a + b and accumulated c.
- if (IsAddConstHelper(instruction->InputAt(0), a, b, c, /*depth*/ 0) &&
- IsAddConstHelper(instruction->InputAt(1), a, b, c, /*depth*/ 0) &&
- *b != nullptr) {
- return true;
+// Detect a + b + c with optional constant c.
+static bool IsAddConst2(HGraph* graph,
+ HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b,
+ /*out*/ int64_t* c) {
+ if (IsAddConst(instruction, a, b, c) && *a != nullptr) {
+ if (*b == nullptr) {
+ // Constant is usually already present, unless accumulated.
+ *b = graph->GetConstant(instruction->GetType(), (*c));
+ *c = 0;
}
- // Found a + b.
- *a = instruction->InputAt(0);
- *b = instruction->InputAt(1);
- *c = 0;
return true;
}
return false;
}
-// Detect a + c for constant c.
-static bool IsAddConst(HInstruction* instruction,
- /*out*/ HInstruction** a,
- /*out*/ int64_t* c) {
- if (instruction->IsAdd()) {
- if (IsInt64AndGet(instruction->InputAt(0), c)) {
- *a = instruction->InputAt(1);
- return true;
- } else if (IsInt64AndGet(instruction->InputAt(1), c)) {
- *a = instruction->InputAt(0);
- return true;
- }
+// Detect a direct a - b or a hidden a - (-c).
+static bool IsSubConst2(HGraph* graph,
+ HInstruction* instruction,
+ /*out*/ HInstruction** a,
+ /*out*/ HInstruction** b) {
+ int64_t c = 0;
+ if (instruction->IsSub()) {
+ *a = instruction->InputAt(0);
+ *b = instruction->InputAt(1);
+ return true;
+ } else if (IsAddConst(instruction, a, b, &c) && *a != nullptr && *b == nullptr) {
+ // Constant for the hidden subtraction.
+ *b = graph->GetConstant(instruction->GetType(), -c);
+ return true;
}
return false;
}
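
Worked examples for the detectors above (x and y stand for arbitrary non-constant
HInstructions; GetConstant denotes graph->GetConstant with the matching type):

    (x + y) + 1   IsAddConst2: a = x, b = y, c = 1
    x + 3         IsAddConst2: a = x, b = GetConstant(3), c = 0  (constant materialized for b)
    x - y         IsSubConst2: a = x, b = y
    x + (-5)      IsSubConst2: a = x, b = GetConstant(5)  (the hidden subtraction)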
@@ -346,7 +351,10 @@ static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) {
// Translates vector operation to reduction kind.
static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) {
- if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) {
+ if (reduction->IsVecAdd() ||
+ reduction->IsVecSub() ||
+ reduction->IsVecSADAccumulate() ||
+ reduction->IsVecDotProd()) {
return HVecReduce::kSum;
}
LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId();
@@ -380,17 +388,82 @@ static bool CheckInductionSetFullyRemoved(ScopedArenaSet<HInstruction*>* iset) {
return true;
}
+// Tries to statically evaluate the condition of the specified HIf so that other checks of
+// the same condition can be replaced with constants.
+static void TryToEvaluateIfCondition(HIf* instruction, HGraph* graph) {
+ HInstruction* cond = instruction->InputAt(0);
+
+ // If a condition 'cond' is evaluated in an HIf instruction then in the successors of the
+ // IF_BLOCK we statically know the value of the condition 'cond' (TRUE in TRUE_SUCC, FALSE in
+ // FALSE_SUCC). Using that we can replace another evaluation (use) EVAL of the same 'cond'
+ // with TRUE value (FALSE value) if every path from the ENTRY_BLOCK to EVAL_BLOCK contains the
+ // edge IF_BLOCK->TRUE_SUCC (IF_BLOCK->FALSE_SUCC).
+ // if (cond) { if(cond) {
+ // if (cond) {} if (1) {}
+ // } else { =======> } else {
+ // if (cond) {} if (0) {}
+ // } }
+ if (!cond->IsConstant()) {
+ HBasicBlock* true_succ = instruction->IfTrueSuccessor();
+ HBasicBlock* false_succ = instruction->IfFalseSuccessor();
+
+ DCHECK_EQ(true_succ->GetPredecessors().size(), 1u);
+ DCHECK_EQ(false_succ->GetPredecessors().size(), 1u);
+
+ const HUseList<HInstruction*>& uses = cond->GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ HBasicBlock* user_block = user->GetBlock();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (true_succ->Dominates(user_block)) {
+ user->ReplaceInput(graph->GetIntConstant(1), index);
+ } else if (false_succ->Dominates(user_block)) {
+ user->ReplaceInput(graph->GetIntConstant(0), index);
+ }
+ }
+ }
+}
+
+// Peel the first 'count' iterations of the loop.
+static void PeelByCount(HLoopInformation* loop_info,
+ int count,
+ InductionVarRange* induction_range) {
+ for (int i = 0; i < count; i++) {
+ // Perform peeling.
+ PeelUnrollSimpleHelper helper(loop_info, induction_range);
+ helper.DoPeeling();
+ }
+}
+
+// Returns the narrower of the types of instructions a and b, looking through type conversions.
+static DataType::Type GetNarrowerType(HInstruction* a, HInstruction* b) {
+ DataType::Type type = a->GetType();
+ if (DataType::Size(b->GetType()) < DataType::Size(type)) {
+ type = b->GetType();
+ }
+ if (a->IsTypeConversion() &&
+ DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(type)) {
+ type = a->InputAt(0)->GetType();
+ }
+ if (b->IsTypeConversion() &&
+ DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(type)) {
+ type = b->InputAt(0)->GetType();
+ }
+ return type;
+}
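
For example, with a = TypeConversion(Int32 <- Int8 x) and b of type Int16: b's type size (2)
is below a's (4), and a's conversion input is narrower still, so the function returns Int8.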
+
//
// Public methods.
//
HLoopOptimization::HLoopOptimization(HGraph* graph,
- CompilerDriver* compiler_driver,
+ const CompilerOptions* compiler_options,
HInductionVarAnalysis* induction_analysis,
OptimizingCompilerStats* stats,
const char* name)
: HOptimization(graph, name, stats),
- compiler_driver_(compiler_driver),
+ compiler_options_(compiler_options),
induction_range_(induction_analysis),
loop_allocator_(nullptr),
global_allocator_(graph_->GetAllocator()),
@@ -411,14 +484,18 @@ HLoopOptimization::HLoopOptimization(HGraph* graph,
vector_preheader_(nullptr),
vector_header_(nullptr),
vector_body_(nullptr),
- vector_index_(nullptr) {
+ vector_index_(nullptr),
+ arch_loop_helper_(ArchNoOptsLoopHelper::Create(compiler_options_ != nullptr
+ ? compiler_options_->GetInstructionSet()
+ : InstructionSet::kNone,
+ global_allocator_)) {
}
-void HLoopOptimization::Run() {
+bool HLoopOptimization::Run() {
// Skip if there is no loop or the graph has try-catch/irreducible loops.
// TODO: make this less of a sledgehammer.
if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
- return;
+ return false;
}
// Phase-local allocator.
@@ -426,7 +503,7 @@ void HLoopOptimization::Run() {
loop_allocator_ = &allocator;
// Perform loop optimizations.
- LocalRun();
+ bool didLoopOpt = LocalRun();
if (top_loop_ == nullptr) {
graph_->SetHasLoops(false); // no more loops
}
@@ -434,13 +511,16 @@ void HLoopOptimization::Run() {
// Detach.
loop_allocator_ = nullptr;
last_loop_ = top_loop_ = nullptr;
+
+ return didLoopOpt;
}
//
// Loop setup and traversal.
//
-void HLoopOptimization::LocalRun() {
+bool HLoopOptimization::LocalRun() {
+ bool didLoopOpt = false;
// Build the linear order using the phase-local allocator. This step enables building
// a loop hierarchy that properly reflects the outer-inner and previous-next relation.
ScopedArenaVector<HBasicBlock*> linear_order(loop_allocator_->Adapter(kArenaAllocLinearOrder));
@@ -472,7 +552,7 @@ void HLoopOptimization::LocalRun() {
vector_map_ = &map;
vector_permanent_map_ = &perm;
// Traverse.
- TraverseLoopsInnerToOuter(top_loop_);
+ didLoopOpt = TraverseLoopsInnerToOuter(top_loop_);
// Detach.
iset_ = nullptr;
reductions_ = nullptr;
@@ -480,6 +560,7 @@ void HLoopOptimization::LocalRun() {
vector_map_ = nullptr;
vector_permanent_map_ = nullptr;
}
+ return didLoopOpt;
}
void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
@@ -536,6 +617,7 @@ bool HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
// loop if the induction of any inner loop has changed.
if (TraverseLoopsInnerToOuter(node->inner)) {
induction_range_.ReVisit(node->loop_info);
+ changed = true;
}
// Repeat simplifications in the loop-body until no more changes occur.
// Note that since each simplification consists of eliminating code (without
@@ -622,7 +704,7 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
}
}
-bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+bool HLoopOptimization::TryOptimizeInnerLoopFinite(LoopNode* node) {
HBasicBlock* header = node->loop_info->GetHeader();
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Ensure loop header logic is finite.
@@ -692,6 +774,146 @@ bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
return false;
}
+bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
+ return TryOptimizeInnerLoopFinite(node) || TryPeelingAndUnrolling(node);
+}
+
+//
+// Scalar loop peeling and unrolling: generic part methods.
+//
+
+bool HLoopOptimization::TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ if (analysis_info->GetNumberOfExits() > 1) {
+ return false;
+ }
+
+ uint32_t unrolling_factor = arch_loop_helper_->GetScalarUnrollingFactor(analysis_info);
+ if (unrolling_factor == LoopAnalysisInfo::kNoUnrollingFactor) {
+ return false;
+ }
+
+ if (generate_code) {
+ // TODO: support other unrolling factors.
+ DCHECK_EQ(unrolling_factor, 2u);
+
+ // Perform unrolling.
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ PeelUnrollSimpleHelper helper(loop_info, &induction_range_);
+ helper.DoUnrolling();
+
+ // Remove the redundant loop check after unrolling.
+ HIf* copy_hif =
+ helper.GetBasicBlockMap()->Get(loop_info->GetHeader())->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*copy_hif->IfTrueSuccessor()) ? 1 : 0;
+ copy_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+ }
+ return true;
+}
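
Schematically (an editorial sketch of the control flow, not the exact HIR), factor-2
unrolling of a single-exit counted loop turns

    before:                       after unrolling by 2:
      header:  if (i >= n) exit     header:  if (i >= n) exit
      body:    work(i); i++         body:    work(i); i++
      goto header                   header': goto body'      // redundant check folded to a constant
                                    body':   work(i); i++
                                    goto header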
+
+bool HLoopOptimization::TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+ bool generate_code) {
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ if (!arch_loop_helper_->IsLoopPeelingEnabled()) {
+ return false;
+ }
+
+ if (analysis_info->GetNumberOfInvariantExits() == 0) {
+ return false;
+ }
+
+ if (generate_code) {
+ // Perform peeling.
+ PeelUnrollSimpleHelper helper(loop_info, &induction_range_);
+ helper.DoPeeling();
+
+ // Statically evaluate loop check after peeling for loop invariant condition.
+ const SuperblockCloner::HInstructionMap* hir_map = helper.GetInstructionMap();
+ for (auto entry : *hir_map) {
+ HInstruction* copy = entry.second;
+ if (copy->IsIf()) {
+ TryToEvaluateIfCondition(copy->AsIf(), graph_);
+ }
+ }
+ }
+
+ return true;
+}
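
Schematically (editorial sketch), for a loop with a loop-invariant exit condition 'inv':

    before peeling:                after peeling one iteration:
      L:  if (inv) goto exit         if (inv) goto exit          // peeled copy decides once
          work(i); i++               work(i); i++
          if (i < n) goto L          L:  if (false) goto exit    // folded by TryToEvaluateIfCondition
                                         work(i); i++
                                         if (i < n) goto L

The fold is valid because every path into the remaining loop now passes through the false
edge of the peeled check.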
+
+bool HLoopOptimization::TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code) {
+ // Fully unroll loops with a known and small trip count.
+ int64_t trip_count = analysis_info->GetTripCount();
+ if (!arch_loop_helper_->IsLoopPeelingEnabled() ||
+ trip_count == LoopAnalysisInfo::kUnknownTripCount ||
+ !arch_loop_helper_->IsFullUnrollingBeneficial(analysis_info)) {
+ return false;
+ }
+
+ if (generate_code) {
+ // Peeling the first N iterations (where N equals the trip count) effectively
+ // eliminates the loop: after peeling we have N sequential copies of the iteration in the
+ // loop preheader, followed by the original loop. The trip count of that remaining loop is 0,
+ // since the sequential iterations execute first and there are exactly N of them, so we can
+ // statically evaluate the loop exit condition to 'false' and fully eliminate it.
+ //
+ // Here is an example of fully unrolling a loop with a trip count of 2:
+ //
+ // loop_cond_1
+ // loop_body_1 <- First iteration.
+ // |
+ // \ v
+ // ==\ loop_cond_2
+ // ==/ loop_body_2 <- Second iteration.
+ // / |
+ // <- v <-
+ // loop_cond \ loop_cond \ <- This cond is always false.
+ // loop_body _/ loop_body _/
+ //
+ HLoopInformation* loop_info = analysis_info->GetLoopInfo();
+ PeelByCount(loop_info, trip_count, &induction_range_);
+ HIf* loop_hif = loop_info->GetHeader()->GetLastInstruction()->AsIf();
+ int32_t constant = loop_info->Contains(*loop_hif->IfTrueSuccessor()) ? 0 : 1;
+ loop_hif->ReplaceInput(graph_->GetIntConstant(constant), 0u);
+ }
+
+ return true;
+}
+
+bool HLoopOptimization::TryPeelingAndUnrolling(LoopNode* node) {
+ // Don't run peeling/unrolling if compiler_options_ is nullptr (i.e., when running under
+ // tests), as the InstructionSet is needed.
+ if (compiler_options_ == nullptr) {
+ return false;
+ }
+
+ HLoopInformation* loop_info = node->loop_info;
+ int64_t trip_count = LoopAnalysis::GetLoopTripCount(loop_info, &induction_range_);
+ LoopAnalysisInfo analysis_info(loop_info);
+ LoopAnalysis::CalculateLoopBasicProperties(loop_info, &analysis_info, trip_count);
+
+ if (analysis_info.HasInstructionsPreventingScalarOpts() ||
+ arch_loop_helper_->IsLoopNonBeneficialForScalarOpts(&analysis_info)) {
+ return false;
+ }
+
+ if (!TryFullUnrolling(&analysis_info, /*generate_code*/ false) &&
+ !TryPeelingForLoopInvariantExitsElimination(&analysis_info, /*generate_code*/ false) &&
+ !TryUnrollingForBranchPenaltyReduction(&analysis_info, /*generate_code*/ false)) {
+ return false;
+ }
+
+ // Run 'IsLoopClonable' last as it might be time-consuming.
+ if (!PeelUnrollHelper::IsLoopClonable(loop_info)) {
+ return false;
+ }
+
+ return TryFullUnrolling(&analysis_info) ||
+ TryPeelingForLoopInvariantExitsElimination(&analysis_info) ||
+ TryUnrollingForBranchPenaltyReduction(&analysis_info);
+}
+
//
// Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
@@ -822,7 +1044,8 @@ void HLoopOptimization::Vectorize(LoopNode* node,
HBasicBlock* preheader = node->loop_info->GetPreHeader();
// Pick a loop unrolling factor for the vector loop.
- uint32_t unroll = GetUnrollingFactor(block, trip_count);
+ uint32_t unroll = arch_loop_helper_->GetSIMDUnrollingFactor(
+ block, trip_count, MaxNumberPeeled(), vector_length_);
uint32_t chunk = vector_length_ * unroll;
DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk));
@@ -927,7 +1150,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
ptc,
graph_->GetConstant(induc_type, 1),
- kNoUnrollingFactor);
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Generate vector loop, possibly further unrolled:
@@ -954,7 +1177,7 @@ void HLoopOptimization::Vectorize(LoopNode* node,
vector_index_,
stc,
graph_->GetConstant(induc_type, 1),
- kNoUnrollingFactor);
+ LoopAnalysisInfo::kNoUnrollingFactor);
}
// Link reductions to their final uses.
@@ -1061,6 +1284,11 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
HInstruction* index = instruction->InputAt(1);
HInstruction* value = instruction->InputAt(2);
HInstruction* offset = nullptr;
+ // For narrow types, an explicit type conversion may have been
+ // optimized away, so set the no-hi-bits restriction here.
+ if (DataType::Size(type) <= 2) {
+ restrictions |= kNoHiBits;
+ }
if (TrySetVectorType(type, &restrictions) &&
node->loop_info->IsDefinedOutOfTheLoop(base) &&
induction_range_.IsUnitStride(instruction, index, graph_, &offset) &&
@@ -1083,6 +1311,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node,
DataType::Type type = instruction->GetType();
// Recognize SAD idiom or direct reduction.
if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) ||
+ VectorizeDotProdIdiom(node, instruction, generate_code, type, restrictions) ||
(TrySetVectorType(type, &restrictions) &&
VectorizeUse(node, instruction, generate_code, type, restrictions))) {
if (generate_code) {
@@ -1275,49 +1504,37 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node,
return true;
}
}
- } else if (instruction->IsInvokeStaticOrDirect()) {
- // Accept particular intrinsics.
- HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kMathAbsInt:
- case Intrinsics::kMathAbsLong:
- case Intrinsics::kMathAbsFloat:
- case Intrinsics::kMathAbsDouble: {
- // Deal with vector restrictions.
- HInstruction* opa = instruction->InputAt(0);
- HInstruction* r = opa;
- bool is_unsigned = false;
- if (HasVectorRestrictions(restrictions, kNoAbs)) {
- return false;
- } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
- (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
- return false; // reject, unless operand is sign-extension narrower
- }
- // Accept ABS(x) for vectorizable operand.
- DCHECK(r != nullptr);
- if (generate_code && vector_mode_ != kVector) { // de-idiom
- r = opa;
- }
- if (VectorizeUse(node, r, generate_code, type, restrictions)) {
- if (generate_code) {
- GenerateVecOp(instruction,
- vector_map_->Get(r),
- nullptr,
- HVecOperation::ToProperType(type, is_unsigned));
- }
- return true;
- }
- return false;
+ } else if (instruction->IsAbs()) {
+ // Deal with vector restrictions.
+ HInstruction* opa = instruction->InputAt(0);
+ HInstruction* r = opa;
+ bool is_unsigned = false;
+ if (HasVectorRestrictions(restrictions, kNoAbs)) {
+ return false;
+ } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+ (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+ return false; // reject, unless operand is sign-extension narrower
+ }
+ // Accept ABS(x) for vectorizable operand.
+ DCHECK(r != nullptr);
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = opa;
+ }
+ if (VectorizeUse(node, r, generate_code, type, restrictions)) {
+ if (generate_code) {
+ GenerateVecOp(instruction,
+ vector_map_->Get(r),
+ nullptr,
+ HVecOperation::ToProperType(type, is_unsigned));
}
- default:
- return false;
- } // switch
+ return true;
+ }
}
return false;
}
uint32_t HLoopOptimization::GetVectorSizeInBytes() {
- switch (compiler_driver_->GetInstructionSet()) {
+ switch (compiler_options_->GetInstructionSet()) {
case InstructionSet::kArm:
case InstructionSet::kThumb2:
return 8; // 64-bit SIMD
@@ -1327,8 +1544,8 @@ uint32_t HLoopOptimization::GetVectorSizeInBytes() {
}
bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrictions) {
- const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
- switch (compiler_driver_->GetInstructionSet()) {
+ const InstructionSetFeatures* features = compiler_options_->GetInstructionSetFeatures();
+ switch (compiler_options_->GetInstructionSet()) {
case InstructionSet::kArm:
case InstructionSet::kThumb2:
// Allow vectorization for all ARM devices, because Android assumes that
@@ -1337,11 +1554,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |= kNoDiv | kNoReduction;
+ *restrictions |= kNoDiv | kNoReduction | kNoDotProd;
return TrySetVectorLength(8);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction | kNoDotProd;
return TrySetVectorLength(4);
case DataType::Type::kInt32:
*restrictions |= kNoDiv | kNoWideSAD;
@@ -1386,12 +1603,23 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |=
- kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD;
+ *restrictions |= kNoMul |
+ kNoDiv |
+ kNoShift |
+ kNoAbs |
+ kNoSignedHAdd |
+ kNoUnroundedHAdd |
+ kNoSAD |
+ kNoDotProd;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD;
+ *restrictions |= kNoDiv |
+ kNoAbs |
+ kNoSignedHAdd |
+ kNoUnroundedHAdd |
+ kNoSAD |
+ kNoDotProd;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
*restrictions |= kNoDiv | kNoSAD;
@@ -1416,11 +1644,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoDotProd;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
*restrictions |= kNoDiv;
@@ -1445,11 +1673,11 @@ bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrict
case DataType::Type::kBool:
case DataType::Type::kUint8:
case DataType::Type::kInt8:
- *restrictions |= kNoDiv;
+ *restrictions |= kNoDiv | kNoDotProd;
return TrySetVectorLength(16);
case DataType::Type::kUint16:
case DataType::Type::kInt16:
- *restrictions |= kNoDiv | kNoStringCharAt;
+ *restrictions |= kNoDiv | kNoStringCharAt | kNoDotProd;
return TrySetVectorLength(8);
case DataType::Type::kInt32:
*restrictions |= kNoDiv;
@@ -1751,57 +1979,11 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org,
GENERATE_VEC(
new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc),
new (global_allocator_) HUShr(org_type, opa, opb, dex_pc));
- case HInstruction::kInvokeStaticOrDirect: {
- HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
- if (vector_mode_ == kVector) {
- switch (invoke->GetIntrinsic()) {
- case Intrinsics::kMathAbsInt:
- case Intrinsics::kMathAbsLong:
- case Intrinsics::kMathAbsFloat:
- case Intrinsics::kMathAbsDouble:
- DCHECK(opb == nullptr);
- vector = new (global_allocator_)
- HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc);
- break;
- default:
- LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId();
- UNREACHABLE();
- } // switch invoke
- } else {
- // In scalar code, simply clone the method invoke, and replace its operands with the
- // corresponding new scalar instructions in the loop. The instruction will get an
- // environment while being inserted from the instruction map in original program order.
- DCHECK(vector_mode_ == kSequential);
- size_t num_args = invoke->GetNumberOfArguments();
- HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
- global_allocator_,
- num_args,
- invoke->GetType(),
- invoke->GetDexPc(),
- invoke->GetDexMethodIndex(),
- invoke->GetResolvedMethod(),
- invoke->GetDispatchInfo(),
- invoke->GetInvokeType(),
- invoke->GetTargetMethod(),
- invoke->GetClinitCheckRequirement());
- HInputsRef inputs = invoke->GetInputs();
- size_t num_inputs = inputs.size();
- DCHECK_LE(num_args, num_inputs);
- DCHECK_EQ(num_inputs, new_invoke->GetInputs().size()); // both invokes agree
- for (size_t index = 0; index < num_inputs; ++index) {
- HInstruction* new_input = index < num_args
- ? vector_map_->Get(inputs[index])
- : inputs[index]; // beyond arguments: just pass through
- new_invoke->SetArgumentAt(index, new_input);
- }
- new_invoke->SetIntrinsic(invoke->GetIntrinsic(),
- kNeedsEnvironmentOrCache,
- kNoSideEffects,
- kNoThrow);
- vector = new_invoke;
- }
- break;
- }
+ case HInstruction::kAbs:
+ DCHECK(opb == nullptr);
+ GENERATE_VEC(
+ new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc),
+ new (global_allocator_) HAbs(org_type, opa, dex_pc));
default:
break;
} // switch
@@ -1838,8 +2020,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
HInstruction* a = nullptr;
HInstruction* b = nullptr;
int64_t c = 0;
- if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
- DCHECK(a != nullptr && b != nullptr);
+ if (IsAddConst2(graph_, instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
// Accept c == 1 (rounded) or c == 0 (not rounded).
bool is_rounded = false;
if (c == 1) {
@@ -1861,8 +2042,7 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node,
}
// Accept recognized halving add for vectorizable operands. Vectorized code uses the
// shorthand idiomatic operation. Sequential code uses the original scalar expressions.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
+ DCHECK(r != nullptr && s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = instruction->InputAt(0);
s = instruction->InputAt(1);
@@ -1912,21 +2092,11 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
HInstruction* v = instruction->InputAt(1);
HInstruction* a = nullptr;
HInstruction* b = nullptr;
- if (v->IsInvokeStaticOrDirect() &&
- (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt ||
- v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) {
- HInstruction* x = v->InputAt(0);
- if (x->GetType() == reduction_type) {
- int64_t c = 0;
- if (x->IsSub()) {
- a = x->InputAt(0);
- b = x->InputAt(1);
- } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) {
- b = graph_->GetConstant(reduction_type, -c); // hidden SUB!
- }
- }
- }
- if (a == nullptr || b == nullptr) {
+ if (v->IsAbs() &&
+ v->GetType() == reduction_type &&
+ IsSubConst2(graph_, v->InputAt(0), /*out*/ &a, /*out*/ &b)) {
+ DCHECK(a != nullptr && b != nullptr);
+ } else {
return false;
}
// Accept same-type or consistent sign extension for narrower-type on operands a and b.
@@ -1935,18 +2105,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
HInstruction* r = a;
HInstruction* s = b;
bool is_unsigned = false;
- DataType::Type sub_type = a->GetType();
- if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) {
- sub_type = b->GetType();
- }
- if (a->IsTypeConversion() &&
- DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) {
- sub_type = a->InputAt(0)->GetType();
- }
- if (b->IsTypeConversion() &&
- DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) {
- sub_type = b->InputAt(0)->GetType();
- }
+ DataType::Type sub_type = GetNarrowerType(a, b);
if (reduction_type != sub_type &&
(!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) {
return false;
@@ -1959,8 +2118,7 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
}
// Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand
// idiomatic operation. Sequential code uses the original scalar expressions.
- DCHECK(r != nullptr);
- DCHECK(s != nullptr);
+ DCHECK(r != nullptr && s != nullptr);
if (generate_code && vector_mode_ != kVector) { // de-idiom
r = s = v->InputAt(0);
}
@@ -1968,14 +2126,13 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
VectorizeUse(node, r, generate_code, sub_type, restrictions) &&
VectorizeUse(node, s, generate_code, sub_type, restrictions)) {
if (generate_code) {
- reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned);
if (vector_mode_ == kVector) {
vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate(
global_allocator_,
vector_map_->Get(q),
vector_map_->Get(r),
vector_map_->Get(s),
- reduction_type,
+ HVecOperation::ToProperType(reduction_type, is_unsigned),
GetOtherVL(reduction_type, sub_type, vector_length_),
kNoDexPc));
MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
@@ -1989,6 +2146,75 @@ bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node,
return false;
}
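
For reference, the source-level shape the SAD recognizer above targets, written as a
self-contained C++ illustration (not ART code; the dex input would be the Java equivalent):

    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>

    int32_t SadReference(const int8_t* a, const int8_t* b, std::size_t n) {
      int32_t q = 0;
      for (std::size_t i = 0; i < n; ++i) {
        q += std::abs(a[i] - b[i]);  // operands narrower than the int32 reduction type
      }
      return q;  // vectorizes to HVecSADAccumulate when restrictions allow
    }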
+// Method recognises the following dot product idiom:
+//   q += a * b for operands a, b whose type is narrower than the reduction type,
+// provided that the operands have the same type or are promoted to a wider form.
+// Since this may involve a vector length change, the idiom is handled by going directly
+// to a dot product node (rather than relying on combining finer-grained nodes later).
+bool HLoopOptimization::VectorizeDotProdIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ DataType::Type reduction_type,
+ uint64_t restrictions) {
+ if (!instruction->IsAdd() || (reduction_type != DataType::Type::kInt32)) {
+ return false;
+ }
+
+ HInstruction* q = instruction->InputAt(0);
+ HInstruction* v = instruction->InputAt(1);
+ if (!v->IsMul() || v->GetType() != reduction_type) {
+ return false;
+ }
+
+ HInstruction* a = v->InputAt(0);
+ HInstruction* b = v->InputAt(1);
+ HInstruction* r = a;
+ HInstruction* s = b;
+ DataType::Type op_type = GetNarrowerType(a, b);
+ bool is_unsigned = false;
+
+ if (!IsNarrowerOperands(a, b, op_type, &r, &s, &is_unsigned)) {
+ return false;
+ }
+ op_type = HVecOperation::ToProperType(op_type, is_unsigned);
+
+ if (!TrySetVectorType(op_type, &restrictions) ||
+ HasVectorRestrictions(restrictions, kNoDotProd)) {
+ return false;
+ }
+
+ DCHECK(r != nullptr && s != nullptr);
+ // Accept dot product idiom for vectorizable operands. Vectorized code uses the shorthand
+ // idiomatic operation. Sequential code uses the original scalar expressions.
+ if (generate_code && vector_mode_ != kVector) { // de-idiom
+ r = a;
+ s = b;
+ }
+ if (VectorizeUse(node, q, generate_code, op_type, restrictions) &&
+ VectorizeUse(node, r, generate_code, op_type, restrictions) &&
+ VectorizeUse(node, s, generate_code, op_type, restrictions)) {
+ if (generate_code) {
+ if (vector_mode_ == kVector) {
+ vector_map_->Put(instruction, new (global_allocator_) HVecDotProd(
+ global_allocator_,
+ vector_map_->Get(q),
+ vector_map_->Get(r),
+ vector_map_->Get(s),
+ reduction_type,
+ is_unsigned,
+ GetOtherVL(reduction_type, op_type, vector_length_),
+ kNoDexPc));
+ MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom);
+ } else {
+ GenerateVecOp(v, vector_map_->Get(r), vector_map_->Get(s), reduction_type);
+ GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type);
+ }
+ }
+ return true;
+ }
+ return false;
+}
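
And the corresponding source-level dot product shape, again as a self-contained C++
illustration (the SDOT/UDOT-style mapping only applies on targets whose TrySetVectorType
does not set kNoDotProd):

    #include <cstddef>
    #include <cstdint>

    int32_t DotProdReference(const int8_t* a, const int8_t* b, std::size_t n) {
      int32_t q = 0;
      for (std::size_t i = 0; i < n; ++i) {
        q += a[i] * b[i];  // both operands sign-extended to the int32 reduction type
      }
      return q;  // recognized by VectorizeDotProdIdiom and mapped to HVecDotProd
    }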
+
//
// Vectorization heuristics.
//
@@ -2048,41 +2274,6 @@ bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) {
return true;
}
-static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8;
-static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50;
-
-uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) {
- uint32_t max_peel = MaxNumberPeeled();
- switch (compiler_driver_->GetInstructionSet()) {
- case InstructionSet::kArm64: {
- // Don't unroll with insufficient iterations.
- // TODO: Unroll loops with unknown trip count.
- DCHECK_NE(vector_length_, 0u);
- if (trip_count < (2 * vector_length_ + max_peel)) {
- return kNoUnrollingFactor;
- }
- // Don't unroll for large loop body size.
- uint32_t instruction_count = block->GetInstructions().CountSize();
- if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) {
- return kNoUnrollingFactor;
- }
- // Find a beneficial unroll factor with the following restrictions:
- // - At least one iteration of the transformed loop should be executed.
- // - The loop body shouldn't be "too big" (heuristic).
- uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count;
- uint32_t uf2 = (trip_count - max_peel) / vector_length_;
- uint32_t unroll_factor =
- TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR}));
- DCHECK_GE(unroll_factor, 1u);
- return unroll_factor;
- }
- case InstructionSet::kX86:
- case InstructionSet::kX86_64:
- default:
- return kNoUnrollingFactor;
- }
-}
-
//
// Helpers.
//
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index d70751037b..1a842c4bf3 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -20,12 +20,15 @@
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "induction_var_range.h"
+#include "loop_analysis.h"
#include "nodes.h"
#include "optimization.h"
+#include "superblock_cloner.h"
namespace art {
-class CompilerDriver;
+class CompilerOptions;
+class ArchNoOptsLoopHelper;
/**
* Loop optimizations. Builds a loop hierarchy and applies optimizations to
@@ -35,12 +38,12 @@ class CompilerDriver;
class HLoopOptimization : public HOptimization {
public:
HLoopOptimization(HGraph* graph,
- CompilerDriver* compiler_driver,
+ const CompilerOptions* compiler_options,
HInductionVarAnalysis* induction_analysis,
OptimizingCompilerStats* stats,
const char* name = kLoopOptimizationPassName);
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
@@ -79,6 +82,7 @@ class HLoopOptimization : public HOptimization {
kNoReduction = 1 << 9, // no reduction
kNoSAD = 1 << 10, // no sum of absolute differences (SAD)
kNoWideSAD = 1 << 11, // no sum of absolute differences (SAD) with operand widening
+ kNoDotProd = 1 << 12, // no dot product
};
/*
@@ -118,7 +122,7 @@ class HLoopOptimization : public HOptimization {
// Loop setup and traversal.
//
- void LocalRun();
+ bool LocalRun();
void AddLoop(HLoopInformation* loop_info);
void RemoveLoop(LoopNode* node);
@@ -133,10 +137,34 @@ class HLoopOptimization : public HOptimization {
void SimplifyInduction(LoopNode* node);
void SimplifyBlocks(LoopNode* node);
- // Performs optimizations specific to inner loop (empty loop removal,
+ // Performs optimizations specific to an inner loop with finite header logic (empty loop removal,
// unrolling, vectorization). Returns true if anything changed.
+ bool TryOptimizeInnerLoopFinite(LoopNode* node);
+
+ // Performs optimizations specific to an inner loop. Returns true if anything changed.
bool OptimizeInnerLoop(LoopNode* node);
+ // Tries to apply loop unrolling for branch penalty reduction and better instruction scheduling
+ // opportunities. Returns whether the transformation happened. 'generate_code' determines
+ // whether the optimization should actually be applied.
+ bool TryUnrollingForBranchPenaltyReduction(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
+
+ // Tries to apply loop peeling to eliminate loop-invariant exits. Returns whether the
+ // transformation happened. 'generate_code' determines whether the optimization should
+ // actually be applied.
+ bool TryPeelingForLoopInvariantExitsElimination(LoopAnalysisInfo* analysis_info,
+ bool generate_code = true);
+
+ // Tries to perform whole-loop unrolling for a small loop with a small trip count, to eliminate
+ // the loop-check overhead and to create more opportunities for inter-iteration optimizations.
+ // Returns whether the transformation happened. 'generate_code' determines whether the
+ // optimization should actually be applied.
+ bool TryFullUnrolling(LoopAnalysisInfo* analysis_info, bool generate_code = true);
+
+ // Tries to apply scalar loop peeling and unrolling.
+ bool TryPeelingAndUnrolling(LoopNode* node);
+
//
// Vectorization analysis and synthesis.
//
@@ -175,6 +203,11 @@ class HLoopOptimization : public HOptimization {
DataType::Type type);
// Vectorization idioms.
+ bool VectorizeSaturationIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ DataType::Type type,
+ uint64_t restrictions);
bool VectorizeHalvingAddIdiom(LoopNode* node,
HInstruction* instruction,
bool generate_code,
@@ -185,6 +218,11 @@ class HLoopOptimization : public HOptimization {
bool generate_code,
DataType::Type type,
uint64_t restrictions);
+ bool VectorizeDotProdIdiom(LoopNode* node,
+ HInstruction* instruction,
+ bool generate_code,
+ DataType::Type type,
+ uint64_t restrictions);
// Vectorization heuristics.
Alignment ComputeAlignment(HInstruction* offset,
@@ -195,7 +233,6 @@ class HLoopOptimization : public HOptimization {
const ArrayReference* peeling_candidate);
uint32_t MaxNumberPeeled();
bool IsVectorizationProfitable(int64_t trip_count);
- uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
//
// Helpers.
@@ -225,8 +262,8 @@ class HLoopOptimization : public HOptimization {
void RemoveDeadInstructions(const HInstructionList& list);
bool CanRemoveCycle(); // Whether the current 'iset_' is removable.
- // Compiler driver (to query ISA features).
- const CompilerDriver* compiler_driver_;
+ // Compiler options (to query ISA features).
+ const CompilerOptions* compiler_options_;
// Range information based on prior induction variable analysis.
InductionVarRange induction_range_;
@@ -289,6 +326,9 @@ class HLoopOptimization : public HOptimization {
HBasicBlock* vector_body_; // body of the new loop
HInstruction* vector_index_; // normalized index of the new loop
+ // Helper providing target-specific behaviour for loop optimizations.
+ ArchNoOptsLoopHelper* arch_loop_helper_;
+
friend class LoopOptimizationTest;
DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index db8368986c..310d98b5b0 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -29,7 +29,8 @@ class LoopOptimizationTest : public OptimizingUnitTest {
LoopOptimizationTest()
: graph_(CreateGraph()),
iva_(new (GetAllocator()) HInductionVarAnalysis(graph_)),
- loop_opt_(new (GetAllocator()) HLoopOptimization(graph_, nullptr, iva_, nullptr)) {
+ loop_opt_(new (GetAllocator()) HLoopOptimization(
+ graph_, /* compiler_options= */ nullptr, iva_, /* stats= */ nullptr)) {
BuildGraph();
}
@@ -227,11 +228,14 @@ TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) {
graph_->ClearDominanceInformation();
graph_->BuildDominatorTree();
+ // BuildDominatorTree inserts a block between the loop header and the entry block.
+ EXPECT_EQ(header->GetPredecessors()[0]->GetSinglePredecessor(), entry_block_);
+
// Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs
// are still mapped correctly to the block predecessors.
for (size_t i = 0, e = phi->InputCount(); i < e; i++) {
HInstruction* input = phi->InputAt(i);
- ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
+ EXPECT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i]));
}
}
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index f6ba19f22a..1940d55a9d 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -20,8 +20,10 @@
#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_vector-inl.h"
+#include "base/logging.h"
#include "base/stl_util.h"
#include "class_linker-inl.h"
+#include "class_root.h"
#include "code_generator.h"
#include "common_dominator.h"
#include "intrinsics.h"
@@ -40,10 +42,9 @@ static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD ==
void HGraph::InitializeInexactObjectRTI(VariableSizedHandleScope* handles) {
ScopedObjectAccess soa(Thread::Current());
// Create the inexact Object reference type and store it in the HGraph.
- ClassLinker* linker = Runtime::Current()->GetClassLinker();
inexact_object_rti_ = ReferenceTypeInfo::Create(
- handles->NewHandle(linker->GetClassRoot(ClassLinker::kJavaLangObject)),
- /* is_exact */ false);
+ handles->NewHandle(GetClassRoot<mirror::Object>()),
+ /* is_exact= */ false);
}
void HGraph::AddBlock(HBasicBlock* block) {
@@ -59,7 +60,7 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) {
ScopedArenaAllocator allocator(GetArenaStack());
// Nodes that we're currently visiting, indexed by block id.
ArenaBitVector visiting(
- &allocator, blocks_.size(), /* expandable */ false, kArenaAllocGraphBuilder);
+ &allocator, blocks_.size(), /* expandable= */ false, kArenaAllocGraphBuilder);
visiting.ClearAllBits();
// Number of successors visited from a given node, indexed by block id.
ScopedArenaVector<size_t> successors_visited(blocks_.size(),
@@ -146,7 +147,9 @@ void HGraph::RemoveInstructionsAsUsersFromDeadBlocks(const ArenaBitVector& visit
if (!visited.IsBitSet(i)) {
HBasicBlock* block = blocks_[i];
if (block == nullptr) continue;
- DCHECK(block->GetPhis().IsEmpty()) << "Phis are not inserted at this stage";
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ RemoveAsUser(it.Current());
+ }
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
RemoveAsUser(it.Current());
}
@@ -688,7 +691,7 @@ HCurrentMethod* HGraph::GetCurrentMethod() {
}
const char* HGraph::GetMethodName() const {
- const DexFile::MethodId& method_id = dex_file_.GetMethodId(method_idx_);
+ const dex::MethodId& method_id = dex_file_.GetMethodId(method_idx_);
return dex_file_.GetMethodName(method_id);
}
@@ -825,7 +828,7 @@ void HLoopInformation::Populate() {
ScopedArenaAllocator allocator(graph->GetArenaStack());
ArenaBitVector visited(&allocator,
graph->GetBlocks().size(),
- /* expandable */ false,
+ /* expandable= */ false,
kArenaAllocGraphBuilder);
visited.ClearAllBits();
// Stop marking blocks at the loop header.
@@ -1121,6 +1124,23 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const {
user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node);
}
+void HEnvironment::ReplaceInput(HInstruction* replacement, size_t index) {
+ const HUserRecord<HEnvironment*>& env_use_record = vregs_[index];
+ HInstruction* orig_instr = env_use_record.GetInstruction();
+
+ DCHECK(orig_instr != replacement);
+
+ HUseList<HEnvironment*>::iterator before_use_node = env_use_record.GetBeforeUseNode();
+ // Note: fixup_end remains valid across splice_after().
+ auto fixup_end = replacement->env_uses_.empty() ? replacement->env_uses_.begin()
+ : ++replacement->env_uses_.begin();
+ replacement->env_uses_.splice_after(replacement->env_uses_.before_begin(),
+ env_use_record.GetInstruction()->env_uses_,
+ before_use_node);
+ replacement->FixUpUserRecordsAfterEnvUseInsertion(fixup_end);
+ orig_instr->FixUpUserRecordsAfterEnvUseRemoval(before_use_node);
+}
+
HInstruction* HInstruction::GetNextDisregardingMoves() const {
HInstruction* next = GetNext();
while (next != nullptr && next->IsParallelMove()) {
@@ -1213,7 +1233,7 @@ bool HInstructionList::FoundBefore(const HInstruction* instruction1,
}
}
LOG(FATAL) << "Did not find an order between two instructions of the same block.";
- return true;
+ UNREACHABLE();
}
bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const {
@@ -1236,7 +1256,7 @@ bool HInstruction::StrictlyDominates(HInstruction* other_instruction) const {
} else {
// There is no order among phis.
LOG(FATAL) << "There is no dominance between phis of a same block.";
- return false;
+ UNREACHABLE();
}
} else {
// `this` is not a phi.
@@ -1284,6 +1304,28 @@ void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction*
++it;
if (dominator->StrictlyDominates(user)) {
user->ReplaceInput(replacement, index);
+ } else if (user->IsPhi() && !user->AsPhi()->IsCatchPhi()) {
+ // If the input flows from a block dominated by `dominator`, we can replace it.
+ // We do not perform this for catch phis as we don't have control flow support
+ // for their inputs.
+ const ArenaVector<HBasicBlock*>& predecessors = user->GetBlock()->GetPredecessors();
+ HBasicBlock* predecessor = predecessors[index];
+ if (dominator->GetBlock()->Dominates(predecessor)) {
+ user->ReplaceInput(replacement, index);
+ }
+ }
+ }
+}
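
An illustrative example of the phi case above: if 'user' is phi(v1 from P1, v2 from P2)
and dominator's block dominates only predecessor P2, then only the second input is
rewritten:

    phi(v1, v2)  ->  phi(v1, replacement)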
+
+void HInstruction::ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+ const HUseList<HEnvironment*>& uses = GetEnvUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HEnvironment* user = it->GetUser();
+ size_t index = it->GetIndex();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (dominator->StrictlyDominates(user->GetHolder())) {
+ user->ReplaceInput(replacement, index);
}
}
}
@@ -1680,10 +1722,9 @@ bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
}
bool HInstruction::Equals(const HInstruction* other) const {
- if (!InstructionTypeEquals(other)) return false;
- DCHECK_EQ(GetKind(), other->GetKind());
- if (!InstructionDataEquals(other)) return false;
+ if (GetKind() != other->GetKind()) return false;
if (GetType() != other->GetType()) return false;
+ if (!InstructionDataEquals(other)) return false;
HConstInputsRef inputs = GetInputs();
HConstInputsRef other_inputs = other->GetInputs();
if (inputs.size() != other_inputs.size()) return false;
@@ -1698,7 +1739,7 @@ bool HInstruction::Equals(const HInstruction* other) const {
std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs) {
#define DECLARE_CASE(type, super) case HInstruction::k##type: os << #type; break;
switch (rhs) {
- FOR_EACH_INSTRUCTION(DECLARE_CASE)
+ FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_CASE)
default:
os << "Unknown instruction kind " << static_cast<int>(rhs);
break;
@@ -1952,6 +1993,11 @@ bool HBasicBlock::EndsWithControlFlowInstruction() const {
return !GetInstructions().IsEmpty() && GetLastInstruction()->IsControlFlow();
}
+bool HBasicBlock::EndsWithReturn() const {
+ return !GetInstructions().IsEmpty() &&
+ (GetLastInstruction()->IsReturn() || GetLastInstruction()->IsReturnVoid());
+}
+
bool HBasicBlock::EndsWithIf() const {
return !GetInstructions().IsEmpty() && GetLastInstruction()->IsIf();
}
@@ -2483,7 +2529,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
current->SetGraph(outer_graph);
outer_graph->AddBlock(current);
outer_graph->reverse_post_order_[++index_of_at] = current;
- UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge */ false);
+ UpdateLoopAndTryInformationOfNewBlock(current, at, /* replace_if_back_edge= */ false);
}
}
@@ -2493,7 +2539,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) {
outer_graph->reverse_post_order_[++index_of_at] = to;
// Only `to` can become a back edge, as the inlined blocks
// are predecessors of `to`.
- UpdateLoopAndTryInformationOfNewBlock(to, at, /* replace_if_back_edge */ true);
+ UpdateLoopAndTryInformationOfNewBlock(to, at, /* replace_if_back_edge= */ true);
// Update all predecessors of the exit block (now the `to` block)
// to not `HReturn` but `HGoto` instead. Special case throwing blocks
@@ -2667,13 +2713,13 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) {
DCHECK((old_pre_header->GetLoopInformation() == nullptr) ||
!old_pre_header->GetLoopInformation()->IsBackEdge(*old_pre_header));
UpdateLoopAndTryInformationOfNewBlock(
- if_block, old_pre_header, /* replace_if_back_edge */ false);
+ if_block, old_pre_header, /* replace_if_back_edge= */ false);
UpdateLoopAndTryInformationOfNewBlock(
- true_block, old_pre_header, /* replace_if_back_edge */ false);
+ true_block, old_pre_header, /* replace_if_back_edge= */ false);
UpdateLoopAndTryInformationOfNewBlock(
- false_block, old_pre_header, /* replace_if_back_edge */ false);
+ false_block, old_pre_header, /* replace_if_back_edge= */ false);
UpdateLoopAndTryInformationOfNewBlock(
- new_pre_header, old_pre_header, /* replace_if_back_edge */ false);
+ new_pre_header, old_pre_header, /* replace_if_back_edge= */ false);
}
HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header,
@@ -2765,6 +2811,14 @@ void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) {
SetPackedFlag<kFlagReferenceTypeIsExact>(rti.IsExact());
}
+bool HBoundType::InstructionDataEquals(const HInstruction* other) const {
+ const HBoundType* other_bt = other->AsBoundType();
+ ScopedObjectAccess soa(Thread::Current());
+ return GetUpperBound().IsEqual(other_bt->GetUpperBound()) &&
+ GetUpperCanBeNull() == other_bt->GetUpperCanBeNull() &&
+ CanBeNull() == other_bt->CanBeNull();
+}
+
void HBoundType::SetUpperBound(const ReferenceTypeInfo& upper_bound, bool can_be_null) {
if (kIsDebugBuild) {
ScopedObjectAccess soa(Thread::Current());
@@ -2850,8 +2904,7 @@ void HInvoke::SetIntrinsic(Intrinsics intrinsic,
}
bool HNewInstance::IsStringAlloc() const {
- ScopedObjectAccess soa(Thread::Current());
- return GetReferenceTypeInfo().IsStringClass();
+ return GetEntrypoint() == kQuickAllocStringObject;
}
bool HInvoke::NeedsEnvironment() const {
@@ -2889,10 +2942,12 @@ std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind
return os << "Recursive";
case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
return os << "BootImageLinkTimePcRelative";
- case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
- return os << "DirectAddress";
+ case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo:
+ return os << "BootImageRelRo";
case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry:
return os << "BssEntry";
+ case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
+ return os << "JitDirectAddress";
case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall:
return os << "RuntimeCall";
default:
@@ -2924,8 +2979,8 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const {
return false;
}
switch (GetLoadKind()) {
- case LoadKind::kBootImageAddress:
- case LoadKind::kBootImageClassTable:
+ case LoadKind::kBootImageRelRo:
+ case LoadKind::kJitBootImageAddress:
case LoadKind::kJitTableAddress: {
ScopedObjectAccess soa(Thread::Current());
return GetClass().Get() == other_load_class->GetClass().Get();
@@ -2942,12 +2997,12 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) {
return os << "ReferrersClass";
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
return os << "BootImageLinkTimePcRelative";
- case HLoadClass::LoadKind::kBootImageAddress:
- return os << "BootImageAddress";
- case HLoadClass::LoadKind::kBootImageClassTable:
- return os << "BootImageClassTable";
+ case HLoadClass::LoadKind::kBootImageRelRo:
+ return os << "BootImageRelRo";
case HLoadClass::LoadKind::kBssEntry:
return os << "BssEntry";
+ case HLoadClass::LoadKind::kJitBootImageAddress:
+ return os << "JitBootImageAddress";
case HLoadClass::LoadKind::kJitTableAddress:
return os << "JitTableAddress";
case HLoadClass::LoadKind::kRuntimeCall:
@@ -2967,8 +3022,8 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const {
return false;
}
switch (GetLoadKind()) {
- case LoadKind::kBootImageAddress:
- case LoadKind::kBootImageInternTable:
+ case LoadKind::kBootImageRelRo:
+ case LoadKind::kJitBootImageAddress:
case LoadKind::kJitTableAddress: {
ScopedObjectAccess soa(Thread::Current());
return GetString().Get() == other_load_string->GetString().Get();
@@ -2982,12 +3037,12 @@ std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) {
switch (rhs) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
return os << "BootImageLinkTimePcRelative";
- case HLoadString::LoadKind::kBootImageAddress:
- return os << "BootImageAddress";
- case HLoadString::LoadKind::kBootImageInternTable:
- return os << "BootImageInternTable";
+ case HLoadString::LoadKind::kBootImageRelRo:
+ return os << "BootImageRelRo";
case HLoadString::LoadKind::kBssEntry:
return os << "BssEntry";
+ case HLoadString::LoadKind::kJitBootImageAddress:
+ return os << "JitBootImageAddress";
case HLoadString::LoadKind::kJitTableAddress:
return os << "JitTableAddress";
case HLoadString::LoadKind::kRuntimeCall:
@@ -3101,6 +3156,8 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs) {
return os << "array_object_check";
case TypeCheckKind::kArrayCheck:
return os << "array_check";
+ case TypeCheckKind::kBitstringCheck:
+ return os << "bitstring_check";
default:
LOG(FATAL) << "Unknown TypeCheckKind: " << static_cast<int>(rhs);
UNREACHABLE();
@@ -3126,4 +3183,77 @@ std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind) {
}
}
+// Check that intrinsic enum values fit within space set aside in ArtMethod modifier flags.
+#define CHECK_INTRINSICS_ENUM_VALUES(Name, InvokeType, _, SideEffects, Exceptions, ...) \
+ static_assert( \
+ static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \
+ "Instrinsics enumeration space overflow.");
+#include "intrinsics_list.h"
+ INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES)
+#undef INTRINSICS_LIST
+#undef CHECK_INTRINSICS_ENUM_VALUES
+
+// Function that returns whether an intrinsic needs an environment or not.
+static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCacheIntrinsic(Intrinsics i) {
+ switch (i) {
+ case Intrinsics::kNone:
+ return kNeedsEnvironmentOrCache; // Nonsensical for an intrinsic.
+#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvOrCache, SideEffects, Exceptions, ...) \
+ case Intrinsics::k ## Name: \
+ return NeedsEnvOrCache;
+#include "intrinsics_list.h"
+ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+ }
+ return kNeedsEnvironmentOrCache;
+}
+
+// Function that returns whether an intrinsic has side effects.
+static inline IntrinsicSideEffects GetSideEffectsIntrinsic(Intrinsics i) {
+ switch (i) {
+ case Intrinsics::kNone:
+ return kAllSideEffects;
+#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvOrCache, SideEffects, Exceptions, ...) \
+ case Intrinsics::k ## Name: \
+ return SideEffects;
+#include "intrinsics_list.h"
+ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+ }
+ return kAllSideEffects;
+}
+
+// Function that returns whether an intrinsic can throw exceptions.
+static inline IntrinsicExceptions GetExceptionsIntrinsic(Intrinsics i) {
+ switch (i) {
+ case Intrinsics::kNone:
+ return kCanThrow;
+#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvOrCache, SideEffects, Exceptions, ...) \
+ case Intrinsics::k ## Name: \
+ return Exceptions;
+#include "intrinsics_list.h"
+ INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+ }
+ return kCanThrow;
+}
+
+void HInvoke::SetResolvedMethod(ArtMethod* method) {
+ // TODO: b/65872996 The intent is that polymorphic signature methods should
+ // be compiler intrinsics. At present, they are only interpreter intrinsics.
+ if (method != nullptr &&
+ method->IsIntrinsic() &&
+ !method->IsPolymorphicSignature()) {
+ Intrinsics intrinsic = static_cast<Intrinsics>(method->GetIntrinsic());
+ SetIntrinsic(intrinsic,
+ NeedsEnvironmentOrCacheIntrinsic(intrinsic),
+ GetSideEffectsIntrinsic(intrinsic),
+ GetExceptionsIntrinsic(intrinsic));
+ }
+ resolved_method_ = method;
+}
+
} // namespace art
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index fe992a7f39..fedad0c69a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -26,9 +26,11 @@
#include "base/arena_object.h"
#include "base/array_ref.h"
#include "base/iteration_range.h"
+#include "base/mutex.h"
#include "base/quasi_atomic.h"
#include "base/stl_util.h"
#include "base/transform_array_ref.h"
+#include "art_method.h"
#include "data_type.h"
#include "deoptimization_kind.h"
#include "dex/dex_file.h"
@@ -41,6 +43,7 @@
#include "intrinsics_enum.h"
#include "locations.h"
#include "mirror/class.h"
+#include "mirror/method_type.h"
#include "offsets.h"
#include "utils/intrusive_forward_list.h"
@@ -127,6 +130,7 @@ enum GraphAnalysisResult {
kAnalysisInvalidBytecode,
kAnalysisFailThrowCatchLoop,
kAnalysisFailAmbiguousArrayOp,
+ kAnalysisFailIrreducibleLoopAndStringInit,
kAnalysisSuccess,
};
@@ -313,6 +317,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
uint32_t method_idx,
InstructionSet instruction_set,
InvokeType invoke_type = kInvalidInvokeType,
+ bool dead_reference_safe = false,
bool debuggable = false,
bool osr = false,
int start_instruction_id = 0)
@@ -332,6 +337,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
has_simd_(false),
has_loops_(false),
has_irreducible_loops_(false),
+ dead_reference_safe_(dead_reference_safe),
debuggable_(debuggable),
current_instruction_id_(start_instruction_id),
dex_file_(dex_file),
@@ -522,6 +528,12 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
has_bounds_checks_ = value;
}
+ // Is the code known to be robust against eliminating dead references
+ // and the effects of early finalization?
+ bool IsDeadReferenceSafe() const { return dead_reference_safe_; }
+
+ void MarkDeadReferenceUnsafe() { dead_reference_safe_ = false; }
+
bool IsDebuggable() const { return debuggable_; }
// Returns a constant of the given type and value. If it does not exist
@@ -700,6 +712,14 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// so there might be false positives.
bool has_irreducible_loops_;
+ // Is the code known to be robust against eliminating dead references
+ // and the effects of early finalization? If false, dead reference variables
+ // are kept if they might be visible to the garbage collector.
+ // Currently this means that the class was declared to be dead-reference-safe,
+ // the method accesses no reachability-sensitive fields or data, and the same
+ // is true for any methods that were inlined into the current one.
+ bool dead_reference_safe_;
+
// Indicates whether the graph should be compiled in a way that
// ensures full debuggability. If false, we can apply more
// aggressive optimizations that may limit the level of debugging.
@@ -891,7 +911,7 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> {
explicit TryCatchInformation(const HTryBoundary& try_entry)
: try_entry_(&try_entry),
catch_dex_file_(nullptr),
- catch_type_index_(DexFile::kDexNoIndex16) {
+ catch_type_index_(dex::TypeIndex::Invalid()) {
DCHECK(try_entry_ != nullptr);
}
@@ -910,9 +930,9 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> {
bool IsCatchBlock() const { return catch_dex_file_ != nullptr; }
- bool IsCatchAllTypeIndex() const {
+ bool IsValidTypeIndex() const {
DCHECK(IsCatchBlock());
- return !catch_type_index_.IsValid();
+ return catch_type_index_.IsValid();
}
dex::TypeIndex GetCatchTypeIndex() const {
@@ -925,6 +945,10 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> {
return *catch_dex_file_;
}
+ void SetInvalidTypeIndex() {
+ catch_type_index_ = dex::TypeIndex::Invalid();
+ }
+
private:
// One of possibly several TryBoundary instructions entering the block's try.
// Only set for try blocks.
@@ -932,7 +956,7 @@ class TryCatchInformation : public ArenaObject<kArenaAllocTryCatchInfo> {
// Exception type information. Only set for catch blocks.
const DexFile* catch_dex_file_;
- const dex::TypeIndex catch_type_index_;
+ dex::TypeIndex catch_type_index_;
};
static constexpr size_t kNoLifetime = -1;
@@ -1284,6 +1308,7 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> {
void SetLifetimeEnd(size_t end) { lifetime_end_ = end; }
bool EndsWithControlFlowInstruction() const;
+ bool EndsWithReturn() const;
bool EndsWithIf() const;
bool EndsWithTryBoundary() const;
bool HasSinglePhi() const;
@@ -1338,6 +1363,7 @@ class HLoopInformationOutwardIterator : public ValueObject {
#define FOR_EACH_CONCRETE_INSTRUCTION_COMMON(M) \
M(Above, Condition) \
M(AboveOrEqual, Condition) \
+ M(Abs, UnaryOperation) \
M(Add, BinaryOperation) \
M(And, BinaryOperation) \
M(ArrayGet, Instruction) \
@@ -1377,13 +1403,18 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(InvokeStaticOrDirect, Invoke) \
M(InvokeVirtual, Invoke) \
M(InvokePolymorphic, Invoke) \
+ M(InvokeCustom, Invoke) \
M(LessThan, Condition) \
M(LessThanOrEqual, Condition) \
M(LoadClass, Instruction) \
M(LoadException, Instruction) \
+ M(LoadMethodHandle, Instruction) \
+ M(LoadMethodType, Instruction) \
M(LoadString, Instruction) \
M(LongConstant, Constant) \
+ M(Max, BinaryOperation) \
M(MemoryBarrier, Instruction) \
+ M(Min, BinaryOperation) \
M(MonitorOperation, Instruction) \
M(Mul, BinaryOperation) \
M(NativeDebugInfo, Instruction) \
@@ -1437,12 +1468,15 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(VecAndNot, VecBinaryOperation) \
M(VecOr, VecBinaryOperation) \
M(VecXor, VecBinaryOperation) \
+ M(VecSaturationAdd, VecBinaryOperation) \
+ M(VecSaturationSub, VecBinaryOperation) \
M(VecShl, VecBinaryOperation) \
M(VecShr, VecBinaryOperation) \
M(VecUShr, VecBinaryOperation) \
M(VecSetScalars, VecOperation) \
M(VecMultiplyAccumulate, VecOperation) \
M(VecSADAccumulate, VecOperation) \
+ M(VecDotProd, VecOperation) \
M(VecLoad, VecMemoryOperation) \
M(VecStore, VecMemoryOperation) \
@@ -1484,6 +1518,14 @@ class HLoopInformationOutwardIterator : public ValueObject {
M(X86PackedSwitch, Instruction)
#endif
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
+#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M) \
+ M(X86AndNot, Instruction) \
+ M(X86MaskOrResetLeastSetBit, Instruction)
+#else
+#define FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M)
+#endif
+
#define FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
#define FOR_EACH_CONCRETE_INSTRUCTION(M) \
@@ -1494,7 +1536,8 @@ class HLoopInformationOutwardIterator : public ValueObject {
FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \
FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) \
FOR_EACH_CONCRETE_INSTRUCTION_X86(M) \
- FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M)
+ FOR_EACH_CONCRETE_INSTRUCTION_X86_64(M) \
+ FOR_EACH_CONCRETE_INSTRUCTION_X86_COMMON(M)
#define FOR_EACH_ABSTRACT_INSTRUCTION(M) \
M(Condition, BinaryOperation) \
@@ -1519,23 +1562,17 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION)
private: \
H##type& operator=(const H##type&) = delete; \
public: \
- const char* DebugName() const OVERRIDE { return #type; } \
- bool InstructionTypeEquals(const HInstruction* other) const OVERRIDE { \
- return other->Is##type(); \
- } \
- HInstruction* Clone(ArenaAllocator* arena) const OVERRIDE { \
+ const char* DebugName() const override { return #type; } \
+ HInstruction* Clone(ArenaAllocator* arena) const override { \
DCHECK(IsClonable()); \
return new (arena) H##type(*this->As##type()); \
} \
- void Accept(HGraphVisitor* visitor) OVERRIDE
+ void Accept(HGraphVisitor* visitor) override
#define DECLARE_ABSTRACT_INSTRUCTION(type) \
private: \
H##type& operator=(const H##type&) = delete; \
- public: \
- bool Is##type() const { return As##type() != nullptr; } \
- const H##type* As##type() const { return this; } \
- H##type* As##type() { return this; }
+ public:
#define DEFAULT_COPY_CONSTRUCTOR(type) \
explicit H##type(const H##type& other) = default;
@@ -1622,6 +1659,21 @@ using HConstInputsRef = TransformArrayRef<const HUserRecord<HInstruction*>, HInp
* the same, and any reference read depends on any reference read without
* further regard of its type).
*
+ * kDependsOnGCBit is defined in the following way: an instruction with kDependsOnGCBit
+ * must not be live across a point where garbage collection might happen.
+ *
+ * Note: Instructions with kCanTriggerGCBit do not depend on each other.
+ *
+ * kCanTriggerGCBit must be used for instructions across which, from the compiler's
+ * perspective, GC might happen (that is, between this instruction and the next one
+ * in the IR).
+ *
+ * Note: Instructions which can cause GC only on a fatal slow path do not need
+ * kCanTriggerGCBit, as execution never returns to the instruction following the
+ * exceptional one. However, execution may return to compiled code if there is a catch
+ * block in the current method; for this purpose the TryBoundary exit instruction has
+ * kCanTriggerGCBit set.
+ *
 * The internal representation uses 38 bits and is described in the table below.
* The first line indicates the side effect, and for field/array accesses the
* second line indicates the type of the access (in the order of the
@@ -1694,10 +1746,17 @@ class SideEffects : public ValueObject {
return SideEffects(TypeFlag(type, kArrayReadOffset));
}
+ // Returns side effects that mark an instruction across which, from the compiler's
+ // perspective, GC might happen (so the next instruction in the IR would observe it).
+ //
+ // See the SideEffects class comments.
static SideEffects CanTriggerGC() {
return SideEffects(1ULL << kCanTriggerGCBit);
}
+ // Returns side effects marking an instruction whose value must not be live across a GC point.
+ //
+ // See the SideEffects class comments.
static SideEffects DependsOnGC() {
return SideEffects(1ULL << kDependsOnGCBit);
}
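
A minimal sketch of how these two factories are meant to interact, using the
Union()/Includes() combinators declared on this class:

    // An allocation-like instruction that may trigger GC:
    SideEffects alloc = SideEffects::CanTriggerGC().Union(SideEffects::AllWrites());

    // An intermediate value invalidated by object movement:
    SideEffects addr = SideEffects::DependsOnGC();

    // Analyses must not keep the DependsOnGC value live across the
    // CanTriggerGC instruction:
    bool barrier = alloc.Includes(SideEffects::CanTriggerGC()) &&
                   addr.Includes(SideEffects::DependsOnGC());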
@@ -1906,6 +1965,11 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
void RemoveAsUserOfInput(size_t index) const;
+ // Replaces the input at position `index` with the replacement; the env_uses_ lists of the
+ // replacement and the old input instruction are adjusted. The function works similarly to
+ // HInstruction::ReplaceInput.
+ void ReplaceInput(HInstruction* replacement, size_t index);
+
size_t Size() const { return vregs_.size(); }
HEnvironment* GetParent() const { return parent_; }
@@ -1954,12 +2018,15 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
public:
#define DECLARE_KIND(type, super) k##type,
enum InstructionKind {
- FOR_EACH_INSTRUCTION(DECLARE_KIND)
+ FOR_EACH_CONCRETE_INSTRUCTION(DECLARE_KIND)
kLastInstructionKind
};
#undef DECLARE_KIND
HInstruction(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc)
+ : HInstruction(kind, DataType::Type::kVoid, side_effects, dex_pc) {}
+
+ HInstruction(InstructionKind kind, DataType::Type type, SideEffects side_effects, uint32_t dex_pc)
: previous_(nullptr),
next_(nullptr),
block_(nullptr),
@@ -1974,6 +2041,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
side_effects_(side_effects),
reference_type_handle_(ReferenceTypeInfo::CreateInvalid().GetTypeHandle()) {
SetPackedField<InstructionKindField>(kind);
+ SetPackedField<TypeField>(type);
SetPackedFlag<kFlagReferenceTypeIsExact>(ReferenceTypeInfo::CreateInvalid().IsExact());
}
@@ -2031,7 +2099,9 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
virtual void Accept(HGraphVisitor* visitor) = 0;
virtual const char* DebugName() const = 0;
- virtual DataType::Type GetType() const { return DataType::Type::kVoid; }
+ DataType::Type GetType() const {
+ return TypeField::Decode(GetPackedFields());
+ }
virtual bool NeedsEnvironment() const { return false; }
@@ -2064,6 +2134,19 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
return false;
}
+ // If this instruction will do an implicit null check, return the `HNullCheck` associated
+ // with it. Otherwise return null.
+ HNullCheck* GetImplicitNullCheck() const {
+ // Find the first previous instruction which is not a move.
+ HInstruction* first_prev_not_move = GetPreviousDisregardingMoves();
+ if (first_prev_not_move != nullptr &&
+ first_prev_not_move->IsNullCheck() &&
+ first_prev_not_move->IsEmittedAtUseSite()) {
+ return first_prev_not_move->AsNullCheck();
+ }
+ return nullptr;
+ }
+
virtual bool IsActualObject() const {
return GetType() == DataType::Type::kReference;
}
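
A hedged sketch of the intended call site for GetImplicitNullCheck() above in a
backend; RecordPcForNullCheck is a hypothetical name for whatever bookkeeping
the code generator performs:

    void EmitFieldGet(HInstruction* field_get) {
      // If the preceding HNullCheck is emitted at its use site, the memory
      // access below doubles as the null check, so its PC must be recorded.
      if (HNullCheck* null_check = field_get->GetImplicitNullCheck()) {
        RecordPcForNullCheck(null_check);  // hypothetical backend hook
      }
      // ... emit the actual load ...
    }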
@@ -2202,6 +2285,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
void ReplaceWith(HInstruction* instruction);
void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
+ void ReplaceEnvUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
void ReplaceInput(HInstruction* replacement, size_t index);
// This is almost the same as doing `ReplaceWith()`. But in this helper, the
@@ -2223,19 +2307,17 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
void MoveBeforeFirstUserAndOutOfLoops();
#define INSTRUCTION_TYPE_CHECK(type, super) \
- bool Is##type() const; \
- const H##type* As##type() const; \
- H##type* As##type();
+ bool Is##type() const;
- FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+ FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
#undef INSTRUCTION_TYPE_CHECK
-#define INSTRUCTION_TYPE_CHECK(type, super) \
- bool Is##type() const { return (As##type() != nullptr); } \
- virtual const H##type* As##type() const { return nullptr; } \
- virtual H##type* As##type() { return nullptr; }
- FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
-#undef INSTRUCTION_TYPE_CHECK
+#define INSTRUCTION_TYPE_CAST(type, super) \
+ const H##type* As##type() const; \
+ H##type* As##type();
+
+ FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST)
+#undef INSTRUCTION_TYPE_CAST
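
For a single concrete kind such as Add, the two macros declare the following;
the out-of-line definitions are a sketch (presumably Is##type now compares the
packed GetKind(), which is what lets the virtual As##type overrides be removed):

    bool IsAdd() const;
    const HAdd* AsAdd() const;
    HAdd* AsAdd();

    // Plausible definitions, not the verbatim ones:
    inline bool HInstruction::IsAdd() const { return GetKind() == kAdd; }
    inline HAdd* HInstruction::AsAdd() {
      DCHECK(IsAdd());
      return static_cast<HAdd*>(this);
    }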
// Return a clone of the instruction if it is clonable (shallow copy by default, custom copy
// if a custom copy-constructor is provided for a particular type). If IsClonable() is false for
@@ -2261,11 +2343,6 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
// meanings? split and rename?
virtual bool CanBeMoved() const { return false; }
- // Returns whether the two instructions are of the same kind.
- virtual bool InstructionTypeEquals(const HInstruction* other ATTRIBUTE_UNUSED) const {
- return false;
- }
-
// Returns whether any data encoded in the two instructions is equal.
// This method does not look at the inputs. Both instructions must be
// of the same type, otherwise the method has undefined behavior.
@@ -2278,10 +2355,6 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
// 2) Their inputs are identical.
bool Equals(const HInstruction* other) const;
- // TODO: Remove this indirection when the [[pure]] attribute proposal (n3744)
- // is adopted and implemented by our C++ compiler(s). Fow now, we need to hide
- // the virtual function because the __attribute__((__pure__)) doesn't really
- // apply the strong requirement for virtual functions, preventing optimizations.
InstructionKind GetKind() const { return GetPackedField<InstructionKindField>(); }
virtual size_t ComputeHashCode() const {
@@ -2337,13 +2410,18 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> {
static constexpr size_t kFieldInstructionKind = kFlagReferenceTypeIsExact + 1;
static constexpr size_t kFieldInstructionKindSize =
MinimumBitsToStore(static_cast<size_t>(InstructionKind::kLastInstructionKind - 1));
- static constexpr size_t kNumberOfGenericPackedBits =
+ static constexpr size_t kFieldType =
kFieldInstructionKind + kFieldInstructionKindSize;
+ static constexpr size_t kFieldTypeSize =
+ MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast));
+ static constexpr size_t kNumberOfGenericPackedBits = kFieldType + kFieldTypeSize;
static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte;
static_assert(kNumberOfGenericPackedBits <= kMaxNumberOfPackedBits,
"Too many generic packed fields");
+ using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>;
+
const HUserRecord<HInstruction*> InputRecordAt(size_t i) const {
return GetInputRecords()[i];
}
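
A minimal sketch of the bit-field packing TypeField relies on (ART's BitField
helper is assumed to behave like this):

    // Packs a value into bits [pos, pos + size) of a 32-bit word.
    template <typename T, size_t pos, size_t size>
    struct BitFieldSketch {
      static constexpr uint32_t kMask = ((1u << size) - 1u) << pos;
      static constexpr uint32_t Encode(T value) {
        return (static_cast<uint32_t>(value) << pos) & kMask;
      }
      static constexpr T Decode(uint32_t packed) {
        return static_cast<T>((packed & kMask) >> pos);
      }
    };

GetType() thus becomes a mask-and-shift on the packed fields instead of a
virtual call, which is the point of moving the type into HInstruction.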
@@ -2568,7 +2646,7 @@ class HBackwardInstructionIterator : public ValueObject {
class HVariableInputSizeInstruction : public HInstruction {
public:
using HInstruction::GetInputRecords; // Keep the const version visible.
- ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+ ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() override {
return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
}
@@ -2590,6 +2668,15 @@ class HVariableInputSizeInstruction : public HInstruction {
ArenaAllocKind kind)
: HInstruction(inst_kind, side_effects, dex_pc),
inputs_(number_of_inputs, allocator->Adapter(kind)) {}
+ HVariableInputSizeInstruction(InstructionKind inst_kind,
+ DataType::Type type,
+ SideEffects side_effects,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ size_t number_of_inputs,
+ ArenaAllocKind kind)
+ : HInstruction(inst_kind, type, side_effects, dex_pc),
+ inputs_(number_of_inputs, allocator->Adapter(kind)) {}
DEFAULT_COPY_CONSTRUCTOR(VariableInputSizeInstruction);
@@ -2597,19 +2684,24 @@ class HVariableInputSizeInstruction : public HInstruction {
};
template<size_t N>
-class HTemplateInstruction: public HInstruction {
+class HExpression : public HInstruction {
public:
- HTemplateInstruction<N>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc)
+ HExpression<N>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc)
: HInstruction(kind, side_effects, dex_pc), inputs_() {}
- virtual ~HTemplateInstruction() {}
+ HExpression<N>(InstructionKind kind,
+ DataType::Type type,
+ SideEffects side_effects,
+ uint32_t dex_pc)
+ : HInstruction(kind, type, side_effects, dex_pc), inputs_() {}
+ virtual ~HExpression() {}
using HInstruction::GetInputRecords; // Keep the const version visible.
- ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+ ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final {
return ArrayRef<HUserRecord<HInstruction*>>(inputs_);
}
protected:
- DEFAULT_COPY_CONSTRUCTOR(TemplateInstruction<N>);
+ DEFAULT_COPY_CONSTRUCTOR(Expression<N>);
private:
std::array<HUserRecord<HInstruction*>, N> inputs_;
@@ -2617,64 +2709,35 @@ class HTemplateInstruction: public HInstruction {
friend class SsaBuilder;
};
-// HTemplateInstruction specialization for N=0.
+// HExpression specialization for N=0.
template<>
-class HTemplateInstruction<0>: public HInstruction {
+class HExpression<0> : public HInstruction {
public:
- explicit HTemplateInstruction<0>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc)
- : HInstruction(kind, side_effects, dex_pc) {}
+ using HInstruction::HInstruction;
- virtual ~HTemplateInstruction() {}
+ virtual ~HExpression() {}
using HInstruction::GetInputRecords; // Keep the const version visible.
- ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+ ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final {
return ArrayRef<HUserRecord<HInstruction*>>();
}
protected:
- DEFAULT_COPY_CONSTRUCTOR(TemplateInstruction<0>);
+ DEFAULT_COPY_CONSTRUCTOR(Expression<0>);
private:
friend class SsaBuilder;
};
-template<intptr_t N>
-class HExpression : public HTemplateInstruction<N> {
- public:
- using HInstruction::InstructionKind;
- HExpression<N>(InstructionKind kind,
- DataType::Type type,
- SideEffects side_effects,
- uint32_t dex_pc)
- : HTemplateInstruction<N>(kind, side_effects, dex_pc) {
- this->template SetPackedField<TypeField>(type);
- }
- virtual ~HExpression() {}
-
- DataType::Type GetType() const OVERRIDE {
- return TypeField::Decode(this->GetPackedFields());
- }
-
- protected:
- static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
- static constexpr size_t kFieldTypeSize =
- MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast));
- static constexpr size_t kNumberOfExpressionPackedBits = kFieldType + kFieldTypeSize;
- static_assert(kNumberOfExpressionPackedBits <= HInstruction::kMaxNumberOfPackedBits,
- "Too many packed fields.");
- using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>;
- DEFAULT_COPY_CONSTRUCTOR(Expression<N>);
-};
-
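
With HTemplateInstruction folded into HExpression, a new fixed-arity node is
declared as below. HMyNeg/kMyNeg are purely illustrative; a real node would
also need an entry in the FOR_EACH_CONCRETE_INSTRUCTION list:

    class HMyNeg final : public HExpression<1> {
     public:
      HMyNeg(HInstruction* input, uint32_t dex_pc = kNoDexPc)
          : HExpression(kMyNeg,                  // illustrative InstructionKind
                        DataType::Type::kInt32,  // result type, packed in HInstruction
                        SideEffects::None(),
                        dex_pc) {
        SetRawInputAt(0, input);
      }
      DECLARE_INSTRUCTION(MyNeg);
     protected:
      DEFAULT_COPY_CONSTRUCTOR(MyNeg);
    };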
// Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow
// instruction that branches to the exit block.
-class HReturnVoid FINAL : public HTemplateInstruction<0> {
+class HReturnVoid final : public HExpression<0> {
public:
explicit HReturnVoid(uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kReturnVoid, SideEffects::None(), dex_pc) {
+ : HExpression(kReturnVoid, SideEffects::None(), dex_pc) {
}
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsControlFlow() const override { return true; }
DECLARE_INSTRUCTION(ReturnVoid);
@@ -2684,14 +2747,14 @@ class HReturnVoid FINAL : public HTemplateInstruction<0> {
// Represents dex's RETURN opcodes. A HReturn is a control flow
// instruction that branches to the exit block.
-class HReturn FINAL : public HTemplateInstruction<1> {
+class HReturn final : public HExpression<1> {
public:
explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kReturn, SideEffects::None(), dex_pc) {
+ : HExpression(kReturn, SideEffects::None(), dex_pc) {
SetRawInputAt(0, value);
}
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsControlFlow() const override { return true; }
DECLARE_INSTRUCTION(Return);
@@ -2699,7 +2762,7 @@ class HReturn FINAL : public HTemplateInstruction<1> {
DEFAULT_COPY_CONSTRUCTOR(Return);
};
-class HPhi FINAL : public HVariableInputSizeInstruction {
+class HPhi final : public HVariableInputSizeInstruction {
public:
HPhi(ArenaAllocator* allocator,
uint32_t reg_number,
@@ -2708,13 +2771,13 @@ class HPhi FINAL : public HVariableInputSizeInstruction {
uint32_t dex_pc = kNoDexPc)
: HVariableInputSizeInstruction(
kPhi,
+ ToPhiType(type),
SideEffects::None(),
dex_pc,
allocator,
number_of_inputs,
kArenaAllocPhiInputs),
reg_number_(reg_number) {
- SetPackedField<TypeField>(ToPhiType(type));
DCHECK_NE(GetType(), DataType::Type::kVoid);
// Phis are constructed live and marked dead if conflicting or unused.
// Individual steps of SsaBuilder should assume that if a phi has been
@@ -2723,7 +2786,7 @@ class HPhi FINAL : public HVariableInputSizeInstruction {
SetPackedFlag<kFlagCanBeNull>(true);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
// Returns a type equivalent to the given `type`, but that a `HPhi` can hold.
static DataType::Type ToPhiType(DataType::Type type) {
@@ -2732,7 +2795,6 @@ class HPhi FINAL : public HVariableInputSizeInstruction {
bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); }
- DataType::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); }
void SetType(DataType::Type new_type) {
// Make sure that only valid type changes occur. The following are allowed:
// (1) int -> float/ref (primitive type propagation),
@@ -2744,7 +2806,7 @@ class HPhi FINAL : public HVariableInputSizeInstruction {
SetPackedField<TypeField>(new_type);
}
- bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
+ bool CanBeNull() const override { return GetPackedFlag<kFlagCanBeNull>(); }
void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
uint32_t GetRegNumber() const { return reg_number_; }
@@ -2791,14 +2853,10 @@ class HPhi FINAL : public HVariableInputSizeInstruction {
DEFAULT_COPY_CONSTRUCTOR(Phi);
private:
- static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
- static constexpr size_t kFieldTypeSize =
- MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast));
- static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize;
+ static constexpr size_t kFlagIsLive = HInstruction::kNumberOfGenericPackedBits;
static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1;
static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1;
static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
- using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>;
const uint32_t reg_number_;
};
@@ -2806,13 +2864,13 @@ class HPhi FINAL : public HVariableInputSizeInstruction {
// The exit instruction is the only instruction of the exit block.
// Instructions aborting the method (HThrow and HReturn) must branch to the
// exit block.
-class HExit FINAL : public HTemplateInstruction<0> {
+class HExit final : public HExpression<0> {
public:
explicit HExit(uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kExit, SideEffects::None(), dex_pc) {
+ : HExpression(kExit, SideEffects::None(), dex_pc) {
}
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsControlFlow() const override { return true; }
DECLARE_INSTRUCTION(Exit);
@@ -2821,14 +2879,14 @@ class HExit FINAL : public HTemplateInstruction<0> {
};
// Jumps from one block to another.
-class HGoto FINAL : public HTemplateInstruction<0> {
+class HGoto final : public HExpression<0> {
public:
explicit HGoto(uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kGoto, SideEffects::None(), dex_pc) {
+ : HExpression(kGoto, SideEffects::None(), dex_pc) {
}
- bool IsClonable() const OVERRIDE { return true; }
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
+ bool IsControlFlow() const override { return true; }
HBasicBlock* GetSuccessor() const {
return GetBlock()->GetSingleSuccessor();
@@ -2846,7 +2904,7 @@ class HConstant : public HExpression<0> {
: HExpression(kind, type, SideEffects::None(), dex_pc) {
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
// Is this constant -1 in the arithmetic sense?
virtual bool IsMinusOne() const { return false; }
@@ -2865,18 +2923,18 @@ class HConstant : public HExpression<0> {
DEFAULT_COPY_CONSTRUCTOR(Constant);
};
-class HNullConstant FINAL : public HConstant {
+class HNullConstant final : public HConstant {
public:
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- uint64_t GetValueAsUint64() const OVERRIDE { return 0; }
+ uint64_t GetValueAsUint64() const override { return 0; }
- size_t ComputeHashCode() const OVERRIDE { return 0; }
+ size_t ComputeHashCode() const override { return 0; }
// The null constant representation is a 0-bit pattern.
- virtual bool IsZeroBitPattern() const { return true; }
+ bool IsZeroBitPattern() const override { return true; }
DECLARE_INSTRUCTION(NullConstant);
@@ -2893,25 +2951,25 @@ class HNullConstant FINAL : public HConstant {
// Constants of the type int. Those can be from Dex instructions, or
// synthesized (for example with the if-eqz instruction).
-class HIntConstant FINAL : public HConstant {
+class HIntConstant final : public HConstant {
public:
int32_t GetValue() const { return value_; }
- uint64_t GetValueAsUint64() const OVERRIDE {
+ uint64_t GetValueAsUint64() const override {
return static_cast<uint64_t>(static_cast<uint32_t>(value_));
}
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsIntConstant()) << other->DebugName();
return other->AsIntConstant()->value_ == value_;
}
- size_t ComputeHashCode() const OVERRIDE { return GetValue(); }
+ size_t ComputeHashCode() const override { return GetValue(); }
- bool IsMinusOne() const OVERRIDE { return GetValue() == -1; }
- bool IsArithmeticZero() const OVERRIDE { return GetValue() == 0; }
- bool IsZeroBitPattern() const OVERRIDE { return GetValue() == 0; }
- bool IsOne() const OVERRIDE { return GetValue() == 1; }
+ bool IsMinusOne() const override { return GetValue() == -1; }
+ bool IsArithmeticZero() const override { return GetValue() == 0; }
+ bool IsZeroBitPattern() const override { return GetValue() == 0; }
+ bool IsOne() const override { return GetValue() == 1; }
// Integer constants are used to encode Boolean values as well,
// where 1 means true and 0 means false.
@@ -2939,23 +2997,23 @@ class HIntConstant FINAL : public HConstant {
ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast);
};
-class HLongConstant FINAL : public HConstant {
+class HLongConstant final : public HConstant {
public:
int64_t GetValue() const { return value_; }
- uint64_t GetValueAsUint64() const OVERRIDE { return value_; }
+ uint64_t GetValueAsUint64() const override { return value_; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsLongConstant()) << other->DebugName();
return other->AsLongConstant()->value_ == value_;
}
- size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+ size_t ComputeHashCode() const override { return static_cast<size_t>(GetValue()); }
- bool IsMinusOne() const OVERRIDE { return GetValue() == -1; }
- bool IsArithmeticZero() const OVERRIDE { return GetValue() == 0; }
- bool IsZeroBitPattern() const OVERRIDE { return GetValue() == 0; }
- bool IsOne() const OVERRIDE { return GetValue() == 1; }
+ bool IsMinusOne() const override { return GetValue() == -1; }
+ bool IsArithmeticZero() const override { return GetValue() == 0; }
+ bool IsZeroBitPattern() const override { return GetValue() == 0; }
+ bool IsOne() const override { return GetValue() == 1; }
DECLARE_INSTRUCTION(LongConstant);
@@ -2973,25 +3031,25 @@ class HLongConstant FINAL : public HConstant {
friend class HGraph;
};
-class HFloatConstant FINAL : public HConstant {
+class HFloatConstant final : public HConstant {
public:
float GetValue() const { return value_; }
- uint64_t GetValueAsUint64() const OVERRIDE {
+ uint64_t GetValueAsUint64() const override {
return static_cast<uint64_t>(bit_cast<uint32_t, float>(value_));
}
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsFloatConstant()) << other->DebugName();
return other->AsFloatConstant()->GetValueAsUint64() == GetValueAsUint64();
}
- size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+ size_t ComputeHashCode() const override { return static_cast<size_t>(GetValue()); }
- bool IsMinusOne() const OVERRIDE {
+ bool IsMinusOne() const override {
return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>((-1.0f));
}
- bool IsArithmeticZero() const OVERRIDE {
+ bool IsArithmeticZero() const override {
return std::fpclassify(value_) == FP_ZERO;
}
bool IsArithmeticPositiveZero() const {
@@ -3000,10 +3058,10 @@ class HFloatConstant FINAL : public HConstant {
bool IsArithmeticNegativeZero() const {
return IsArithmeticZero() && std::signbit(value_);
}
- bool IsZeroBitPattern() const OVERRIDE {
+ bool IsZeroBitPattern() const override {
return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(0.0f);
}
- bool IsOne() const OVERRIDE {
+ bool IsOne() const override {
return bit_cast<uint32_t, float>(value_) == bit_cast<uint32_t, float>(1.0f);
}
bool IsNaN() const {
@@ -3032,23 +3090,23 @@ class HFloatConstant FINAL : public HConstant {
friend class HGraph;
};
-class HDoubleConstant FINAL : public HConstant {
+class HDoubleConstant final : public HConstant {
public:
double GetValue() const { return value_; }
- uint64_t GetValueAsUint64() const OVERRIDE { return bit_cast<uint64_t, double>(value_); }
+ uint64_t GetValueAsUint64() const override { return bit_cast<uint64_t, double>(value_); }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsDoubleConstant()) << other->DebugName();
return other->AsDoubleConstant()->GetValueAsUint64() == GetValueAsUint64();
}
- size_t ComputeHashCode() const OVERRIDE { return static_cast<size_t>(GetValue()); }
+ size_t ComputeHashCode() const override { return static_cast<size_t>(GetValue()); }
- bool IsMinusOne() const OVERRIDE {
+ bool IsMinusOne() const override {
return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((-1.0));
}
- bool IsArithmeticZero() const OVERRIDE {
+ bool IsArithmeticZero() const override {
return std::fpclassify(value_) == FP_ZERO;
}
bool IsArithmeticPositiveZero() const {
@@ -3057,10 +3115,10 @@ class HDoubleConstant FINAL : public HConstant {
bool IsArithmeticNegativeZero() const {
return IsArithmeticZero() && std::signbit(value_);
}
- bool IsZeroBitPattern() const OVERRIDE {
+ bool IsZeroBitPattern() const override {
return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>((0.0));
}
- bool IsOne() const OVERRIDE {
+ bool IsOne() const override {
return bit_cast<uint64_t, double>(value_) == bit_cast<uint64_t, double>(1.0);
}
bool IsNaN() const {
@@ -3091,15 +3149,15 @@ class HDoubleConstant FINAL : public HConstant {
// Conditional branch. A block ending with an HIf instruction must have
// two successors.
-class HIf FINAL : public HTemplateInstruction<1> {
+class HIf final : public HExpression<1> {
public:
explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kIf, SideEffects::None(), dex_pc) {
+ : HExpression(kIf, SideEffects::None(), dex_pc) {
SetRawInputAt(0, input);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
+ bool IsControlFlow() const override { return true; }
HBasicBlock* IfTrueSuccessor() const {
return GetBlock()->GetSuccessors()[0];
@@ -3121,7 +3179,7 @@ class HIf FINAL : public HTemplateInstruction<1> {
// non-exceptional control flow.
// Normal-flow successor is stored at index zero, exception handlers under
// higher indices in no particular order.
-class HTryBoundary FINAL : public HTemplateInstruction<0> {
+class HTryBoundary final : public HExpression<0> {
public:
enum class BoundaryKind {
kEntry,
@@ -3129,12 +3187,19 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> {
kLast = kExit
};
+ // SideEffects::CanTriggerGC prevents instructions with SideEffects::DependsOnGC from being
+ // live across the edges entering a catch block, since GC might happen while an exception is
+ // being thrown. TryBoundary with BoundaryKind::kExit is used for that conservatively, as
+ // there is no dedicated HInstruction at which a catch block must start.
explicit HTryBoundary(BoundaryKind kind, uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kTryBoundary, SideEffects::None(), dex_pc) {
+ : HExpression(kTryBoundary,
+ (kind == BoundaryKind::kExit) ? SideEffects::CanTriggerGC()
+ : SideEffects::None(),
+ dex_pc) {
SetPackedField<BoundaryKindField>(kind);
}
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsControlFlow() const override { return true; }
// Returns the block's non-exceptional successor (index zero).
HBasicBlock* GetNormalFlowSuccessor() const { return GetBlock()->GetSuccessors()[0]; }
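
A sketch of the resulting effects for the two boundary kinds built by the
constructor above, assuming an ArenaAllocator* allocator and a dex_pc in scope:

    HTryBoundary* entry =
        new (allocator) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, dex_pc);
    HTryBoundary* exit =
        new (allocator) HTryBoundary(HTryBoundary::BoundaryKind::kExit, dex_pc);

    // The entry boundary carries no side effects; the exit boundary
    // conservatively carries CanTriggerGC so DependsOnGC values are not
    // kept live into catch handlers.
    DCHECK(exit->GetSideEffects().Includes(SideEffects::CanTriggerGC()));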
@@ -3180,7 +3245,7 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> {
};
// Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize FINAL : public HVariableInputSizeInstruction {
+class HDeoptimize final : public HVariableInputSizeInstruction {
public:
 // Use this constructor when the `HDeoptimize` acts as a barrier, across which no code
 // can move.
@@ -3193,14 +3258,14 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction {
SideEffects::All(),
dex_pc,
allocator,
- /* number_of_inputs */ 1,
+ /* number_of_inputs= */ 1,
kArenaAllocMisc) {
SetPackedFlag<kFieldCanBeMoved>(false);
SetPackedField<DeoptimizeKindField>(kind);
SetRawInputAt(0, cond);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
// Use this constructor when the `HDeoptimize` guards an instruction, and any user
// that relies on the deoptimization to pass should have its input be the `HDeoptimize`
@@ -3214,10 +3279,11 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction {
uint32_t dex_pc)
: HVariableInputSizeInstruction(
kDeoptimize,
+ guard->GetType(),
SideEffects::CanTriggerGC(),
dex_pc,
allocator,
- /* number_of_inputs */ 2,
+ /* number_of_inputs= */ 2,
kArenaAllocMisc) {
SetPackedFlag<kFieldCanBeMoved>(true);
SetPackedField<DeoptimizeKindField>(kind);
@@ -3225,22 +3291,18 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction {
SetRawInputAt(1, guard);
}
- bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); }
+ bool CanBeMoved() const override { return GetPackedFlag<kFieldCanBeMoved>(); }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind());
}
- bool NeedsEnvironment() const OVERRIDE { return true; }
+ bool NeedsEnvironment() const override { return true; }
- bool CanThrow() const OVERRIDE { return true; }
+ bool CanThrow() const override { return true; }
DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); }
- DataType::Type GetType() const OVERRIDE {
- return GuardsAnInput() ? GuardedInput()->GetType() : DataType::Type::kVoid;
- }
-
bool GuardsAnInput() const {
return InputCount() == 2;
}
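
A hedged sketch of the guarding pattern the second constructor supports,
assuming the parameter order (allocator, cond, guard, kind, dex_pc) suggested
by the hunk above; `condition` and `guard` are illustrative values:

    HDeoptimize* deopt = new (allocator) HDeoptimize(
        allocator, condition, guard, DeoptimizationKind::kCHA, dex_pc);

    // Users that rely on the check passing must consume the HDeoptimize
    // itself; it now reports the guarded input's type via the base class.
    guard->ReplaceUsesDominatedBy(deopt, deopt);
    DCHECK_EQ(deopt->GetType(), guard->GetType());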
@@ -3277,12 +3339,13 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction {
 // if it's true, starts deoptimization.
 // It has a 4-byte slot on the stack.
// TODO: allocate a register for this flag.
-class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction {
+class HShouldDeoptimizeFlag final : public HVariableInputSizeInstruction {
public:
 // CHA guards are only optimized in a separate pass and have no side effects
 // with regard to other passes.
HShouldDeoptimizeFlag(ArenaAllocator* allocator, uint32_t dex_pc)
: HVariableInputSizeInstruction(kShouldDeoptimizeFlag,
+ DataType::Type::kInt32,
SideEffects::None(),
dex_pc,
allocator,
@@ -3290,13 +3353,11 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction {
kArenaAllocCHA) {
}
- DataType::Type GetType() const OVERRIDE { return DataType::Type::kInt32; }
-
 // We do all CHA guard elimination/motion in a single pass, after which there is no
 // further guard elimination/motion, since a guard might have been used to justify the
 // elimination of another guard. Therefore, we pretend this guard cannot be moved, to
 // keep other optimizations from trying to move it.
- bool CanBeMoved() const OVERRIDE { return false; }
+ bool CanBeMoved() const override { return false; }
DECLARE_INSTRUCTION(ShouldDeoptimizeFlag);
@@ -3307,7 +3368,7 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction {
// Represents the ArtMethod that was passed as a first argument to
// the method. It is used by instructions that depend on it, like
// instructions that work with the dex cache.
-class HCurrentMethod FINAL : public HExpression<0> {
+class HCurrentMethod final : public HExpression<0> {
public:
explicit HCurrentMethod(DataType::Type type, uint32_t dex_pc = kNoDexPc)
: HExpression(kCurrentMethod, type, SideEffects::None(), dex_pc) {
@@ -3321,7 +3382,7 @@ class HCurrentMethod FINAL : public HExpression<0> {
// Fetches an ArtMethod from the virtual table or the interface method table
// of a class.
-class HClassTableGet FINAL : public HExpression<1> {
+class HClassTableGet final : public HExpression<1> {
public:
enum class TableKind {
kVTable,
@@ -3339,9 +3400,9 @@ class HClassTableGet FINAL : public HExpression<1> {
SetRawInputAt(0, cls);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other) const override {
return other->AsClassTableGet()->GetIndex() == index_ &&
other->AsClassTableGet()->GetPackedFields() == GetPackedFields();
}
@@ -3355,7 +3416,7 @@ class HClassTableGet FINAL : public HExpression<1> {
DEFAULT_COPY_CONSTRUCTOR(ClassTableGet);
private:
- static constexpr size_t kFieldTableKind = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFieldTableKind = kNumberOfGenericPackedBits;
static constexpr size_t kFieldTableKindSize =
MinimumBitsToStore(static_cast<size_t>(TableKind::kLast));
static constexpr size_t kNumberOfClassTableGetPackedBits = kFieldTableKind + kFieldTableKindSize;
@@ -3370,21 +3431,21 @@ class HClassTableGet FINAL : public HExpression<1> {
// PackedSwitch (jump table). A block ending with a PackedSwitch instruction will
// have one successor for each entry in the switch table, and the final successor
// will be the block containing the next Dex opcode.
-class HPackedSwitch FINAL : public HTemplateInstruction<1> {
+class HPackedSwitch final : public HExpression<1> {
public:
HPackedSwitch(int32_t start_value,
uint32_t num_entries,
HInstruction* input,
uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kPackedSwitch, SideEffects::None(), dex_pc),
+ : HExpression(kPackedSwitch, SideEffects::None(), dex_pc),
start_value_(start_value),
num_entries_(num_entries) {
SetRawInputAt(0, input);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsControlFlow() const override { return true; }
int32_t GetStartValue() const { return start_value_; }
@@ -3415,13 +3476,13 @@ class HUnaryOperation : public HExpression<1> {
}
// All of the UnaryOperation instructions are clonable.
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
HInstruction* GetInput() const { return InputAt(0); }
DataType::Type GetResultType() const { return GetType(); }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
@@ -3456,7 +3517,7 @@ class HBinaryOperation : public HExpression<2> {
}
// All of the BinaryOperation instructions are clonable.
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
HInstruction* GetLeft() const { return InputAt(0); }
HInstruction* GetRight() const { return InputAt(1); }
@@ -3496,8 +3557,8 @@ class HBinaryOperation : public HExpression<2> {
}
}
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
@@ -3578,7 +3639,7 @@ class HCondition : public HBinaryOperation {
ComparisonBias GetBias() const { return GetPackedField<ComparisonBiasField>(); }
void SetBias(ComparisonBias bias) { SetPackedField<ComparisonBiasField>(bias); }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
return GetPackedFields() == other->AsCondition()->GetPackedFields();
}
@@ -3606,7 +3667,7 @@ class HCondition : public HBinaryOperation {
protected:
// Needed if we merge a HCompare into a HCondition.
- static constexpr size_t kFieldComparisonBias = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFieldComparisonBias = kNumberOfGenericPackedBits;
static constexpr size_t kFieldComparisonBiasSize =
MinimumBitsToStore(static_cast<size_t>(ComparisonBias::kLast));
static constexpr size_t kNumberOfConditionPackedBits =
@@ -3635,42 +3696,42 @@ class HCondition : public HBinaryOperation {
};
// Instruction to check if two inputs are equal to each other.
-class HEqual FINAL : public HCondition {
+class HEqual final : public HCondition {
public:
HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
: HCondition(kEqual, first, second, dex_pc) {
}
- bool IsCommutative() const OVERRIDE { return true; }
+ bool IsCommutative() const override { return true; }
HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
- HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HNullConstant* y ATTRIBUTE_UNUSED) const override {
return MakeConstantCondition(true, GetDexPc());
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
// In the following Evaluate methods, a HCompare instruction has
// been merged into this HEqual instruction; evaluate it as
// `Compare(x, y) == 0`.
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0),
GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
DECLARE_INSTRUCTION(Equal);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondEQ;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondNE;
}
@@ -3681,42 +3742,42 @@ class HEqual FINAL : public HCondition {
template <typename T> static bool Compute(T x, T y) { return x == y; }
};
-class HNotEqual FINAL : public HCondition {
+class HNotEqual final : public HCondition {
public:
HNotEqual(HInstruction* first, HInstruction* second,
uint32_t dex_pc = kNoDexPc)
: HCondition(kNotEqual, first, second, dex_pc) {
}
- bool IsCommutative() const OVERRIDE { return true; }
+ bool IsCommutative() const override { return true; }
HConstant* Evaluate(HNullConstant* x ATTRIBUTE_UNUSED,
- HNullConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HNullConstant* y ATTRIBUTE_UNUSED) const override {
return MakeConstantCondition(false, GetDexPc());
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
// In the following Evaluate methods, a HCompare instruction has
// been merged into this HNotEqual instruction; evaluate it as
// `Compare(x, y) != 0`.
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
DECLARE_INSTRUCTION(NotEqual);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondNE;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondEQ;
}
@@ -3727,36 +3788,36 @@ class HNotEqual FINAL : public HCondition {
template <typename T> static bool Compute(T x, T y) { return x != y; }
};
-class HLessThan FINAL : public HCondition {
+class HLessThan final : public HCondition {
public:
HLessThan(HInstruction* first, HInstruction* second,
uint32_t dex_pc = kNoDexPc)
: HCondition(kLessThan, first, second, dex_pc) {
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
// In the following Evaluate methods, a HCompare instruction has
// been merged into this HLessThan instruction; evaluate it as
// `Compare(x, y) < 0`.
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
DECLARE_INSTRUCTION(LessThan);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondLT;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondGE;
}
@@ -3767,36 +3828,36 @@ class HLessThan FINAL : public HCondition {
template <typename T> static bool Compute(T x, T y) { return x < y; }
};
-class HLessThanOrEqual FINAL : public HCondition {
+class HLessThanOrEqual final : public HCondition {
public:
HLessThanOrEqual(HInstruction* first, HInstruction* second,
uint32_t dex_pc = kNoDexPc)
: HCondition(kLessThanOrEqual, first, second, dex_pc) {
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
// In the following Evaluate methods, a HCompare instruction has
// been merged into this HLessThanOrEqual instruction; evaluate it as
// `Compare(x, y) <= 0`.
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
DECLARE_INSTRUCTION(LessThanOrEqual);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondLE;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondGT;
}
@@ -3807,35 +3868,35 @@ class HLessThanOrEqual FINAL : public HCondition {
template <typename T> static bool Compute(T x, T y) { return x <= y; }
};
-class HGreaterThan FINAL : public HCondition {
+class HGreaterThan final : public HCondition {
public:
HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
: HCondition(kGreaterThan, first, second, dex_pc) {
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
// In the following Evaluate methods, a HCompare instruction has
// been merged into this HGreaterThan instruction; evaluate it as
// `Compare(x, y) > 0`.
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
DECLARE_INSTRUCTION(GreaterThan);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondGT;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondLE;
}
@@ -3846,35 +3907,35 @@ class HGreaterThan FINAL : public HCondition {
template <typename T> static bool Compute(T x, T y) { return x > y; }
};
-class HGreaterThanOrEqual FINAL : public HCondition {
+class HGreaterThanOrEqual final : public HCondition {
public:
HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
: HCondition(kGreaterThanOrEqual, first, second, dex_pc) {
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
// In the following Evaluate methods, a HCompare instruction has
// been merged into this HGreaterThanOrEqual instruction; evaluate it as
// `Compare(x, y) >= 0`.
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(Compare(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return MakeConstantCondition(Compute(CompareFP(x->GetValue(), y->GetValue()), 0), GetDexPc());
}
DECLARE_INSTRUCTION(GreaterThanOrEqual);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondGE;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondLT;
}
@@ -3885,36 +3946,36 @@ class HGreaterThanOrEqual FINAL : public HCondition {
template <typename T> static bool Compute(T x, T y) { return x >= y; }
};
-class HBelow FINAL : public HCondition {
+class HBelow final : public HCondition {
public:
HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
: HCondition(kBelow, first, second, dex_pc) {
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
DECLARE_INSTRUCTION(Below);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondB;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondAE;
}
@@ -3927,36 +3988,36 @@ class HBelow FINAL : public HCondition {
}
};
-class HBelowOrEqual FINAL : public HCondition {
+class HBelowOrEqual final : public HCondition {
public:
HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
: HCondition(kBelowOrEqual, first, second, dex_pc) {
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
DECLARE_INSTRUCTION(BelowOrEqual);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondBE;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondA;
}
@@ -3969,36 +4030,36 @@ class HBelowOrEqual FINAL : public HCondition {
}
};
-class HAbove FINAL : public HCondition {
+class HAbove final : public HCondition {
public:
HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
: HCondition(kAbove, first, second, dex_pc) {
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
DECLARE_INSTRUCTION(Above);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondA;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondBE;
}
@@ -4011,36 +4072,36 @@ class HAbove FINAL : public HCondition {
}
};
-class HAboveOrEqual FINAL : public HCondition {
+class HAboveOrEqual final : public HCondition {
public:
HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc)
: HCondition(kAboveOrEqual, first, second, dex_pc) {
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
DECLARE_INSTRUCTION(AboveOrEqual);
- IfCondition GetCondition() const OVERRIDE {
+ IfCondition GetCondition() const override {
return kCondAE;
}
- IfCondition GetOppositeCondition() const OVERRIDE {
+ IfCondition GetOppositeCondition() const override {
return kCondB;
}
@@ -4055,7 +4116,7 @@ class HAboveOrEqual FINAL : public HCondition {
// Instruction to check how two inputs compare to each other.
// Result is 0 if input0 == input1, 1 if input0 > input1, or -1 if input0 < input1.
-class HCompare FINAL : public HBinaryOperation {
+class HCompare final : public HBinaryOperation {
public:
// Note that `comparison_type` is the type of comparison performed
// between the comparison's inputs, not the type of the instantiated
@@ -4087,7 +4148,7 @@ class HCompare FINAL : public HBinaryOperation {
return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compute(x, y);
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
// Note that there is no "cmp-int" Dex instruction so we shouldn't
// reach this code path when processing a freshly built HIR
// graph. However HCompare integer instructions can be synthesized
@@ -4095,17 +4156,17 @@ class HCompare FINAL : public HBinaryOperation {
// IntegerSignum intrinsics, so we have to handle this case.
return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return MakeConstantComparison(Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return MakeConstantComparison(ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
}
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
return GetPackedFields() == other->AsCompare()->GetPackedFields();
}
@@ -4126,7 +4187,7 @@ class HCompare FINAL : public HBinaryOperation {
DECLARE_INSTRUCTION(Compare);
protected:
- static constexpr size_t kFieldComparisonBias = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFieldComparisonBias = kNumberOfGenericPackedBits;
static constexpr size_t kFieldComparisonBiasSize =
MinimumBitsToStore(static_cast<size_t>(ComparisonBias::kLast));
static constexpr size_t kNumberOfComparePackedBits =
@@ -4144,7 +4205,7 @@ class HCompare FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Compare);
};
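A self-contained sketch of the comparison-bias rule visible in ComputeFP above: an unordered comparison (either operand NaN) yields +1 under gt bias (Dex cmpg-float/cmpg-double) and -1 under lt bias (cmpl), while ordered inputs follow the usual -1/0/1 contract.

    #include <cmath>

    int CompareFloatWithBias(float x, float y, bool gt_bias) {
      if (std::isunordered(x, y)) {
        return gt_bias ? 1 : -1;  // NaN result depends on the selected bias
      }
      return (x == y) ? 0 : ((x < y) ? -1 : 1);
    }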
-class HNewInstance FINAL : public HExpression<1> {
+class HNewInstance final : public HExpression<1> {
public:
HNewInstance(HInstruction* cls,
uint32_t dex_pc,
@@ -4163,16 +4224,16 @@ class HNewInstance FINAL : public HExpression<1> {
SetRawInputAt(0, cls);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
dex::TypeIndex GetTypeIndex() const { return type_index_; }
const DexFile& GetDexFile() const { return dex_file_; }
// Calls runtime so needs an environment.
- bool NeedsEnvironment() const OVERRIDE { return true; }
+ bool NeedsEnvironment() const override { return true; }
// Can throw errors when out of memory or if the class is not instantiable/accessible.
- bool CanThrow() const OVERRIDE { return true; }
+ bool CanThrow() const override { return true; }
bool NeedsChecks() const {
return entrypoint_ == kQuickAllocObjectWithChecks;
@@ -4180,7 +4241,7 @@ class HNewInstance FINAL : public HExpression<1> {
bool IsFinalizable() const { return GetPackedFlag<kFlagFinalizable>(); }
- bool CanBeNull() const OVERRIDE { return false; }
+ bool CanBeNull() const override { return false; }
QuickEntrypointEnum GetEntrypoint() const { return entrypoint_; }
@@ -4205,7 +4266,7 @@ class HNewInstance FINAL : public HExpression<1> {
DEFAULT_COPY_CONSTRUCTOR(NewInstance);
private:
- static constexpr size_t kFlagFinalizable = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFlagFinalizable = kNumberOfGenericPackedBits;
static constexpr size_t kNumberOfNewInstancePackedBits = kFlagFinalizable + 1;
static_assert(kNumberOfNewInstancePackedBits <= kMaxNumberOfPackedBits,
"Too many packed fields.");
@@ -4234,7 +4295,7 @@ enum IntrinsicExceptions {
class HInvoke : public HVariableInputSizeInstruction {
public:
- bool NeedsEnvironment() const OVERRIDE;
+ bool NeedsEnvironment() const override;
void SetArgumentAt(size_t index, HInstruction* argument) {
SetRawInputAt(index, argument);
@@ -4246,8 +4307,6 @@ class HInvoke : public HVariableInputSizeInstruction {
// inputs at the end of their list of inputs.
uint32_t GetNumberOfArguments() const { return number_of_arguments_; }
- DataType::Type GetType() const OVERRIDE { return GetPackedField<ReturnTypeField>(); }
-
uint32_t GetDexMethodIndex() const { return dex_method_index_; }
InvokeType GetInvokeType() const {
@@ -4269,15 +4328,15 @@ class HInvoke : public HVariableInputSizeInstruction {
void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); }
- bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); }
+ bool CanThrow() const override { return GetPackedFlag<kFlagCanThrow>(); }
void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); }
- bool AlwaysThrows() const OVERRIDE { return GetPackedFlag<kFlagAlwaysThrows>(); }
+ bool AlwaysThrows() const override { return GetPackedFlag<kFlagAlwaysThrows>(); }
- bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); }
+ bool CanBeMoved() const override { return IsIntrinsic() && !DoesAnyWrite(); }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
return intrinsic_ != Intrinsics::kNone && intrinsic_ == other->AsInvoke()->intrinsic_;
}
@@ -4292,7 +4351,7 @@ class HInvoke : public HVariableInputSizeInstruction {
bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
ArtMethod* GetResolvedMethod() const { return resolved_method_; }
- void SetResolvedMethod(ArtMethod* method) { resolved_method_ = method; }
+ void SetResolvedMethod(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
DECLARE_ABSTRACT_INSTRUCTION(Invoke);
@@ -4300,16 +4359,11 @@ class HInvoke : public HVariableInputSizeInstruction {
static constexpr size_t kFieldInvokeType = kNumberOfGenericPackedBits;
static constexpr size_t kFieldInvokeTypeSize =
MinimumBitsToStore(static_cast<size_t>(kMaxInvokeType));
- static constexpr size_t kFieldReturnType =
- kFieldInvokeType + kFieldInvokeTypeSize;
- static constexpr size_t kFieldReturnTypeSize =
- MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast));
- static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize;
+ static constexpr size_t kFlagCanThrow = kFieldInvokeType + kFieldInvokeTypeSize;
static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1;
static constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1;
static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>;
- using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>;
HInvoke(InstructionKind kind,
ArenaAllocator* allocator,
@@ -4322,19 +4376,21 @@ class HInvoke : public HVariableInputSizeInstruction {
InvokeType invoke_type)
: HVariableInputSizeInstruction(
kind,
+ return_type,
SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays.
dex_pc,
allocator,
number_of_arguments + number_of_other_inputs,
kArenaAllocInvokeInputs),
number_of_arguments_(number_of_arguments),
- resolved_method_(resolved_method),
dex_method_index_(dex_method_index),
intrinsic_(Intrinsics::kNone),
intrinsic_optimizations_(0) {
- SetPackedField<ReturnTypeField>(return_type);
SetPackedField<InvokeTypeField>(invoke_type);
SetPackedFlag<kFlagCanThrow>(true);
+ // Check the mutator lock here, as constructors lack annotalysis support.
+ Locks::mutator_lock_->AssertNotExclusiveHeld(Thread::Current());
+ SetResolvedMethod(resolved_method);
}
DEFAULT_COPY_CONSTRUCTOR(Invoke);
@@ -4348,7 +4404,7 @@ class HInvoke : public HVariableInputSizeInstruction {
uint32_t intrinsic_optimizations_;
};
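To make the packed-field change above concrete: with ReturnTypeField removed (the return type now travels through the HInstruction base constructor as `return_type`), kFlagCanThrow packs immediately after the invoke-type field. A simplified standalone sketch follows; the concrete width below is an illustrative assumption, not ART's computed value.

    #include <cstddef>
    #include <cstdint>

    constexpr size_t kFieldInvokeType = 0;      // kNumberOfGenericPackedBits in ART
    constexpr size_t kFieldInvokeTypeSize = 3;  // assumed width for illustration
    constexpr size_t kFlagCanThrow = kFieldInvokeType + kFieldInvokeTypeSize;
    constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1;

    // Set the single-bit CanThrow flag in a packed word of flags.
    constexpr uint32_t SetCanThrow(uint32_t packed, bool value) {
      const uint32_t mask = 1u << kFlagCanThrow;
      return value ? (packed | mask) : (packed & ~mask);
    }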
-class HInvokeUnresolved FINAL : public HInvoke {
+class HInvokeUnresolved final : public HInvoke {
public:
HInvokeUnresolved(ArenaAllocator* allocator,
uint32_t number_of_arguments,
@@ -4359,7 +4415,7 @@ class HInvokeUnresolved FINAL : public HInvoke {
: HInvoke(kInvokeUnresolved,
allocator,
number_of_arguments,
- 0u /* number_of_other_inputs */,
+ /* number_of_other_inputs= */ 0u,
return_type,
dex_pc,
dex_method_index,
@@ -4367,7 +4423,7 @@ class HInvokeUnresolved FINAL : public HInvoke {
invoke_type) {
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
DECLARE_INSTRUCTION(InvokeUnresolved);
@@ -4375,7 +4431,7 @@ class HInvokeUnresolved FINAL : public HInvoke {
DEFAULT_COPY_CONSTRUCTOR(InvokeUnresolved);
};
-class HInvokePolymorphic FINAL : public HInvoke {
+class HInvokePolymorphic final : public HInvoke {
public:
HInvokePolymorphic(ArenaAllocator* allocator,
uint32_t number_of_arguments,
@@ -4385,7 +4441,7 @@ class HInvokePolymorphic FINAL : public HInvoke {
: HInvoke(kInvokePolymorphic,
allocator,
number_of_arguments,
- 0u /* number_of_other_inputs */,
+ /* number_of_other_inputs= */ 0u,
return_type,
dex_pc,
dex_method_index,
@@ -4393,7 +4449,7 @@ class HInvokePolymorphic FINAL : public HInvoke {
kVirtual) {
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
DECLARE_INSTRUCTION(InvokePolymorphic);
@@ -4401,7 +4457,39 @@ class HInvokePolymorphic FINAL : public HInvoke {
DEFAULT_COPY_CONSTRUCTOR(InvokePolymorphic);
};
-class HInvokeStaticOrDirect FINAL : public HInvoke {
+class HInvokeCustom final : public HInvoke {
+ public:
+ HInvokeCustom(ArenaAllocator* allocator,
+ uint32_t number_of_arguments,
+ uint32_t call_site_index,
+ DataType::Type return_type,
+ uint32_t dex_pc)
+ : HInvoke(kInvokeCustom,
+ allocator,
+ number_of_arguments,
+ /* number_of_other_inputs= */ 0u,
+ return_type,
+ dex_pc,
+ /* dex_method_index= */ dex::kDexNoIndex,
+ /* resolved_method= */ nullptr,
+ kStatic),
+ call_site_index_(call_site_index) {
+ }
+
+ uint32_t GetCallSiteIndex() const { return call_site_index_; }
+
+ bool IsClonable() const override { return true; }
+
+ DECLARE_INSTRUCTION(InvokeCustom);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(InvokeCustom);
+
+ private:
+ uint32_t call_site_index_;
+};
+
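A hedged construction sketch for the new node, using only the constructor shown above; `allocator`, `call_site_index`, and `dex_pc` are assumed to be in scope in the instruction builder.

    // Note there is no dex method index or resolved ArtMethod*:
    // the call site index alone identifies the invoke-custom target.
    HInvokeCustom* invoke = new (allocator) HInvokeCustom(
        allocator,
        /* number_of_arguments= */ 2u,
        call_site_index,
        DataType::Type::kReference,
        dex_pc);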
+class HInvokeStaticOrDirect final : public HInvoke {
public:
// Requirements of this method call regarding the class
// initialization (clinit) check of its declaring class.
@@ -4424,14 +4512,18 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
// Used for boot image methods referenced by boot image code.
kBootImageLinkTimePcRelative,
- // Use ArtMethod* at a known address, embed the direct address in the code.
- // Used for app->boot calls with non-relocatable image and for JIT-compiled calls.
- kDirectAddress,
+ // Load from an entry in the .data.bimg.rel.ro using a PC-relative load.
+ // Used for app->boot calls with relocatable image.
+ kBootImageRelRo,
// Load from an entry in the .bss section using a PC-relative load.
- // Used for classes outside boot image when .bss is accessible with a PC-relative load.
+ // Used for methods outside boot image referenced by AOT-compiled app and boot image code.
kBssEntry,
+ // Use ArtMethod* at a known address, embed the direct address in the code.
+ // Used for JIT-compiled calls.
+ kJitDirectAddress,
+
// Make a runtime call to resolve and call the method. This is the last-resort-kind
// used when other kinds are unimplemented on a particular architecture.
kRuntimeCall,
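A sketch of how these kinds group by addressing mode, mirroring the HasPcRelativeMethodLoadKind predicate updated further down in this diff; the enum here is a hypothetical stand-in, not the real declaration.

    enum class LoadKind {
      kBootImageLinkTimePcRelative, kBootImageRelRo, kBssEntry,
      kJitDirectAddress, kRuntimeCall,
    };

    bool IsPcRelative(LoadKind kind) {
      return kind == LoadKind::kBootImageLinkTimePcRelative ||
             kind == LoadKind::kBootImageRelRo ||
             kind == LoadKind::kBssEntry;  // kJitDirectAddress embeds a literal
    }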
@@ -4472,8 +4564,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
allocator,
number_of_arguments,
// There is potentially one extra argument for the HCurrentMethod node, and
- // potentially one other if the clinit check is explicit, and potentially
- // one other if the method is a string factory.
+ // potentially one other if the clinit check is explicit.
(NeedsCurrentMethodInput(dispatch_info.method_load_kind) ? 1u : 0u) +
(clinit_check_requirement == ClinitCheckRequirement::kExplicit ? 1u : 0u),
return_type,
@@ -4486,7 +4577,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
void SetDispatchInfo(const DispatchInfo& dispatch_info) {
bool had_current_method_input = HasCurrentMethodInput();
@@ -4516,7 +4607,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
}
using HInstruction::GetInputRecords; // Keep the const version visible.
- ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE {
+ ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() override {
ArrayRef<HUserRecord<HInstruction*>> input_records = HInvoke::GetInputRecords();
if (kIsDebugBuild && IsStaticWithExplicitClinitCheck()) {
DCHECK(!input_records.empty());
@@ -4534,14 +4625,14 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
return input_records;
}
- bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
// We access the method via the dex cache so we can't do an implicit null check.
// TODO: for intrinsics we can generate implicit null checks.
return false;
}
- bool CanBeNull() const OVERRIDE {
- return GetPackedField<ReturnTypeField>() == DataType::Type::kReference && !IsStringInit();
+ bool CanBeNull() const override {
+ return GetType() == DataType::Type::kReference && !IsStringInit();
}
// Get the index of the special input, if any.
@@ -4555,11 +4646,12 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
MethodLoadKind GetMethodLoadKind() const { return dispatch_info_.method_load_kind; }
CodePtrLocation GetCodePtrLocation() const { return dispatch_info_.code_ptr_location; }
bool IsRecursive() const { return GetMethodLoadKind() == MethodLoadKind::kRecursive; }
- bool NeedsDexCacheOfDeclaringClass() const OVERRIDE;
+ bool NeedsDexCacheOfDeclaringClass() const override;
bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
- bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
+ bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kJitDirectAddress; }
bool HasPcRelativeMethodLoadKind() const {
return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative ||
+ GetMethodLoadKind() == MethodLoadKind::kBootImageRelRo ||
GetMethodLoadKind() == MethodLoadKind::kBssEntry;
}
bool HasCurrentMethodInput() const {
@@ -4655,7 +4747,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke {
std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs);
std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs);
-class HInvokeVirtual FINAL : public HInvoke {
+class HInvokeVirtual final : public HInvoke {
public:
HInvokeVirtual(ArenaAllocator* allocator,
uint32_t number_of_arguments,
@@ -4676,9 +4768,9 @@ class HInvokeVirtual FINAL : public HInvoke {
vtable_index_(vtable_index) {
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
- bool CanBeNull() const OVERRIDE {
+ bool CanBeNull() const override {
switch (GetIntrinsic()) {
case Intrinsics::kThreadCurrentThread:
case Intrinsics::kStringBufferAppend:
@@ -4691,9 +4783,9 @@ class HInvokeVirtual FINAL : public HInvoke {
}
}
- bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const override {
// TODO: Add implicit null checks in intrinsics.
- return (obj == InputAt(0)) && !GetLocations()->Intrinsified();
+ return (obj == InputAt(0)) && !IsIntrinsic();
}
uint32_t GetVTableIndex() const { return vtable_index_; }
@@ -4708,7 +4800,7 @@ class HInvokeVirtual FINAL : public HInvoke {
const uint32_t vtable_index_;
};
-class HInvokeInterface FINAL : public HInvoke {
+class HInvokeInterface final : public HInvoke {
public:
HInvokeInterface(ArenaAllocator* allocator,
uint32_t number_of_arguments,
@@ -4729,14 +4821,14 @@ class HInvokeInterface FINAL : public HInvoke {
imt_index_(imt_index) {
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
- bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const override {
// TODO: Add implicit null checks in intrinsics.
- return (obj == InputAt(0)) && !GetLocations()->Intrinsified();
+ return (obj == InputAt(0)) && !IsIntrinsic();
}
- bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
+ bool NeedsDexCacheOfDeclaringClass() const override {
// The assembly stub currently needs it.
return true;
}
@@ -4753,7 +4845,7 @@ class HInvokeInterface FINAL : public HInvoke {
const uint32_t imt_index_;
};
-class HNeg FINAL : public HUnaryOperation {
+class HNeg final : public HUnaryOperation {
public:
HNeg(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
: HUnaryOperation(kNeg, result_type, input, dex_pc) {
@@ -4762,16 +4854,16 @@ class HNeg FINAL : public HUnaryOperation {
template <typename T> static T Compute(T x) { return -x; }
- HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x) const override {
return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x) const override {
return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x) const override {
return GetBlock()->GetGraph()->GetFloatConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x) const override {
return GetBlock()->GetGraph()->GetDoubleConstant(Compute(x->GetValue()), GetDexPc());
}
@@ -4781,23 +4873,24 @@ class HNeg FINAL : public HUnaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Neg);
};
-class HNewArray FINAL : public HExpression<2> {
+class HNewArray final : public HExpression<2> {
public:
- HNewArray(HInstruction* cls, HInstruction* length, uint32_t dex_pc)
+ HNewArray(HInstruction* cls, HInstruction* length, uint32_t dex_pc, size_t component_size_shift)
: HExpression(kNewArray, DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc) {
SetRawInputAt(0, cls);
SetRawInputAt(1, length);
+ SetPackedField<ComponentSizeShiftField>(component_size_shift);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
// Calls runtime so needs an environment.
- bool NeedsEnvironment() const OVERRIDE { return true; }
+ bool NeedsEnvironment() const override { return true; }
// May throw NegativeArraySizeException, OutOfMemoryError, etc.
- bool CanThrow() const OVERRIDE { return true; }
+ bool CanThrow() const override { return true; }
- bool CanBeNull() const OVERRIDE { return false; }
+ bool CanBeNull() const override { return false; }
HLoadClass* GetLoadClass() const {
DCHECK(InputAt(0)->IsLoadClass());
@@ -4808,13 +4901,26 @@ class HNewArray FINAL : public HExpression<2> {
return InputAt(1);
}
+ size_t GetComponentSizeShift() {
+ return GetPackedField<ComponentSizeShiftField>();
+ }
+
DECLARE_INSTRUCTION(NewArray);
protected:
DEFAULT_COPY_CONSTRUCTOR(NewArray);
+
+ private:
+ static constexpr size_t kFieldComponentSizeShift = kNumberOfGenericPackedBits;
+ static constexpr size_t kFieldComponentSizeShiftSize = MinimumBitsToStore(3u);
+ static constexpr size_t kNumberOfNewArrayPackedBits =
+ kFieldComponentSizeShift + kFieldComponentSizeShiftSize;
+ static_assert(kNumberOfNewArrayPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+ using ComponentSizeShiftField =
+ BitField<size_t, kFieldComponentSizeShift, kFieldComponentSizeShiftSize>;
};
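A standalone sketch of what the new packed field encodes, on the assumption implied by MinimumBitsToStore(3u) that the shift ranges over 0-3 for 1/2/4/8-byte components:

    #include <cstddef>

    // length << shift gives the array payload size in bytes;
    // e.g. an int[] (4-byte components) uses shift 2.
    size_t ArrayDataSize(size_t length, size_t component_size_shift) {
      return length << component_size_shift;
    }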
-class HAdd FINAL : public HBinaryOperation {
+class HAdd final : public HBinaryOperation {
public:
HAdd(DataType::Type result_type,
HInstruction* left,
@@ -4823,23 +4929,23 @@ class HAdd FINAL : public HBinaryOperation {
: HBinaryOperation(kAdd, result_type, left, right, SideEffects::None(), dex_pc) {
}
- bool IsCommutative() const OVERRIDE { return true; }
+ bool IsCommutative() const override { return true; }
template <typename T> static T Compute(T x, T y) { return x + y; }
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return GetBlock()->GetGraph()->GetFloatConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return GetBlock()->GetGraph()->GetDoubleConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
@@ -4850,7 +4956,7 @@ class HAdd FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Add);
};
-class HSub FINAL : public HBinaryOperation {
+class HSub final : public HBinaryOperation {
public:
HSub(DataType::Type result_type,
HInstruction* left,
@@ -4861,19 +4967,19 @@ class HSub FINAL : public HBinaryOperation {
template <typename T> static T Compute(T x, T y) { return x - y; }
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return GetBlock()->GetGraph()->GetFloatConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return GetBlock()->GetGraph()->GetDoubleConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
@@ -4884,7 +4990,7 @@ class HSub FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Sub);
};
-class HMul FINAL : public HBinaryOperation {
+class HMul final : public HBinaryOperation {
public:
HMul(DataType::Type result_type,
HInstruction* left,
@@ -4893,23 +4999,23 @@ class HMul FINAL : public HBinaryOperation {
: HBinaryOperation(kMul, result_type, left, right, SideEffects::None(), dex_pc) {
}
- bool IsCommutative() const OVERRIDE { return true; }
+ bool IsCommutative() const override { return true; }
template <typename T> static T Compute(T x, T y) { return x * y; }
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return GetBlock()->GetGraph()->GetFloatConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return GetBlock()->GetGraph()->GetDoubleConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
@@ -4920,7 +5026,7 @@ class HMul FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Mul);
};
-class HDiv FINAL : public HBinaryOperation {
+class HDiv final : public HBinaryOperation {
public:
HDiv(DataType::Type result_type,
HInstruction* left,
@@ -4945,19 +5051,19 @@ class HDiv FINAL : public HBinaryOperation {
return x / y;
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return GetBlock()->GetGraph()->GetIntConstant(
ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return GetBlock()->GetGraph()->GetLongConstant(
ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return GetBlock()->GetGraph()->GetFloatConstant(
ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return GetBlock()->GetGraph()->GetDoubleConstant(
ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
}
@@ -4968,7 +5074,7 @@ class HDiv FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Div);
};
-class HRem FINAL : public HBinaryOperation {
+class HRem final : public HBinaryOperation {
public:
HRem(DataType::Type result_type,
HInstruction* left,
@@ -4993,19 +5099,19 @@ class HRem FINAL : public HBinaryOperation {
return std::fmod(x, y);
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return GetBlock()->GetGraph()->GetIntConstant(
ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return GetBlock()->GetGraph()->GetLongConstant(
ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x, HFloatConstant* y) const override {
return GetBlock()->GetGraph()->GetFloatConstant(
ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x, HDoubleConstant* y) const override {
return GetBlock()->GetGraph()->GetDoubleConstant(
ComputeFP(x->GetValue(), y->GetValue()), GetDexPc());
}
@@ -5016,25 +5122,136 @@ class HRem FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Rem);
};
-class HDivZeroCheck FINAL : public HExpression<1> {
+class HMin final : public HBinaryOperation {
+ public:
+ HMin(DataType::Type result_type,
+ HInstruction* left,
+ HInstruction* right,
+ uint32_t dex_pc)
+ : HBinaryOperation(kMin, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+ bool IsCommutative() const override { return true; }
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x, T y) {
+ return (x <= y) ? x : y;
+ }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ // TODO: Evaluation for floating-point values.
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
+
+ DECLARE_INSTRUCTION(Min);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Min);
+};
+
+class HMax final : public HBinaryOperation {
+ public:
+ HMax(DataType::Type result_type,
+ HInstruction* left,
+ HInstruction* right,
+ uint32_t dex_pc)
+ : HBinaryOperation(kMax, result_type, left, right, SideEffects::None(), dex_pc) {}
+
+ bool IsCommutative() const override { return true; }
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x, T y) {
+ return (x >= y) ? x : y;
+ }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ ComputeIntegral(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ // TODO: Evaluation for floating-point values.
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override { return nullptr; }
+
+ DECLARE_INSTRUCTION(Max);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Max);
+};
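For context on the two TODO stubs in HMin and HMax above: the float/double cases cannot reuse ComputeIntegral, because Java's Math.min/max propagate NaN and order -0.0 below +0.0. A standalone sketch of the min rule (max is symmetric):

    #include <cmath>

    float JavaStyleMin(float x, float y) {
      if (std::isnan(x) || std::isnan(y)) {
        return std::nanf("");            // NaN if either input is NaN
      }
      if (x == 0.0f && y == 0.0f) {
        return std::signbit(x) ? x : y;  // -0.0f is the smaller zero
      }
      return (x < y) ? x : y;
    }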
+
+class HAbs final : public HUnaryOperation {
+ public:
+ HAbs(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
+ : HUnaryOperation(kAbs, result_type, input, dex_pc) {}
+
+ // Evaluation for integral values.
+ template <typename T> static T ComputeIntegral(T x) {
+ return x < 0 ? -x : x;
+ }
+
+ // Evaluation for floating-point values.
+ // Note, as a "quality of implementation", rather than pure "spec compliance",
+ // we require that Math.abs() clears the sign bit (but changes nothing else)
+ // for all floating-point numbers, including NaN (signaling NaN may become quiet though).
+ // http://b/30758343
+ template <typename T, typename S> static T ComputeFP(T x) {
+ S bits = bit_cast<S, T>(x);
+ return bit_cast<T, S>(bits & std::numeric_limits<S>::max());
+ }
+
+ HConstant* Evaluate(HIntConstant* x) const override {
+ return GetBlock()->GetGraph()->GetIntConstant(ComputeIntegral(x->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x) const override {
+ return GetBlock()->GetGraph()->GetLongConstant(ComputeIntegral(x->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HFloatConstant* x) const override {
+ return GetBlock()->GetGraph()->GetFloatConstant(
+ ComputeFP<float, int32_t>(x->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HDoubleConstant* x) const override {
+ return GetBlock()->GetGraph()->GetDoubleConstant(
+ ComputeFP<double, int64_t>(x->GetValue()), GetDexPc());
+ }
+
+ DECLARE_INSTRUCTION(Abs);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(Abs);
+};
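A portable restatement of the ComputeFP bit trick above (bit_cast spelled with memcpy; assumes IEEE-754 floats): masking with the maximum signed value clears exactly the sign bit, so -0.0f maps to +0.0f and NaN payloads survive.

    #include <cstdint>
    #include <cstring>
    #include <limits>

    float AbsViaBits(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
      float result;
      std::memcpy(&result, &bits, sizeof(result));
      return result;
    }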
+
+class HDivZeroCheck final : public HExpression<1> {
public:
// `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException`
- // constructor.
+ // constructor. However, it can only do so on a fatal slow path, so execution never returns
+ // to the instruction following the current one; thus 'SideEffects::None()' is used.
HDivZeroCheck(HInstruction* value, uint32_t dex_pc)
- : HExpression(kDivZeroCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
+ : HExpression(kDivZeroCheck, value->GetType(), SideEffects::None(), dex_pc) {
SetRawInputAt(0, value);
}
- DataType::Type GetType() const OVERRIDE { return InputAt(0)->GetType(); }
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
-
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool NeedsEnvironment() const OVERRIDE { return true; }
- bool CanThrow() const OVERRIDE { return true; }
+ bool NeedsEnvironment() const override { return true; }
+ bool CanThrow() const override { return true; }
DECLARE_INSTRUCTION(DivZeroCheck);
@@ -5042,7 +5259,7 @@ class HDivZeroCheck FINAL : public HExpression<1> {
DEFAULT_COPY_CONSTRUCTOR(DivZeroCheck);
};
-class HShl FINAL : public HBinaryOperation {
+class HShl final : public HBinaryOperation {
public:
HShl(DataType::Type result_type,
HInstruction* value,
@@ -5058,26 +5275,26 @@ class HShl FINAL : public HBinaryOperation {
return value << (distance & max_shift_distance);
}
- HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HLongConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
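A standalone sketch of the distance masking visible in Compute above, writing out the masks that kMaxIntShiftDistance and kMaxLongShiftDistance are assumed to hold (0x1f and 0x3f for 32- and 64-bit operands):

    #include <cstdint>

    // Only the low 5 (int) or 6 (long) bits of the distance are used,
    // so e.g. shifting an int by 33 behaves like shifting by 1.
    int32_t ShlInt(int32_t value, int32_t distance) {
      return static_cast<int32_t>(
          static_cast<uint32_t>(value) << (distance & 0x1f));
    }
    int64_t ShlLong(int64_t value, int32_t distance) {
      return static_cast<int64_t>(
          static_cast<uint64_t>(value) << (distance & 0x3f));
    }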
@@ -5088,7 +5305,7 @@ class HShl FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Shl);
};
-class HShr FINAL : public HBinaryOperation {
+class HShr final : public HBinaryOperation {
public:
HShr(DataType::Type result_type,
HInstruction* value,
@@ -5104,26 +5321,26 @@ class HShr FINAL : public HBinaryOperation {
return value >> (distance & max_shift_distance);
}
- HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HLongConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5134,7 +5351,7 @@ class HShr FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Shr);
};
-class HUShr FINAL : public HBinaryOperation {
+class HUShr final : public HBinaryOperation {
public:
HUShr(DataType::Type result_type,
HInstruction* value,
@@ -5152,26 +5369,26 @@ class HUShr FINAL : public HBinaryOperation {
return static_cast<T>(ux >> (distance & max_shift_distance));
}
- HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HLongConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5182,7 +5399,7 @@ class HUShr FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(UShr);
};
-class HAnd FINAL : public HBinaryOperation {
+class HAnd final : public HBinaryOperation {
public:
HAnd(DataType::Type result_type,
HInstruction* left,
@@ -5191,25 +5408,25 @@ class HAnd FINAL : public HBinaryOperation {
: HBinaryOperation(kAnd, result_type, left, right, SideEffects::None(), dex_pc) {
}
- bool IsCommutative() const OVERRIDE { return true; }
+ bool IsCommutative() const override { return true; }
template <typename T> static T Compute(T x, T y) { return x & y; }
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5220,7 +5437,7 @@ class HAnd FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(And);
};
-class HOr FINAL : public HBinaryOperation {
+class HOr final : public HBinaryOperation {
public:
HOr(DataType::Type result_type,
HInstruction* left,
@@ -5229,25 +5446,25 @@ class HOr FINAL : public HBinaryOperation {
: HBinaryOperation(kOr, result_type, left, right, SideEffects::None(), dex_pc) {
}
- bool IsCommutative() const OVERRIDE { return true; }
+ bool IsCommutative() const override { return true; }
template <typename T> static T Compute(T x, T y) { return x | y; }
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5258,7 +5475,7 @@ class HOr FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Or);
};
-class HXor FINAL : public HBinaryOperation {
+class HXor final : public HBinaryOperation {
public:
HXor(DataType::Type result_type,
HInstruction* left,
@@ -5267,25 +5484,25 @@ class HXor FINAL : public HBinaryOperation {
: HBinaryOperation(kXor, result_type, left, right, SideEffects::None(), dex_pc) {
}
- bool IsCommutative() const OVERRIDE { return true; }
+ bool IsCommutative() const override { return true; }
template <typename T> static T Compute(T x, T y) { return x ^ y; }
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5296,7 +5513,7 @@ class HXor FINAL : public HBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Xor);
};
-class HRor FINAL : public HBinaryOperation {
+class HRor final : public HBinaryOperation {
public:
HRor(DataType::Type result_type, HInstruction* value, HInstruction* distance)
: HBinaryOperation(kRor, result_type, value, distance) {
@@ -5317,26 +5534,26 @@ class HRor FINAL : public HBinaryOperation {
}
}
- HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* value, HIntConstant* distance) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxIntShiftDistance), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* value, HIntConstant* distance) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(value->GetValue(), distance->GetValue(), kMaxLongShiftDistance), GetDexPc());
}
HConstant* Evaluate(HLongConstant* value ATTRIBUTE_UNUSED,
- HLongConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HLongConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for the (long, long) case.";
UNREACHABLE();
}
HConstant* Evaluate(HFloatConstant* value ATTRIBUTE_UNUSED,
- HFloatConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* value ATTRIBUTE_UNUSED,
- HDoubleConstant* distance ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* distance ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
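A standalone rotate-right sketch matching the semantics HRor evaluates above, assuming the distance is masked to the operand width as with the shift nodes:

    #include <cstdint>

    uint32_t RotateRight32(uint32_t value, uint32_t distance) {
      distance &= 31u;
      // The '& 31u' on the left shift keeps distance == 0 well defined.
      return (value >> distance) | (value << ((32u - distance) & 31u));
    }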
@@ -5349,7 +5566,7 @@ class HRor FINAL : public HBinaryOperation {
// The value of a parameter in this method. Its location depends on
// the calling convention.
-class HParameterValue FINAL : public HExpression<0> {
+class HParameterValue final : public HExpression<0> {
public:
HParameterValue(const DexFile& dex_file,
dex::TypeIndex type_index,
@@ -5369,7 +5586,7 @@ class HParameterValue FINAL : public HExpression<0> {
uint8_t GetIndex() const { return index_; }
bool IsThis() const { return GetPackedFlag<kFlagIsThis>(); }
- bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
+ bool CanBeNull() const override { return GetPackedFlag<kFlagCanBeNull>(); }
void SetCanBeNull(bool can_be_null) { SetPackedFlag<kFlagCanBeNull>(can_be_null); }
DECLARE_INSTRUCTION(ParameterValue);
@@ -5379,7 +5596,7 @@ class HParameterValue FINAL : public HExpression<0> {
private:
// Whether or not the parameter value corresponds to 'this' argument.
- static constexpr size_t kFlagIsThis = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFlagIsThis = kNumberOfGenericPackedBits;
static constexpr size_t kFlagCanBeNull = kFlagIsThis + 1;
static constexpr size_t kNumberOfParameterValuePackedBits = kFlagCanBeNull + 1;
static_assert(kNumberOfParameterValuePackedBits <= kMaxNumberOfPackedBits,
@@ -5392,30 +5609,30 @@ class HParameterValue FINAL : public HExpression<0> {
const uint8_t index_;
};
-class HNot FINAL : public HUnaryOperation {
+class HNot final : public HUnaryOperation {
public:
HNot(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
: HUnaryOperation(kNot, result_type, input, dex_pc) {
}
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
template <typename T> static T Compute(T x) { return ~x; }
- HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x) const override {
return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x) const override {
return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5426,14 +5643,14 @@ class HNot FINAL : public HUnaryOperation {
DEFAULT_COPY_CONSTRUCTOR(Not);
};
-class HBooleanNot FINAL : public HUnaryOperation {
+class HBooleanNot final : public HUnaryOperation {
public:
explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc)
: HUnaryOperation(kBooleanNot, DataType::Type::kBool, input, dex_pc) {
}
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
@@ -5442,18 +5659,18 @@ class HBooleanNot FINAL : public HUnaryOperation {
return !x;
}
- HConstant* Evaluate(HIntConstant* x) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x) const override {
return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for long values";
UNREACHABLE();
}
- HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
- HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const OVERRIDE {
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -5464,7 +5681,7 @@ class HBooleanNot FINAL : public HUnaryOperation {
DEFAULT_COPY_CONSTRUCTOR(BooleanNot);
};
-class HTypeConversion FINAL : public HExpression<1> {
+class HTypeConversion final : public HExpression<1> {
public:
// Instantiate a type conversion of `input` to `result_type`.
HTypeConversion(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc)
@@ -5478,10 +5695,15 @@ class HTypeConversion FINAL : public HExpression<1> {
DataType::Type GetInputType() const { return GetInput()->GetType(); }
DataType::Type GetResultType() const { return GetType(); }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
+ // Return whether the conversion is implicit. This includes conversion to the same type.
+ bool IsImplicitConversion() const {
+ return DataType::IsTypeConversionImplicit(GetInputType(), GetResultType());
+ }
// Try to statically evaluate the conversion and return a HConstant
// containing the result. If the input cannot be converted, return nullptr.
@@ -5495,26 +5717,27 @@ class HTypeConversion FINAL : public HExpression<1> {
static constexpr uint32_t kNoRegNumber = -1;
-class HNullCheck FINAL : public HExpression<1> {
+class HNullCheck final : public HExpression<1> {
public:
// `HNullCheck` can trigger GC, as it may call the `NullPointerException`
- // constructor.
+ // constructor. However, it can only do so on a fatal slow path, so execution never returns
+ // to the instruction following the current one; thus 'SideEffects::None()' is used.
HNullCheck(HInstruction* value, uint32_t dex_pc)
- : HExpression(kNullCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
+ : HExpression(kNullCheck, value->GetType(), SideEffects::None(), dex_pc) {
SetRawInputAt(0, value);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool NeedsEnvironment() const OVERRIDE { return true; }
+ bool NeedsEnvironment() const override { return true; }
- bool CanThrow() const OVERRIDE { return true; }
+ bool CanThrow() const override { return true; }
- bool CanBeNull() const OVERRIDE { return false; }
+ bool CanBeNull() const override { return false; }
DECLARE_INSTRUCTION(NullCheck);
@@ -5559,7 +5782,7 @@ class FieldInfo : public ValueObject {
const DexFile& dex_file_;
};
-class HInstanceFieldGet FINAL : public HExpression<1> {
+class HInstanceFieldGet final : public HExpression<1> {
public:
HInstanceFieldGet(HInstruction* value,
ArtField* field,
@@ -5584,19 +5807,19 @@ class HInstanceFieldGet FINAL : public HExpression<1> {
SetRawInputAt(0, value);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return !IsVolatile(); }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
const HInstanceFieldGet* other_get = other->AsInstanceFieldGet();
return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
}
- bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const override {
return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
}
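// For illustration (a sketch under assumptions, not part of this change): an
// implicit null check folds the check into the memory access itself, roughly:
//   ldr w0, [x1, #12]  // if x1 == null this faults; the fault handler raises NPE
// This only works while the faulting address stays in the unmapped low page,
// which is why the offset is validated by art::CanDoImplicitNullCheckOn().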
- size_t ComputeHashCode() const OVERRIDE {
+ size_t ComputeHashCode() const override {
return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue();
}
@@ -5621,7 +5844,7 @@ class HInstanceFieldGet FINAL : public HExpression<1> {
const FieldInfo field_info_;
};
-class HInstanceFieldSet FINAL : public HTemplateInstruction<2> {
+class HInstanceFieldSet final : public HExpression<2> {
public:
HInstanceFieldSet(HInstruction* object,
HInstruction* value,
@@ -5633,9 +5856,9 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> {
uint16_t declaring_class_def_index,
const DexFile& dex_file,
uint32_t dex_pc)
- : HTemplateInstruction(kInstanceFieldSet,
- SideEffects::FieldWriteOfType(field_type, is_volatile),
- dex_pc),
+ : HExpression(kInstanceFieldSet,
+ SideEffects::FieldWriteOfType(field_type, is_volatile),
+ dex_pc),
field_info_(field,
field_offset,
field_type,
@@ -5648,9 +5871,9 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> {
SetRawInputAt(1, value);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
- bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const override {
return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value());
}
@@ -5676,7 +5899,7 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> {
const FieldInfo field_info_;
};
-class HArrayGet FINAL : public HExpression<2> {
+class HArrayGet final : public HExpression<2> {
public:
HArrayGet(HInstruction* array,
HInstruction* index,
@@ -5687,7 +5910,7 @@ class HArrayGet FINAL : public HExpression<2> {
type,
SideEffects::ArrayReadOfType(type),
dex_pc,
- /* is_string_char_at */ false) {
+ /* is_string_char_at= */ false) {
}
HArrayGet(HInstruction* array,
@@ -5702,12 +5925,12 @@ class HArrayGet FINAL : public HExpression<2> {
SetRawInputAt(1, index);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
// TODO: We can be smarter here.
// Currently, unless the array is the result of NewArray, the array access is always
// preceded by some form of null check necessary for the bounds check, usually
@@ -5761,13 +5984,13 @@ class HArrayGet FINAL : public HExpression<2> {
// a particular HArrayGet is actually a String.charAt() by looking at the type
// of the input but that requires holding the mutator lock, so we prefer to use
// a flag so that code generators don't need to do the locking.
- static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits;
static constexpr size_t kNumberOfArrayGetPackedBits = kFlagIsStringCharAt + 1;
static_assert(kNumberOfArrayGetPackedBits <= HInstruction::kMaxNumberOfPackedBits,
"Too many packed fields.");
};
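// For illustration (an assumption about this refactoring, not stated in the
// diff): the instruction's type now lives in HInstruction's generic packed
// bits, so subclasses claim their flags from kNumberOfGenericPackedBits on:
//   bit kNumberOfGenericPackedBits + 0 : kFlagIsStringCharAt
// and the static_assert above catches overflow of the packed-bit budget.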
-class HArraySet FINAL : public HTemplateInstruction<3> {
+class HArraySet final : public HExpression<3> {
public:
HArraySet(HInstruction* array,
HInstruction* index,
@@ -5789,7 +6012,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> {
DataType::Type expected_component_type,
SideEffects side_effects,
uint32_t dex_pc)
- : HTemplateInstruction(kArraySet, side_effects, dex_pc) {
+ : HExpression(kArraySet, side_effects, dex_pc) {
SetPackedField<ExpectedComponentTypeField>(expected_component_type);
SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference);
SetPackedFlag<kFlagValueCanBeNull>(true);
@@ -5799,17 +6022,17 @@ class HArraySet FINAL : public HTemplateInstruction<3> {
SetRawInputAt(2, value);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
- bool NeedsEnvironment() const OVERRIDE {
+ bool NeedsEnvironment() const override {
// We call a runtime method to throw ArrayStoreException.
return NeedsTypeCheck();
}
// Can throw ArrayStoreException.
- bool CanThrow() const OVERRIDE { return NeedsTypeCheck(); }
+ bool CanThrow() const override { return NeedsTypeCheck(); }
- bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj ATTRIBUTE_UNUSED) const override {
// TODO: Same as for ArrayGet.
return false;
}
@@ -5886,7 +6109,7 @@ class HArraySet FINAL : public HTemplateInstruction<3> {
BitField<DataType::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>;
};
-class HArrayLength FINAL : public HExpression<1> {
+class HArrayLength final : public HExpression<1> {
public:
HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false)
: HExpression(kArrayLength, DataType::Type::kInt32, SideEffects::None(), dex_pc) {
@@ -5896,12 +6119,12 @@ class HArrayLength FINAL : public HExpression<1> {
SetRawInputAt(0, array);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE {
+ bool CanDoImplicitNullCheckOn(HInstruction* obj) const override {
return obj == InputAt(0);
}
@@ -5918,36 +6141,37 @@ class HArrayLength FINAL : public HExpression<1> {
// determine whether a particular HArrayLength is actually a String.length() by
// looking at the type of the input but that requires holding the mutator lock, so
// we prefer to use a flag so that code generators don't need to do the locking.
- static constexpr size_t kFlagIsStringLength = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFlagIsStringLength = kNumberOfGenericPackedBits;
static constexpr size_t kNumberOfArrayLengthPackedBits = kFlagIsStringLength + 1;
static_assert(kNumberOfArrayLengthPackedBits <= HInstruction::kMaxNumberOfPackedBits,
"Too many packed fields.");
};
-class HBoundsCheck FINAL : public HExpression<2> {
+class HBoundsCheck final : public HExpression<2> {
public:
// `HBoundsCheck` can trigger GC, as it may call the `IndexOutOfBoundsException`
- // constructor.
+ // constructor. However, it can only do so on a fatal slow path, so execution never returns to
+ // the instruction following the current one; thus 'SideEffects::None()' is used.
HBoundsCheck(HInstruction* index,
HInstruction* length,
uint32_t dex_pc,
bool is_string_char_at = false)
- : HExpression(kBoundsCheck, index->GetType(), SideEffects::CanTriggerGC(), dex_pc) {
+ : HExpression(kBoundsCheck, index->GetType(), SideEffects::None(), dex_pc) {
DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(index->GetType()));
SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at);
SetRawInputAt(0, index);
SetRawInputAt(1, length);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool NeedsEnvironment() const OVERRIDE { return true; }
+ bool NeedsEnvironment() const override { return true; }
- bool CanThrow() const OVERRIDE { return true; }
+ bool CanThrow() const override { return true; }
bool IsStringCharAt() const { return GetPackedFlag<kFlagIsStringCharAt>(); }
@@ -5959,19 +6183,22 @@ class HBoundsCheck FINAL : public HExpression<2> {
DEFAULT_COPY_CONSTRUCTOR(BoundsCheck);
private:
- static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFlagIsStringCharAt = kNumberOfGenericPackedBits;
+ static constexpr size_t kNumberOfBoundsCheckPackedBits = kFlagIsStringCharAt + 1;
+ static_assert(kNumberOfBoundsCheckPackedBits <= HInstruction::kMaxNumberOfPackedBits,
+ "Too many packed fields.");
};
-class HSuspendCheck FINAL : public HTemplateInstruction<0> {
+class HSuspendCheck final : public HExpression<0> {
public:
explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc),
+ : HExpression(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc),
slow_path_(nullptr) {
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
- bool NeedsEnvironment() const OVERRIDE {
+ bool NeedsEnvironment() const override {
return true;
}
@@ -5991,13 +6218,13 @@ class HSuspendCheck FINAL : public HTemplateInstruction<0> {
// Pseudo-instruction which provides the native debugger with mapping information.
// It ensures that we can generate line numbers and local variable information at this point.
-class HNativeDebugInfo : public HTemplateInstruction<0> {
+class HNativeDebugInfo : public HExpression<0> {
public:
explicit HNativeDebugInfo(uint32_t dex_pc)
- : HTemplateInstruction<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) {
+ : HExpression<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) {
}
- bool NeedsEnvironment() const OVERRIDE {
+ bool NeedsEnvironment() const override {
return true;
}
@@ -6010,7 +6237,7 @@ class HNativeDebugInfo : public HTemplateInstruction<0> {
/**
* Instruction to load a Class object.
*/
-class HLoadClass FINAL : public HInstruction {
+class HLoadClass final : public HInstruction {
public:
// Determines how to load the Class.
enum class LoadKind {
@@ -6024,18 +6251,18 @@ class HLoadClass FINAL : public HInstruction {
// Used for boot image classes referenced by boot image code.
kBootImageLinkTimePcRelative,
- // Use a known boot image Class* address, embedded in the code by the codegen.
- // Used for boot image classes referenced by apps in AOT- and JIT-compiled code.
- kBootImageAddress,
-
- // Use a PC-relative load from a boot image ClassTable mmapped into the .bss
- // of the oat file.
- kBootImageClassTable,
+ // Load from an entry in the .data.bimg.rel.ro using a PC-relative load.
+ // Used for boot image classes referenced by apps in AOT-compiled code.
+ kBootImageRelRo,
// Load from an entry in the .bss section using a PC-relative load.
- // Used for classes outside boot image when .bss is accessible with a PC-relative load.
+ // Used for classes outside boot image referenced by AOT-compiled app and boot image code.
kBssEntry,
+ // Use a known boot image Class* address, embedded in the code by the codegen.
+ // Used for boot image classes referenced by apps in JIT-compiled code.
+ kJitBootImageAddress,
+
// Load from the root table associated with the JIT compiled method.
kJitTableAddress,
@@ -6053,12 +6280,14 @@ class HLoadClass FINAL : public HInstruction {
bool is_referrers_class,
uint32_t dex_pc,
bool needs_access_check)
- : HInstruction(kLoadClass, SideEffectsForArchRuntimeCalls(), dex_pc),
+ : HInstruction(kLoadClass,
+ DataType::Type::kReference,
+ SideEffectsForArchRuntimeCalls(),
+ dex_pc),
special_input_(HUserRecord<HInstruction*>(current_method)),
type_index_(type_index),
dex_file_(dex_file),
- klass_(klass),
- loaded_class_rti_(ReferenceTypeInfo::CreateInvalid()) {
+ klass_(klass) {
// Referrers class should not need access check. We never inline unverified
// methods so we can't possibly end up in this situation.
DCHECK(!is_referrers_class || !needs_access_check);
@@ -6068,9 +6297,10 @@ class HLoadClass FINAL : public HInstruction {
SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
SetPackedFlag<kFlagIsInBootImage>(false);
SetPackedFlag<kFlagGenerateClInitCheck>(false);
+ SetPackedFlag<kFlagValidLoadedClassRTI>(false);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
void SetLoadKind(LoadKind load_kind);
@@ -6078,15 +6308,21 @@ class HLoadClass FINAL : public HInstruction {
return GetPackedField<LoadKindField>();
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool HasPcRelativeLoadKind() const {
+ return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
+ GetLoadKind() == LoadKind::kBssEntry;
+ }
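// For illustration (a rough sketch, not part of this change): on arm64 the
// rel.ro and .bss kinds typically lower to a PC-relative page address plus a
// load, roughly:
//   adrp x0, <entry page>       // PC-relative page of the entry
//   ldr  w0, [x0, <entry off>]  // read the Class* from .data.bimg.rel.ro/.bss
// kBssEntry additionally starts out null and is resolved on a slow path.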
- bool InstructionDataEquals(const HInstruction* other) const;
+ bool CanBeMoved() const override { return true; }
- size_t ComputeHashCode() const OVERRIDE { return type_index_.index_; }
+ bool InstructionDataEquals(const HInstruction* other) const override;
- bool CanBeNull() const OVERRIDE { return false; }
+ size_t ComputeHashCode() const override { return type_index_.index_; }
- bool NeedsEnvironment() const OVERRIDE {
+ bool CanBeNull() const override { return false; }
+
+ bool NeedsEnvironment() const override {
return CanCallRuntime();
}
@@ -6104,31 +6340,34 @@ class HLoadClass FINAL : public HInstruction {
GetLoadKind() == LoadKind::kBssEntry;
}
- bool CanThrow() const OVERRIDE {
+ bool CanThrow() const override {
return NeedsAccessCheck() ||
MustGenerateClinitCheck() ||
// If the class is in the boot image, the lookup in the runtime call cannot throw.
- // This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and
- // PIC and subsequently avoids a DCE behavior dependency on the PIC option.
((GetLoadKind() == LoadKind::kRuntimeCall ||
GetLoadKind() == LoadKind::kBssEntry) &&
!IsInBootImage());
}
ReferenceTypeInfo GetLoadedClassRTI() {
- return loaded_class_rti_;
+ if (GetPackedFlag<kFlagValidLoadedClassRTI>()) {
+ // Note: The is_exact flag from the return value should not be used.
+ return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact= */ true);
+ } else {
+ return ReferenceTypeInfo::CreateInvalid();
+ }
}
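// For illustration (hypothetical caller, not part of this change): users are
// expected to test validity before relying on the RTI:
//   ReferenceTypeInfo rti = load_class->GetLoadedClassRTI();
//   if (rti.IsValid()) { /* use rti, but ignore rti.IsExact() per the note */ }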
- void SetLoadedClassRTI(ReferenceTypeInfo rti) {
- // Make sure we only set exact types (the loaded class should never be merged).
- DCHECK(rti.IsExact());
- loaded_class_rti_ = rti;
+ // Loaded class RTI is marked as valid by RTP if the klass_ is admissible.
+ void SetValidLoadedClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(klass_ != nullptr);
+ SetPackedFlag<kFlagValidLoadedClassRTI>(true);
}
dex::TypeIndex GetTypeIndex() const { return type_index_; }
const DexFile& GetDexFile() const { return dex_file_; }
- bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
+ bool NeedsDexCacheOfDeclaringClass() const override {
return GetLoadKind() == LoadKind::kRuntimeCall;
}
@@ -6141,6 +6380,13 @@ class HLoadClass FINAL : public HInstruction {
bool IsInBootImage() const { return GetPackedFlag<kFlagIsInBootImage>(); }
bool MustGenerateClinitCheck() const { return GetPackedFlag<kFlagGenerateClInitCheck>(); }
+ bool MustResolveTypeOnSlowPath() const {
+ // Check that this instruction has a slow path.
+ DCHECK(GetLoadKind() != LoadKind::kRuntimeCall); // kRuntimeCall calls on main path.
+ DCHECK(GetLoadKind() == LoadKind::kBssEntry || MustGenerateClinitCheck());
+ return GetLoadKind() == LoadKind::kBssEntry;
+ }
+
void MarkInBootImage() {
SetPackedFlag<kFlagIsInBootImage>(true);
}
@@ -6148,15 +6394,11 @@ class HLoadClass FINAL : public HInstruction {
void AddSpecialInput(HInstruction* special_input);
using HInstruction::GetInputRecords; // Keep the const version visible.
- ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+ ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final {
return ArrayRef<HUserRecord<HInstruction*>>(
&special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
}
- DataType::Type GetType() const OVERRIDE {
- return DataType::Type::kReference;
- }
-
Handle<mirror::Class> GetClass() const {
return klass_;
}
@@ -6175,14 +6417,14 @@ class HLoadClass FINAL : public HInstruction {
static constexpr size_t kFieldLoadKind = kFlagGenerateClInitCheck + 1;
static constexpr size_t kFieldLoadKindSize =
MinimumBitsToStore(static_cast<size_t>(LoadKind::kLast));
- static constexpr size_t kNumberOfLoadClassPackedBits = kFieldLoadKind + kFieldLoadKindSize;
+ static constexpr size_t kFlagValidLoadedClassRTI = kFieldLoadKind + kFieldLoadKindSize;
+ static constexpr size_t kNumberOfLoadClassPackedBits = kFlagValidLoadedClassRTI + 1;
static_assert(kNumberOfLoadClassPackedBits < kMaxNumberOfPackedBits, "Too many packed fields.");
using LoadKindField = BitField<LoadKind, kFieldLoadKind, kFieldLoadKindSize>;
static bool HasTypeReference(LoadKind load_kind) {
return load_kind == LoadKind::kReferrersClass ||
load_kind == LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == LoadKind::kBootImageClassTable ||
load_kind == LoadKind::kBssEntry ||
load_kind == LoadKind::kRuntimeCall;
}
@@ -6203,8 +6445,6 @@ class HLoadClass FINAL : public HInstruction {
const DexFile& dex_file_;
Handle<mirror::Class> klass_;
-
- ReferenceTypeInfo loaded_class_rti_;
};
std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs);
@@ -6227,15 +6467,15 @@ inline void HLoadClass::AddSpecialInput(HInstruction* special_input) {
// The special input is used for PC-relative loads on some architectures,
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
- GetLoadKind() == LoadKind::kBootImageAddress ||
- GetLoadKind() == LoadKind::kBootImageClassTable ||
- GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
+ GetLoadKind() == LoadKind::kBssEntry ||
+ GetLoadKind() == LoadKind::kJitBootImageAddress) << GetLoadKind();
DCHECK(special_input_.GetInstruction() == nullptr);
special_input_ = HUserRecord<HInstruction*>(special_input);
special_input->AddUseAt(this, 0);
}
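// For illustration (a sketch, not part of this change): on x86 the special
// input is typically the result of HX86ComputeBaseMethodAddress, providing
// the anchor for PC-relative addressing; hypothetically:
//   load_class->AddSpecialInput(base_method_address);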
-class HLoadString FINAL : public HInstruction {
+class HLoadString final : public HInstruction {
public:
// Determines how to load the String.
enum class LoadKind {
@@ -6243,18 +6483,18 @@ class HLoadString FINAL : public HInstruction {
// Used for boot image strings referenced by boot image code.
kBootImageLinkTimePcRelative,
- // Use a known boot image String* address, embedded in the code by the codegen.
- // Used for boot image strings referenced by apps in AOT- and JIT-compiled code.
- kBootImageAddress,
-
- // Use a PC-relative load from a boot image InternTable mmapped into the .bss
- // of the oat file.
- kBootImageInternTable,
+ // Load from an entry in the .data.bimg.rel.ro using a PC-relative load.
+ // Used for boot image strings referenced by apps in AOT-compiled code.
+ kBootImageRelRo,
// Load from an entry in the .bss section using a PC-relative load.
- // Used for strings outside boot image when .bss is accessible with a PC-relative load.
+ // Used for strings outside boot image referenced by AOT-compiled app and boot image code.
kBssEntry,
+ // Use a known boot image String* address, embedded in the code by the codegen.
+ // Used for boot image strings referenced by apps in JIT-compiled code.
+ kJitBootImageAddress,
+
// Load from the root table associated with the JIT compiled method.
kJitTableAddress,
@@ -6269,14 +6509,17 @@ class HLoadString FINAL : public HInstruction {
dex::StringIndex string_index,
const DexFile& dex_file,
uint32_t dex_pc)
- : HInstruction(kLoadString, SideEffectsForArchRuntimeCalls(), dex_pc),
+ : HInstruction(kLoadString,
+ DataType::Type::kReference,
+ SideEffectsForArchRuntimeCalls(),
+ dex_pc),
special_input_(HUserRecord<HInstruction*>(current_method)),
string_index_(string_index),
dex_file_(dex_file) {
SetPackedField<LoadKindField>(LoadKind::kRuntimeCall);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
void SetLoadKind(LoadKind load_kind);
@@ -6284,6 +6527,12 @@ class HLoadString FINAL : public HInstruction {
return GetPackedField<LoadKindField>();
}
+ bool HasPcRelativeLoadKind() const {
+ return GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
+ GetLoadKind() == LoadKind::kBssEntry;
+ }
+
const DexFile& GetDexFile() const {
return dex_file_;
}
@@ -6300,31 +6549,31 @@ class HLoadString FINAL : public HInstruction {
string_ = str;
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE;
+ bool InstructionDataEquals(const HInstruction* other) const override;
- size_t ComputeHashCode() const OVERRIDE { return string_index_.index_; }
+ size_t ComputeHashCode() const override { return string_index_.index_; }
// Will call the runtime if we need to load the string through
// the dex cache and the string is not guaranteed to be there yet.
- bool NeedsEnvironment() const OVERRIDE {
+ bool NeedsEnvironment() const override {
LoadKind load_kind = GetLoadKind();
if (load_kind == LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == LoadKind::kBootImageAddress ||
- load_kind == LoadKind::kBootImageInternTable ||
+ load_kind == LoadKind::kBootImageRelRo ||
+ load_kind == LoadKind::kJitBootImageAddress ||
load_kind == LoadKind::kJitTableAddress) {
return false;
}
return true;
}
- bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
+ bool NeedsDexCacheOfDeclaringClass() const override {
return GetLoadKind() == LoadKind::kRuntimeCall;
}
- bool CanBeNull() const OVERRIDE { return false; }
- bool CanThrow() const OVERRIDE { return NeedsEnvironment(); }
+ bool CanBeNull() const override { return false; }
+ bool CanThrow() const override { return NeedsEnvironment(); }
static SideEffects SideEffectsForArchRuntimeCalls() {
return SideEffects::CanTriggerGC();
@@ -6333,15 +6582,11 @@ class HLoadString FINAL : public HInstruction {
void AddSpecialInput(HInstruction* special_input);
using HInstruction::GetInputRecords; // Keep the const version visible.
- ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() OVERRIDE FINAL {
+ ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final {
return ArrayRef<HUserRecord<HInstruction*>>(
&special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
}
- DataType::Type GetType() const OVERRIDE {
- return DataType::Type::kReference;
- }
-
DECLARE_INSTRUCTION(LoadString);
protected:
@@ -6389,9 +6634,9 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
// The special input is used for PC-relative loads on some architectures,
// including literal pool loads, which are PC-relative too.
DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative ||
- GetLoadKind() == LoadKind::kBootImageAddress ||
- GetLoadKind() == LoadKind::kBootImageInternTable ||
- GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind();
+ GetLoadKind() == LoadKind::kBootImageRelRo ||
+ GetLoadKind() == LoadKind::kBssEntry ||
+ GetLoadKind() == LoadKind::kJitBootImageAddress) << GetLoadKind();
// HLoadString::GetInputRecords() returns an empty array at this point,
// so use the GetInputRecords() from the base class to set the input record.
DCHECK(special_input_.GetInstruction() == nullptr);
@@ -6399,10 +6644,98 @@ inline void HLoadString::AddSpecialInput(HInstruction* special_input) {
special_input->AddUseAt(this, 0);
}
+class HLoadMethodHandle final : public HInstruction {
+ public:
+ HLoadMethodHandle(HCurrentMethod* current_method,
+ uint16_t method_handle_idx,
+ const DexFile& dex_file,
+ uint32_t dex_pc)
+ : HInstruction(kLoadMethodHandle,
+ DataType::Type::kReference,
+ SideEffectsForArchRuntimeCalls(),
+ dex_pc),
+ special_input_(HUserRecord<HInstruction*>(current_method)),
+ method_handle_idx_(method_handle_idx),
+ dex_file_(dex_file) {
+ }
+
+ using HInstruction::GetInputRecords; // Keep the const version visible.
+ ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final {
+ return ArrayRef<HUserRecord<HInstruction*>>(
+ &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
+ }
+
+ bool IsClonable() const override { return true; }
+
+ uint16_t GetMethodHandleIndex() const { return method_handle_idx_; }
+
+ const DexFile& GetDexFile() const { return dex_file_; }
+
+ static SideEffects SideEffectsForArchRuntimeCalls() {
+ return SideEffects::CanTriggerGC();
+ }
+
+ DECLARE_INSTRUCTION(LoadMethodHandle);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(LoadMethodHandle);
+
+ private:
+ // The special input is the HCurrentMethod for kRuntimeCall.
+ HUserRecord<HInstruction*> special_input_;
+
+ const uint16_t method_handle_idx_;
+ const DexFile& dex_file_;
+};
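// For illustration (hypothetical builder usage, not part of this change):
//   HLoadMethodHandle* load = new (allocator) HLoadMethodHandle(
//       graph->GetCurrentMethod(), method_handle_idx, dex_file, dex_pc);
//   current_block->AddInstruction(load);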
+
+class HLoadMethodType final : public HInstruction {
+ public:
+ HLoadMethodType(HCurrentMethod* current_method,
+ dex::ProtoIndex proto_index,
+ const DexFile& dex_file,
+ uint32_t dex_pc)
+ : HInstruction(kLoadMethodType,
+ DataType::Type::kReference,
+ SideEffectsForArchRuntimeCalls(),
+ dex_pc),
+ special_input_(HUserRecord<HInstruction*>(current_method)),
+ proto_index_(proto_index),
+ dex_file_(dex_file) {
+ }
+
+ using HInstruction::GetInputRecords; // Keep the const version visible.
+ ArrayRef<HUserRecord<HInstruction*>> GetInputRecords() final {
+ return ArrayRef<HUserRecord<HInstruction*>>(
+ &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u);
+ }
+
+ bool IsClonable() const override { return true; }
+
+ dex::ProtoIndex GetProtoIndex() const { return proto_index_; }
+
+ const DexFile& GetDexFile() const { return dex_file_; }
+
+ static SideEffects SideEffectsForArchRuntimeCalls() {
+ return SideEffects::CanTriggerGC();
+ }
+
+ DECLARE_INSTRUCTION(LoadMethodType);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(LoadMethodType);
+
+ private:
+ // The special input is the HCurrentMethod for kRuntimeCall.
+ HUserRecord<HInstruction*> special_input_;
+
+ const dex::ProtoIndex proto_index_;
+ const DexFile& dex_file_;
+};
+
/**
* Performs an initialization check on its Class object input.
*/
-class HClinitCheck FINAL : public HExpression<1> {
+class HClinitCheck final : public HExpression<1> {
public:
HClinitCheck(HLoadClass* constant, uint32_t dex_pc)
: HExpression(
@@ -6412,19 +6745,18 @@ class HClinitCheck FINAL : public HExpression<1> {
dex_pc) {
SetRawInputAt(0, constant);
}
-
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ // TODO: Make ClinitCheck clonable.
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool NeedsEnvironment() const OVERRIDE {
+ bool NeedsEnvironment() const override {
// May call runtime to initialize the class.
return true;
}
- bool CanThrow() const OVERRIDE { return true; }
+ bool CanThrow() const override { return true; }
HLoadClass* GetLoadClass() const {
DCHECK(InputAt(0)->IsLoadClass());
@@ -6438,7 +6770,7 @@ class HClinitCheck FINAL : public HExpression<1> {
DEFAULT_COPY_CONSTRUCTOR(ClinitCheck);
};
-class HStaticFieldGet FINAL : public HExpression<1> {
+class HStaticFieldGet final : public HExpression<1> {
public:
HStaticFieldGet(HInstruction* cls,
ArtField* field,
@@ -6464,15 +6796,15 @@ class HStaticFieldGet FINAL : public HExpression<1> {
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return !IsVolatile(); }
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return !IsVolatile(); }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
const HStaticFieldGet* other_get = other->AsStaticFieldGet();
return GetFieldOffset().SizeValue() == other_get->GetFieldOffset().SizeValue();
}
- size_t ComputeHashCode() const OVERRIDE {
+ size_t ComputeHashCode() const override {
return (HInstruction::ComputeHashCode() << 7) | GetFieldOffset().SizeValue();
}
@@ -6497,7 +6829,7 @@ class HStaticFieldGet FINAL : public HExpression<1> {
const FieldInfo field_info_;
};
-class HStaticFieldSet FINAL : public HTemplateInstruction<2> {
+class HStaticFieldSet final : public HExpression<2> {
public:
HStaticFieldSet(HInstruction* cls,
HInstruction* value,
@@ -6509,9 +6841,9 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> {
uint16_t declaring_class_def_index,
const DexFile& dex_file,
uint32_t dex_pc)
- : HTemplateInstruction(kStaticFieldSet,
- SideEffects::FieldWriteOfType(field_type, is_volatile),
- dex_pc),
+ : HExpression(kStaticFieldSet,
+ SideEffects::FieldWriteOfType(field_type, is_volatile),
+ dex_pc),
field_info_(field,
field_offset,
field_type,
@@ -6524,7 +6856,7 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> {
SetRawInputAt(1, value);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
const FieldInfo& GetFieldInfo() const { return field_info_; }
MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); }
DataType::Type GetFieldType() const { return field_info_.GetFieldType(); }
@@ -6548,7 +6880,7 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> {
const FieldInfo field_info_;
};
-class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> {
+class HUnresolvedInstanceFieldGet final : public HExpression<1> {
public:
HUnresolvedInstanceFieldGet(HInstruction* obj,
DataType::Type field_type,
@@ -6562,9 +6894,9 @@ class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> {
SetRawInputAt(0, obj);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool NeedsEnvironment() const OVERRIDE { return true; }
- bool CanThrow() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
+ bool NeedsEnvironment() const override { return true; }
+ bool CanThrow() const override { return true; }
DataType::Type GetFieldType() const { return GetType(); }
uint32_t GetFieldIndex() const { return field_index_; }
@@ -6578,16 +6910,14 @@ class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> {
const uint32_t field_index_;
};
-class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> {
+class HUnresolvedInstanceFieldSet final : public HExpression<2> {
public:
HUnresolvedInstanceFieldSet(HInstruction* obj,
HInstruction* value,
DataType::Type field_type,
uint32_t field_index,
uint32_t dex_pc)
- : HTemplateInstruction(kUnresolvedInstanceFieldSet,
- SideEffects::AllExceptGCDependency(),
- dex_pc),
+ : HExpression(kUnresolvedInstanceFieldSet, SideEffects::AllExceptGCDependency(), dex_pc),
field_index_(field_index) {
SetPackedField<FieldTypeField>(field_type);
DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType()));
@@ -6595,9 +6925,9 @@ class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> {
SetRawInputAt(1, value);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool NeedsEnvironment() const OVERRIDE { return true; }
- bool CanThrow() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
+ bool NeedsEnvironment() const override { return true; }
+ bool CanThrow() const override { return true; }
DataType::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); }
uint32_t GetFieldIndex() const { return field_index_; }
@@ -6620,7 +6950,7 @@ class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> {
const uint32_t field_index_;
};
-class HUnresolvedStaticFieldGet FINAL : public HExpression<0> {
+class HUnresolvedStaticFieldGet final : public HExpression<0> {
public:
HUnresolvedStaticFieldGet(DataType::Type field_type,
uint32_t field_index,
@@ -6632,9 +6962,9 @@ class HUnresolvedStaticFieldGet FINAL : public HExpression<0> {
field_index_(field_index) {
}
- bool IsClonable() const OVERRIDE { return true; }
- bool NeedsEnvironment() const OVERRIDE { return true; }
- bool CanThrow() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
+ bool NeedsEnvironment() const override { return true; }
+ bool CanThrow() const override { return true; }
DataType::Type GetFieldType() const { return GetType(); }
uint32_t GetFieldIndex() const { return field_index_; }
@@ -6648,24 +6978,22 @@ class HUnresolvedStaticFieldGet FINAL : public HExpression<0> {
const uint32_t field_index_;
};
-class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> {
+class HUnresolvedStaticFieldSet final : public HExpression<1> {
public:
HUnresolvedStaticFieldSet(HInstruction* value,
DataType::Type field_type,
uint32_t field_index,
uint32_t dex_pc)
- : HTemplateInstruction(kUnresolvedStaticFieldSet,
- SideEffects::AllExceptGCDependency(),
- dex_pc),
+ : HExpression(kUnresolvedStaticFieldSet, SideEffects::AllExceptGCDependency(), dex_pc),
field_index_(field_index) {
SetPackedField<FieldTypeField>(field_type);
DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType()));
SetRawInputAt(0, value);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool NeedsEnvironment() const OVERRIDE { return true; }
- bool CanThrow() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
+ bool NeedsEnvironment() const override { return true; }
+ bool CanThrow() const override { return true; }
DataType::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); }
uint32_t GetFieldIndex() const { return field_index_; }
@@ -6689,13 +7017,13 @@ class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> {
};
// Implement the move-exception DEX instruction.
-class HLoadException FINAL : public HExpression<0> {
+class HLoadException final : public HExpression<0> {
public:
explicit HLoadException(uint32_t dex_pc = kNoDexPc)
: HExpression(kLoadException, DataType::Type::kReference, SideEffects::None(), dex_pc) {
}
- bool CanBeNull() const OVERRIDE { return false; }
+ bool CanBeNull() const override { return false; }
DECLARE_INSTRUCTION(LoadException);
@@ -6705,10 +7033,10 @@ class HLoadException FINAL : public HExpression<0> {
// Implicit part of move-exception which clears thread-local exception storage.
// Must not be removed because the runtime expects the TLS to get cleared.
-class HClearException FINAL : public HTemplateInstruction<0> {
+class HClearException final : public HExpression<0> {
public:
explicit HClearException(uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kClearException, SideEffects::AllWrites(), dex_pc) {
+ : HExpression(kClearException, SideEffects::AllWrites(), dex_pc) {
}
DECLARE_INSTRUCTION(ClearException);
@@ -6717,20 +7045,20 @@ class HClearException FINAL : public HTemplateInstruction<0> {
DEFAULT_COPY_CONSTRUCTOR(ClearException);
};
-class HThrow FINAL : public HTemplateInstruction<1> {
+class HThrow final : public HExpression<1> {
public:
HThrow(HInstruction* exception, uint32_t dex_pc)
- : HTemplateInstruction(kThrow, SideEffects::CanTriggerGC(), dex_pc) {
+ : HExpression(kThrow, SideEffects::CanTriggerGC(), dex_pc) {
SetRawInputAt(0, exception);
}
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsControlFlow() const override { return true; }
- bool NeedsEnvironment() const OVERRIDE { return true; }
+ bool NeedsEnvironment() const override { return true; }
- bool CanThrow() const OVERRIDE { return true; }
+ bool CanThrow() const override { return true; }
- bool AlwaysThrows() const OVERRIDE { return true; }
+ bool AlwaysThrows() const override { return true; }
DECLARE_INSTRUCTION(Throw);
@@ -6750,75 +7078,165 @@ enum class TypeCheckKind {
kInterfaceCheck, // No optimization yet when checking against an interface.
kArrayObjectCheck, // Can just check if the array is not primitive.
kArrayCheck, // No optimization yet when checking against a generic array.
+ kBitstringCheck, // Compare the type check bitstring.
- kLast = kArrayCheck
+ kLast = kBitstringCheck
};
std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs);
-class HInstanceOf FINAL : public HExpression<2> {
+// Note: HTypeCheckInstruction is just a helper class, not an abstract instruction with an
+// `IsTypeCheckInstruction()`. (New virtual methods in the HInstruction class have a high cost.)
+class HTypeCheckInstruction : public HVariableInputSizeInstruction {
public:
- HInstanceOf(HInstruction* object,
- HLoadClass* target_class,
- TypeCheckKind check_kind,
- uint32_t dex_pc)
- : HExpression(kInstanceOf,
- DataType::Type::kBool,
- SideEffectsForArchRuntimeCalls(check_kind),
- dex_pc) {
+ HTypeCheckInstruction(InstructionKind kind,
+ DataType::Type type,
+ HInstruction* object,
+ HInstruction* target_class_or_null,
+ TypeCheckKind check_kind,
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ HIntConstant* bitstring_path_to_root,
+ HIntConstant* bitstring_mask,
+ SideEffects side_effects)
+ : HVariableInputSizeInstruction(
+ kind,
+ type,
+ side_effects,
+ dex_pc,
+ allocator,
+ /* number_of_inputs= */ check_kind == TypeCheckKind::kBitstringCheck ? 4u : 2u,
+ kArenaAllocTypeCheckInputs),
+ klass_(klass) {
SetPackedField<TypeCheckKindField>(check_kind);
SetPackedFlag<kFlagMustDoNullCheck>(true);
+ SetPackedFlag<kFlagValidTargetClassRTI>(false);
SetRawInputAt(0, object);
- SetRawInputAt(1, target_class);
+ SetRawInputAt(1, target_class_or_null);
+ DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_path_to_root != nullptr);
+ DCHECK_EQ(check_kind == TypeCheckKind::kBitstringCheck, bitstring_mask != nullptr);
+ if (check_kind == TypeCheckKind::kBitstringCheck) {
+ DCHECK(target_class_or_null->IsNullConstant());
+ SetRawInputAt(2, bitstring_path_to_root);
+ SetRawInputAt(3, bitstring_mask);
+ } else {
+ DCHECK(target_class_or_null->IsLoadClass());
+ }
}
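// For illustration (a sketch; field names are assumptions, not from this
// change): the check reduces to a masked compare of the class's type-check
// bitstring against the expected path-to-root:
//   uint32_t bits = /* load the type-check bitstring from the object's class */;
//   bool match = (bits & GetBitstringMask()) == GetBitstringPathToRoot();
// i.e. a subtype test without walking the superclass chain.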
HLoadClass* GetTargetClass() const {
+ DCHECK_NE(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
HInstruction* load_class = InputAt(1);
DCHECK(load_class->IsLoadClass());
return load_class->AsLoadClass();
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
+ uint32_t GetBitstringPathToRoot() const {
+ DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+ HInstruction* path_to_root = InputAt(2);
+ DCHECK(path_to_root->IsIntConstant());
+ return static_cast<uint32_t>(path_to_root->AsIntConstant()->GetValue());
+ }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
+ uint32_t GetBitstringMask() const {
+ DCHECK_EQ(GetTypeCheckKind(), TypeCheckKind::kBitstringCheck);
+ HInstruction* mask = InputAt(3);
+ DCHECK(mask->IsIntConstant());
+ return static_cast<uint32_t>(mask->AsIntConstant()->GetValue());
}
- bool NeedsEnvironment() const OVERRIDE {
- return CanCallRuntime(GetTypeCheckKind());
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+
+ bool InstructionDataEquals(const HInstruction* other) const override {
+ DCHECK(other->IsInstanceOf() || other->IsCheckCast()) << other->DebugName();
+ return GetPackedFields() == down_cast<const HTypeCheckInstruction*>(other)->GetPackedFields();
}
- // Used only in code generation.
bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
- static bool CanCallRuntime(TypeCheckKind check_kind) {
- // Mips currently does runtime calls for any other checks.
- return check_kind != TypeCheckKind::kExactCheck;
+ ReferenceTypeInfo GetTargetClassRTI() {
+ if (GetPackedFlag<kFlagValidTargetClassRTI>()) {
+ // Note: The is_exact flag from the return value should not be used.
+ return ReferenceTypeInfo::CreateUnchecked(klass_, /* is_exact= */ true);
+ } else {
+ return ReferenceTypeInfo::CreateInvalid();
+ }
}
- static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) {
- return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None();
+ // Target class RTI is marked as valid by RTP if the klass_ is admissible.
+ void SetValidTargetClassRTI() REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(klass_ != nullptr);
+ SetPackedFlag<kFlagValidTargetClassRTI>(true);
}
- DECLARE_INSTRUCTION(InstanceOf);
+ Handle<mirror::Class> GetClass() const {
+ return klass_;
+ }
protected:
- DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
+ DEFAULT_COPY_CONSTRUCTOR(TypeCheckInstruction);
private:
- static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
static constexpr size_t kFieldTypeCheckKindSize =
MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
- static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1;
+ static constexpr size_t kFlagValidTargetClassRTI = kFlagMustDoNullCheck + 1;
+ static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagValidTargetClassRTI + 1;
static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
+
+ Handle<mirror::Class> klass_;
};
-class HBoundType FINAL : public HExpression<1> {
+class HInstanceOf final : public HTypeCheckInstruction {
+ public:
+ HInstanceOf(HInstruction* object,
+ HInstruction* target_class_or_null,
+ TypeCheckKind check_kind,
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ HIntConstant* bitstring_path_to_root,
+ HIntConstant* bitstring_mask)
+ : HTypeCheckInstruction(kInstanceOf,
+ DataType::Type::kBool,
+ object,
+ target_class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator,
+ bitstring_path_to_root,
+ bitstring_mask,
+ SideEffectsForArchRuntimeCalls(check_kind)) {}
+
+ bool IsClonable() const override { return true; }
+
+ bool NeedsEnvironment() const override {
+ return CanCallRuntime(GetTypeCheckKind());
+ }
+
+ static bool CanCallRuntime(TypeCheckKind check_kind) {
+ // MIPS currently does runtime calls for any check kind other than kExactCheck.
+ return check_kind != TypeCheckKind::kExactCheck;
+ }
+
+ static SideEffects SideEffectsForArchRuntimeCalls(TypeCheckKind check_kind) {
+ return CanCallRuntime(check_kind) ? SideEffects::CanTriggerGC() : SideEffects::None();
+ }
+
+ DECLARE_INSTRUCTION(InstanceOf);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(InstanceOf);
+};
+
+class HBoundType final : public HExpression<1> {
public:
explicit HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc)
: HExpression(kBoundType, DataType::Type::kReference, SideEffects::None(), dex_pc),
@@ -6829,7 +7247,8 @@ class HBoundType FINAL : public HExpression<1> {
SetRawInputAt(0, input);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool InstructionDataEquals(const HInstruction* other) const override;
+ bool IsClonable() const override { return true; }
// {Get,Set}Upper* should only be used in reference type propagation.
const ReferenceTypeInfo& GetUpperBound() const { return upper_bound_; }
@@ -6841,7 +7260,7 @@ class HBoundType FINAL : public HExpression<1> {
SetPackedFlag<kFlagCanBeNull>(can_be_null);
}
- bool CanBeNull() const OVERRIDE { return GetPackedFlag<kFlagCanBeNull>(); }
+ bool CanBeNull() const override { return GetPackedFlag<kFlagCanBeNull>(); }
DECLARE_INSTRUCTION(BoundType);
@@ -6851,7 +7270,7 @@ class HBoundType FINAL : public HExpression<1> {
private:
// Represents the top constraint that can_be_null_ cannot exceed (i.e. if this
// is false then CanBeNull() cannot be true).
- static constexpr size_t kFlagUpperCanBeNull = kNumberOfExpressionPackedBits;
+ static constexpr size_t kFlagUpperCanBeNull = kNumberOfGenericPackedBits;
static constexpr size_t kFlagCanBeNull = kFlagUpperCanBeNull + 1;
static constexpr size_t kNumberOfBoundTypePackedBits = kFlagCanBeNull + 1;
static_assert(kNumberOfBoundTypePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
@@ -6865,57 +7284,40 @@ class HBoundType FINAL : public HExpression<1> {
ReferenceTypeInfo upper_bound_;
};
-class HCheckCast FINAL : public HTemplateInstruction<2> {
+class HCheckCast final : public HTypeCheckInstruction {
public:
HCheckCast(HInstruction* object,
- HLoadClass* target_class,
+ HInstruction* target_class_or_null,
TypeCheckKind check_kind,
- uint32_t dex_pc)
- : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) {
- SetPackedField<TypeCheckKindField>(check_kind);
- SetPackedFlag<kFlagMustDoNullCheck>(true);
- SetRawInputAt(0, object);
- SetRawInputAt(1, target_class);
- }
-
- HLoadClass* GetTargetClass() const {
- HInstruction* load_class = InputAt(1);
- DCHECK(load_class->IsLoadClass());
- return load_class->AsLoadClass();
- }
-
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
-
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
- return true;
- }
-
- bool NeedsEnvironment() const OVERRIDE {
+ Handle<mirror::Class> klass,
+ uint32_t dex_pc,
+ ArenaAllocator* allocator,
+ HIntConstant* bitstring_path_to_root,
+ HIntConstant* bitstring_mask)
+ : HTypeCheckInstruction(kCheckCast,
+ DataType::Type::kVoid,
+ object,
+ target_class_or_null,
+ check_kind,
+ klass,
+ dex_pc,
+ allocator,
+ bitstring_path_to_root,
+ bitstring_mask,
+ SideEffects::CanTriggerGC()) {}
+
+ bool IsClonable() const override { return true; }
+ bool NeedsEnvironment() const override {
// Instruction may throw a CheckCastError.
return true;
}
- bool CanThrow() const OVERRIDE { return true; }
-
- bool MustDoNullCheck() const { return GetPackedFlag<kFlagMustDoNullCheck>(); }
- void ClearMustDoNullCheck() { SetPackedFlag<kFlagMustDoNullCheck>(false); }
- TypeCheckKind GetTypeCheckKind() const { return GetPackedField<TypeCheckKindField>(); }
- bool IsExactCheck() const { return GetTypeCheckKind() == TypeCheckKind::kExactCheck; }
+ bool CanThrow() const override { return true; }
DECLARE_INSTRUCTION(CheckCast);
protected:
DEFAULT_COPY_CONSTRUCTOR(CheckCast);
-
- private:
- static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits;
- static constexpr size_t kFieldTypeCheckKindSize =
- MinimumBitsToStore(static_cast<size_t>(TypeCheckKind::kLast));
- static constexpr size_t kFlagMustDoNullCheck = kFieldTypeCheckKind + kFieldTypeCheckKindSize;
- static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1;
- static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
- using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>;
};
/**
@@ -6944,17 +7346,16 @@ enum MemBarrierKind {
};
std::ostream& operator<<(std::ostream& os, const MemBarrierKind& kind);
-class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
+class HMemoryBarrier final : public HExpression<0> {
public:
explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(
- kMemoryBarrier,
- SideEffects::AllWritesAndReads(), // Assume write/read on all fields/arrays.
- dex_pc) {
+ : HExpression(kMemoryBarrier,
+ SideEffects::AllWritesAndReads(), // Assume write/read on all fields/arrays.
+ dex_pc) {
SetPackedField<BarrierKindField>(barrier_kind);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
MemBarrierKind GetBarrierKind() { return GetPackedField<BarrierKindField>(); }
@@ -7027,10 +7428,10 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> {
// }
//
// See also:
-// * CompilerDriver::RequiresConstructorBarrier
+// * DexCompilationUnit::RequiresConstructorBarrier
// * QuasiAtomic::ThreadFenceForConstructor
//
-class HConstructorFence FINAL : public HVariableInputSizeInstruction {
+class HConstructorFence final : public HVariableInputSizeInstruction {
// A fence has variable inputs because the inputs can be removed
// after the prepare_for_register_allocation phase.
// (TODO: In the future a fence could freeze multiple objects
@@ -7073,7 +7474,7 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction {
SideEffects::AllReads(),
dex_pc,
allocator,
- /* number_of_inputs */ 1,
+ /* number_of_inputs= */ 1,
kArenaAllocConstructorFenceInputs) {
DCHECK(fence_object != nullptr);
SetRawInputAt(0, fence_object);
@@ -7127,7 +7528,7 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction {
DEFAULT_COPY_CONSTRUCTOR(ConstructorFence);
};
-class HMonitorOperation FINAL : public HTemplateInstruction<1> {
+class HMonitorOperation final : public HExpression<1> {
public:
enum class OperationKind {
kEnter,
@@ -7136,18 +7537,17 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> {
};
HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc)
- : HTemplateInstruction(
- kMonitorOperation,
- SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays.
- dex_pc) {
+ : HExpression(kMonitorOperation,
+ SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays.
+ dex_pc) {
SetPackedField<OperationKindField>(kind);
SetRawInputAt(0, object);
}
// Instruction may call into the runtime, so we need an environment.
- bool NeedsEnvironment() const OVERRIDE { return true; }
+ bool NeedsEnvironment() const override { return true; }
- bool CanThrow() const OVERRIDE {
+ bool CanThrow() const override {
// Verifier guarantees that monitor-exit cannot throw.
// This is important because it allows the HGraphBuilder to remove
// a dead throw-catch loop generated for `synchronized` blocks/methods.
@@ -7173,7 +7573,7 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> {
using OperationKindField = BitField<OperationKind, kFieldOperationKind, kFieldOperationKindSize>;
};
-class HSelect FINAL : public HExpression<3> {
+class HSelect final : public HExpression<3> {
public:
HSelect(HInstruction* condition,
HInstruction* true_value,
@@ -7191,17 +7591,17 @@ class HSelect FINAL : public HExpression<3> {
SetRawInputAt(2, condition);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
HInstruction* GetFalseValue() const { return InputAt(0); }
HInstruction* GetTrueValue() const { return InputAt(1); }
HInstruction* GetCondition() const { return InputAt(2); }
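// For illustration (not part of this change): note the input order above;
// the condition is the last input, so the instruction reads as:
//   inputs: [false_value, true_value, condition]
//   result = condition ? true_value : false_value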
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool CanBeNull() const OVERRIDE {
+ bool CanBeNull() const override {
return GetTrueValue()->CanBeNull() || GetFalseValue()->CanBeNull();
}
@@ -7289,10 +7689,10 @@ std::ostream& operator<<(std::ostream& os, const MoveOperands& rhs);
static constexpr size_t kDefaultNumberOfMoves = 4;
-class HParallelMove FINAL : public HTemplateInstruction<0> {
+class HParallelMove final : public HExpression<0> {
public:
explicit HParallelMove(ArenaAllocator* allocator, uint32_t dex_pc = kNoDexPc)
- : HTemplateInstruction(kParallelMove, SideEffects::None(), dex_pc),
+ : HExpression(kParallelMove, SideEffects::None(), dex_pc),
moves_(allocator->Adapter(kArenaAllocMoveOperands)) {
moves_.reserve(kDefaultNumberOfMoves);
}
@@ -7351,7 +7751,7 @@ class HParallelMove FINAL : public HTemplateInstruction<0> {
// never used across anything that can trigger GC.
// The result of this instruction is not a pointer in the sense of `DataType::Type::kReference`.
// So we represent it by the type `DataType::Type::kInt32`.
-class HIntermediateAddress FINAL : public HExpression<2> {
+class HIntermediateAddress final : public HExpression<2> {
public:
HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc)
: HExpression(kIntermediateAddress,
@@ -7365,12 +7765,12 @@ class HIntermediateAddress FINAL : public HExpression<2> {
SetRawInputAt(1, offset);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool IsActualObject() const OVERRIDE { return false; }
+ bool IsActualObject() const override { return false; }
HInstruction* GetBaseAddress() const { return InputAt(0); }
HInstruction* GetOffset() const { return InputAt(1); }
@@ -7392,7 +7792,7 @@ class HIntermediateAddress FINAL : public HExpression<2> {
#ifdef ART_ENABLE_CODEGEN_mips
#include "nodes_mips.h"
#endif
-#ifdef ART_ENABLE_CODEGEN_x86
+#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
#include "nodes_x86.h"
#endif
@@ -7443,7 +7843,7 @@ class HGraphDelegateVisitor : public HGraphVisitor {
// Visit functions that delegate to the super class.
#define DECLARE_VISIT_INSTRUCTION(name, super) \
- void Visit##name(H##name* instr) OVERRIDE { Visit##super(instr); }
+ void Visit##name(H##name* instr) override { Visit##super(instr); }
FOR_EACH_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
@@ -7465,7 +7865,7 @@ class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor {
explicit CloneAndReplaceInstructionVisitor(HGraph* graph)
: HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {}
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
if (instruction->IsClonable()) {
ReplaceInstrOrPhiByClone(instruction);
instr_replaced_by_clones_count_++;
@@ -7584,8 +7984,30 @@ inline bool IsZeroBitPattern(HInstruction* instruction) {
return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
}
+// Implement HInstruction::Is##type() for concrete instructions.
#define INSTRUCTION_TYPE_CHECK(type, super) \
- inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \
+ inline bool HInstruction::Is##type() const { return GetKind() == k##type; }
+ FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+
+// Implement HInstruction::Is##type() for abstract instructions.
+#define INSTRUCTION_TYPE_CHECK_RESULT(type, super) \
+ std::is_base_of<BaseType, H##type>::value,
+#define INSTRUCTION_TYPE_CHECK(type, super) \
+ inline bool HInstruction::Is##type() const { \
+ DCHECK_LT(GetKind(), kLastInstructionKind); \
+ using BaseType = H##type; \
+ static constexpr bool results[] = { \
+ FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK_RESULT) \
+ }; \
+ return results[static_cast<size_t>(GetKind())]; \
+ }
+
+ FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
+#undef INSTRUCTION_TYPE_CHECK
+#undef INSTRUCTION_TYPE_CHECK_RESULT
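// For illustration (a sketch of the expansion, not part of this change): for
// an abstract type such as HBinaryOperation this yields a constexpr table
// indexed by kind, roughly:
//   inline bool HInstruction::IsBinaryOperation() const {
//     using BaseType = HBinaryOperation;
//     static constexpr bool results[] = { /* std::is_base_of per concrete kind */ };
//     return results[static_cast<size_t>(GetKind())];
//   }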
+
+#define INSTRUCTION_TYPE_CAST(type, super) \
inline const H##type* HInstruction::As##type() const { \
return Is##type() ? down_cast<const H##type*>(this) : nullptr; \
} \
@@ -7593,8 +8015,9 @@ inline bool IsZeroBitPattern(HInstruction* instruction) {
return Is##type() ? static_cast<H##type*>(this) : nullptr; \
}
- FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
-#undef INSTRUCTION_TYPE_CHECK
+ FOR_EACH_INSTRUCTION(INSTRUCTION_TYPE_CAST)
+#undef INSTRUCTION_TYPE_CAST
+
// Create space in `blocks` for adding `number_of_new_blocks` entries
// starting at location `at`. Blocks after `at` are moved accordingly.
diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h
index d0e0fef946..4993f5737e 100644
--- a/compiler/optimizing/nodes_mips.h
+++ b/compiler/optimizing/nodes_mips.h
@@ -30,7 +30,7 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> {
kNoDexPc) {
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress);
@@ -39,21 +39,21 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> {
};
// Mips version of HPackedSwitch that holds a pointer to the base method address.
-class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> {
+class HMipsPackedSwitch final : public HExpression<2> {
public:
HMipsPackedSwitch(int32_t start_value,
int32_t num_entries,
HInstruction* input,
HMipsComputeBaseMethodAddress* method_base,
uint32_t dex_pc)
- : HTemplateInstruction(kMipsPackedSwitch, SideEffects::None(), dex_pc),
+ : HExpression(kMipsPackedSwitch, SideEffects::None(), dex_pc),
start_value_(start_value),
num_entries_(num_entries) {
SetRawInputAt(0, input);
SetRawInputAt(1, method_base);
}
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsControlFlow() const override { return true; }
int32_t GetStartValue() const { return start_value_; }
@@ -91,7 +91,7 @@ class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> {
//
// Note: since the instruction does not factor the base array address into its computations, it
// has no side effects.
-class HIntermediateArrayAddressIndex FINAL : public HExpression<2> {
+class HIntermediateArrayAddressIndex final : public HExpression<2> {
public:
HIntermediateArrayAddressIndex(HInstruction* index, HInstruction* shift, uint32_t dex_pc)
: HExpression(kIntermediateArrayAddressIndex,
@@ -102,11 +102,11 @@ class HIntermediateArrayAddressIndex FINAL : public HExpression<2> {
SetRawInputAt(1, shift);
}
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool IsActualObject() const OVERRIDE { return false; }
+ bool IsActualObject() const override { return false; }
HInstruction* GetIndex() const { return InputAt(0); }
HInstruction* GetShift() const { return InputAt(1); }
diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h
index 29358e1141..7dcac1787e 100644
--- a/compiler/optimizing/nodes_shared.h
+++ b/compiler/optimizing/nodes_shared.h
@@ -24,7 +24,7 @@
namespace art {
-class HMultiplyAccumulate FINAL : public HExpression<3> {
+class HMultiplyAccumulate final : public HExpression<3> {
public:
HMultiplyAccumulate(DataType::Type type,
InstructionKind op,
@@ -39,14 +39,14 @@ class HMultiplyAccumulate FINAL : public HExpression<3> {
SetRawInputAt(kInputMulRightIndex, mul_right);
}
- bool IsClonable() const OVERRIDE { return true; }
+ bool IsClonable() const override { return true; }
static constexpr int kInputAccumulatorIndex = 0;
static constexpr int kInputMulLeftIndex = 1;
static constexpr int kInputMulRightIndex = 2;
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other) const override {
return op_kind_ == other->AsMultiplyAccumulate()->op_kind_;
}
@@ -62,7 +62,7 @@ class HMultiplyAccumulate FINAL : public HExpression<3> {
const InstructionKind op_kind_;
};
-class HBitwiseNegatedRight FINAL : public HBinaryOperation {
+class HBitwiseNegatedRight final : public HBinaryOperation {
public:
HBitwiseNegatedRight(DataType::Type result_type,
InstructionKind op,
@@ -97,21 +97,21 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation {
}
}
- HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
return GetBlock()->GetGraph()->GetIntConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
- HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const OVERRIDE {
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
return GetBlock()->GetGraph()->GetLongConstant(
Compute(x->GetValue(), y->GetValue()), GetDexPc());
}
HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
- HFloatConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for float values";
UNREACHABLE();
}
HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
- HDoubleConstant* y ATTRIBUTE_UNUSED) const OVERRIDE {
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
LOG(FATAL) << DebugName() << " is not defined for double values";
UNREACHABLE();
}
@@ -145,7 +145,7 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation {
//
// Note: since the instruction does not factor the base array address into its computations, it
// has no side effects (in contrast to HIntermediateAddress).
-class HIntermediateAddressIndex FINAL : public HExpression<3> {
+class HIntermediateAddressIndex final : public HExpression<3> {
public:
HIntermediateAddressIndex(
HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc)
@@ -158,12 +158,12 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> {
SetRawInputAt(2, shift);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const override {
return true;
}
- bool IsActualObject() const OVERRIDE { return false; }
+ bool IsActualObject() const override { return false; }
HInstruction* GetIndex() const { return InputAt(0); }
HInstruction* GetOffset() const { return InputAt(1); }
@@ -175,7 +175,7 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> {
DEFAULT_COPY_CONSTRUCTOR(IntermediateAddressIndex);
};
-class HDataProcWithShifterOp FINAL : public HExpression<2> {
+class HDataProcWithShifterOp final : public HExpression<2> {
public:
enum OpKind {
kLSL, // Logical shift left.
@@ -212,9 +212,9 @@ class HDataProcWithShifterOp FINAL : public HExpression<2> {
SetRawInputAt(1, right);
}
- bool IsClonable() const OVERRIDE { return true; }
- bool CanBeMoved() const OVERRIDE { return true; }
- bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE {
+ bool IsClonable() const override { return true; }
+ bool CanBeMoved() const override { return true; }
+ bool InstructionDataEquals(const HInstruction* other_instr) const override {
const HDataProcWithShifterOp* other = other_instr->AsDataProcWithShifterOp();
return instr_kind_ == other->instr_kind_ &&
op_kind_ == other->op_kind_ &&
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
index 20f6cf01ed..efe4d6b000 100644
--- a/compiler/optimizing/nodes_vector.h
+++ b/compiler/optimizing/nodes_vector.h
@@ -79,13 +79,14 @@ class HVecOperation : public HVariableInputSizeInstruction {
size_t vector_length,
uint32_t dex_pc)
: HVariableInputSizeInstruction(kind,
+ kSIMDType,
side_effects,
dex_pc,
allocator,
number_of_inputs,
kArenaAllocVectorNode),
vector_length_(vector_length) {
- SetPackedField<TypeField>(packed_type);
+ SetPackedField<PackedTypeField>(packed_type);
DCHECK_LT(1u, vector_length);
}
@@ -99,14 +100,9 @@ class HVecOperation : public HVariableInputSizeInstruction {
return vector_length_ * DataType::Size(GetPackedType());
}
- // Returns the type of the vector operation.
- DataType::Type GetType() const OVERRIDE {
- return kSIMDType;
- }
-
// Returns the true component type packed in a vector.
DataType::Type GetPackedType() const {
- return GetPackedField<TypeField>();
+ return GetPackedField<PackedTypeField>();
}
// Assumes vector nodes cannot be moved by default. Each concrete implementation
@@ -121,12 +117,12 @@ class HVecOperation : public HVariableInputSizeInstruction {
// Note: For newly introduced vector instructions HScheduler${ARCH}::IsSchedulingBarrier must be
// altered to return true if the instruction might reside outside the SIMD loop body since SIMD
// registers are not kept alive across vector loop boundaries (yet).
- bool CanBeMoved() const OVERRIDE { return false; }
+ bool CanBeMoved() const override { return false; }
// Tests if all data of a vector node (vector length and packed type) is equal.
// Each concrete implementation that adds more fields should test equality of
// those fields in its own method *and* call all super methods.
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsVecOperation());
const HVecOperation* o = other->AsVecOperation();
return GetVectorLength() == o->GetVectorLength() && GetPackedType() == o->GetPackedType();
@@ -185,12 +181,12 @@ class HVecOperation : public HVariableInputSizeInstruction {
protected:
// Additional packed bits.
- static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
- static constexpr size_t kFieldTypeSize =
+ static constexpr size_t kFieldPackedType = HInstruction::kNumberOfGenericPackedBits;
+ static constexpr size_t kFieldPackedTypeSize =
MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast));
- static constexpr size_t kNumberOfVectorOpPackedBits = kFieldType + kFieldTypeSize;
+ static constexpr size_t kNumberOfVectorOpPackedBits = kFieldPackedType + kFieldPackedTypeSize;
static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
- using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>;
+ using PackedTypeField = BitField<DataType::Type, kFieldPackedType, kFieldPackedTypeSize>;
DEFAULT_COPY_CONSTRUCTOR(VecOperation);
@@ -211,7 +207,7 @@ class HVecUnaryOperation : public HVecOperation {
allocator,
packed_type,
SideEffects::None(),
- /* number_of_inputs */ 1,
+ /* number_of_inputs= */ 1,
vector_length,
dex_pc) {
SetRawInputAt(0, input);
@@ -239,7 +235,7 @@ class HVecBinaryOperation : public HVecOperation {
allocator,
packed_type,
SideEffects::None(),
- /* number_of_inputs */ 2,
+ /* number_of_inputs= */ 2,
vector_length,
dex_pc) {
SetRawInputAt(0, left);
@@ -284,7 +280,7 @@ class HVecMemoryOperation : public HVecOperation {
HInstruction* GetArray() const { return InputAt(0); }
HInstruction* GetIndex() const { return InputAt(1); }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsVecMemoryOperation());
const HVecMemoryOperation* o = other->AsVecMemoryOperation();
return HVecOperation::InstructionDataEquals(o) && GetAlignment() == o->GetAlignment();
@@ -319,7 +315,7 @@ inline static bool HasConsistentPackedTypes(HInstruction* input, DataType::Type
// Replicates the given scalar into a vector,
// viz. replicate(x) = [ x, .. , x ].
-class HVecReplicateScalar FINAL : public HVecUnaryOperation {
+class HVecReplicateScalar final : public HVecUnaryOperation {
public:
HVecReplicateScalar(ArenaAllocator* allocator,
HInstruction* scalar,
@@ -333,7 +329,7 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation {
// A replicate needs to stay in place, since SIMD registers are not
// kept alive across vector loop boundaries (yet).
- bool CanBeMoved() const OVERRIDE { return false; }
+ bool CanBeMoved() const override { return false; }
DECLARE_INSTRUCTION(VecReplicateScalar);
@@ -345,7 +341,7 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation {
// viz. extract[ x1, .. , xn ] = x_i.
//
// TODO: for now only i == 1 case supported.
-class HVecExtractScalar FINAL : public HVecUnaryOperation {
+class HVecExtractScalar final : public HVecUnaryOperation {
public:
HVecExtractScalar(ArenaAllocator* allocator,
HInstruction* input,
@@ -358,16 +354,14 @@ class HVecExtractScalar FINAL : public HVecUnaryOperation {
DCHECK(HasConsistentPackedTypes(input, packed_type));
DCHECK_LT(index, vector_length);
DCHECK_EQ(index, 0u);
- }
-
- // Yields a single component in the vector.
- DataType::Type GetType() const OVERRIDE {
- return GetPackedType();
+ // Yields a single component in the vector.
+ // Overrides the kSIMDType set by the VecOperation constructor.
+ SetPackedField<TypeField>(packed_type);
}
// An extract needs to stay in place, since SIMD registers are not
// kept alive across vector loop boundaries (yet).
- bool CanBeMoved() const OVERRIDE { return false; }
+ bool CanBeMoved() const override { return false; }
DECLARE_INSTRUCTION(VecExtractScalar);
@@ -378,7 +372,7 @@ class HVecExtractScalar FINAL : public HVecUnaryOperation {
// Reduces the given vector into the first element as sum/min/max,
// viz. sum-reduce[ x1, .. , xn ] = [ y, ---- ], where y = sum xi
// and the "-" denotes "don't care" (implementation dependent).
-class HVecReduce FINAL : public HVecUnaryOperation {
+class HVecReduce final : public HVecUnaryOperation {
public:
enum ReductionKind {
kSum = 1,
@@ -390,21 +384,21 @@ class HVecReduce FINAL : public HVecUnaryOperation {
HInstruction* input,
DataType::Type packed_type,
size_t vector_length,
- ReductionKind kind,
+ ReductionKind reduction_kind,
uint32_t dex_pc)
: HVecUnaryOperation(kVecReduce, allocator, input, packed_type, vector_length, dex_pc),
- kind_(kind) {
+ reduction_kind_(reduction_kind) {
DCHECK(HasConsistentPackedTypes(input, packed_type));
}
- ReductionKind GetKind() const { return kind_; }
+ ReductionKind GetReductionKind() const { return reduction_kind_; }
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsVecReduce());
const HVecReduce* o = other->AsVecReduce();
- return HVecOperation::InstructionDataEquals(o) && GetKind() == o->GetKind();
+ return HVecOperation::InstructionDataEquals(o) && GetReductionKind() == o->GetReductionKind();
}
DECLARE_INSTRUCTION(VecReduce);
@@ -413,12 +407,12 @@ class HVecReduce FINAL : public HVecUnaryOperation {
DEFAULT_COPY_CONSTRUCTOR(VecReduce);
private:
- const ReductionKind kind_;
+ const ReductionKind reduction_kind_;
};
// Converts every component in the vector,
// viz. cnv[ x1, .. , xn ] = [ cnv(x1), .. , cnv(xn) ].
-class HVecCnv FINAL : public HVecUnaryOperation {
+class HVecCnv final : public HVecUnaryOperation {
public:
HVecCnv(ArenaAllocator* allocator,
HInstruction* input,
@@ -433,7 +427,7 @@ class HVecCnv FINAL : public HVecUnaryOperation {
DataType::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); }
DataType::Type GetResultType() const { return GetPackedType(); }
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecCnv);
@@ -443,7 +437,7 @@ class HVecCnv FINAL : public HVecUnaryOperation {
// Negates every component in the vector,
// viz. neg[ x1, .. , xn ] = [ -x1, .. , -xn ].
-class HVecNeg FINAL : public HVecUnaryOperation {
+class HVecNeg final : public HVecUnaryOperation {
public:
HVecNeg(ArenaAllocator* allocator,
HInstruction* input,
@@ -454,7 +448,7 @@ class HVecNeg FINAL : public HVecUnaryOperation {
DCHECK(HasConsistentPackedTypes(input, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecNeg);
@@ -465,7 +459,7 @@ class HVecNeg FINAL : public HVecUnaryOperation {
// Takes absolute value of every component in the vector,
// viz. abs[ x1, .. , xn ] = [ |x1|, .. , |xn| ]
// for signed operand x.
-class HVecAbs FINAL : public HVecUnaryOperation {
+class HVecAbs final : public HVecUnaryOperation {
public:
HVecAbs(ArenaAllocator* allocator,
HInstruction* input,
@@ -476,7 +470,7 @@ class HVecAbs FINAL : public HVecUnaryOperation {
DCHECK(HasConsistentPackedTypes(input, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecAbs);
@@ -487,7 +481,7 @@ class HVecAbs FINAL : public HVecUnaryOperation {
// Bitwise- or boolean-nots every component in the vector,
// viz. not[ x1, .. , xn ] = [ ~x1, .. , ~xn ], or
// not[ x1, .. , xn ] = [ !x1, .. , !xn ] for boolean.
-class HVecNot FINAL : public HVecUnaryOperation {
+class HVecNot final : public HVecUnaryOperation {
public:
HVecNot(ArenaAllocator* allocator,
HInstruction* input,
@@ -498,7 +492,7 @@ class HVecNot FINAL : public HVecUnaryOperation {
DCHECK(input->IsVecOperation());
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecNot);
@@ -512,7 +506,7 @@ class HVecNot FINAL : public HVecUnaryOperation {
// Adds every component in the two vectors,
// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ].
-class HVecAdd FINAL : public HVecBinaryOperation {
+class HVecAdd final : public HVecBinaryOperation {
public:
HVecAdd(ArenaAllocator* allocator,
HInstruction* left,
@@ -525,7 +519,7 @@ class HVecAdd FINAL : public HVecBinaryOperation {
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecAdd);
@@ -533,11 +527,36 @@ class HVecAdd FINAL : public HVecBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(VecAdd);
};
+// Adds every component in the two vectors using saturation arithmetic,
+// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 +_sat y1, .. , xn +_sat yn ]
+// for either both signed or both unsigned operands x, y (reflected in packed_type).
+class HVecSaturationAdd final : public HVecBinaryOperation {
+ public:
+ HVecSaturationAdd(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecBinaryOperation(
+ kVecSaturationAdd, allocator, left, right, packed_type, vector_length, dex_pc) {
+ DCHECK(HasConsistentPackedTypes(left, packed_type));
+ DCHECK(HasConsistentPackedTypes(right, packed_type));
+ }
+
+ bool CanBeMoved() const override { return true; }
+
+ DECLARE_INSTRUCTION(VecSaturationAdd);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecSaturationAdd);
+};
+
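
A scalar sketch of the per-lane saturating semantics (assumed helper, not ART code; the analogous HVecSaturationSub below clamps the difference the same way):

#include <cstdint>
#include <limits>

// Clamp the mathematically exact result to the representable range instead
// of wrapping; valid for lane types narrower than int64_t.
template <typename T>
T SaturatingAdd(T x, T y) {
  int64_t wide = static_cast<int64_t>(x) + static_cast<int64_t>(y);
  if (wide > std::numeric_limits<T>::max()) return std::numeric_limits<T>::max();
  if (wide < std::numeric_limits<T>::min()) return std::numeric_limits<T>::min();
  return static_cast<T>(wide);
}
// e.g. SaturatingAdd<int8_t>(120, 100) == 127 rather than the wrapped -36.
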
// Performs halving add on every component in the two vectors, viz.
// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ]
// truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn) >> 1 ]
// for either both signed or both unsigned operands x, y (reflected in packed_type).
-class HVecHalvingAdd FINAL : public HVecBinaryOperation {
+class HVecHalvingAdd final : public HVecBinaryOperation {
public:
HVecHalvingAdd(ArenaAllocator* allocator,
HInstruction* left,
@@ -555,9 +574,9 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation {
bool IsRounded() const { return GetPackedFlag<kFieldHAddIsRounded>(); }
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsVecHalvingAdd());
const HVecHalvingAdd* o = other->AsVecHalvingAdd();
return HVecOperation::InstructionDataEquals(o) && IsRounded() == o->IsRounded();
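
A quick numeric check of the rounded vs. truncated halving-add formulas above (plain C++, arbitrary lane values):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t x = 9, y = 6;
  assert(((x + y + 1) >> 1) == 8);  // rounded halving add
  assert(((x + y) >> 1) == 7);      // truncated halving add
  return 0;
}
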
@@ -577,7 +596,7 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation {
// Subtracts every component in the two vectors,
// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ].
-class HVecSub FINAL : public HVecBinaryOperation {
+class HVecSub final : public HVecBinaryOperation {
public:
HVecSub(ArenaAllocator* allocator,
HInstruction* left,
@@ -590,7 +609,7 @@ class HVecSub FINAL : public HVecBinaryOperation {
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecSub);
@@ -598,9 +617,34 @@ class HVecSub FINAL : public HVecBinaryOperation {
DEFAULT_COPY_CONSTRUCTOR(VecSub);
};
+// Subtracts every component in the two vectors using saturation arithmetic,
+// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 -_sat y1, .. , xn -_sat yn ]
+// for either both signed or both unsigned operands x, y (reflected in packed_type).
+class HVecSaturationSub final : public HVecBinaryOperation {
+ public:
+ HVecSaturationSub(ArenaAllocator* allocator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecBinaryOperation(
+ kVecSaturationSub, allocator, left, right, packed_type, vector_length, dex_pc) {
+ DCHECK(HasConsistentPackedTypes(left, packed_type));
+ DCHECK(HasConsistentPackedTypes(right, packed_type));
+ }
+
+ bool CanBeMoved() const override { return true; }
+
+ DECLARE_INSTRUCTION(VecSaturationSub);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecSaturationSub);
+};
+
// Multiplies every component in the two vectors,
// viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ].
-class HVecMul FINAL : public HVecBinaryOperation {
+class HVecMul final : public HVecBinaryOperation {
public:
HVecMul(ArenaAllocator* allocator,
HInstruction* left,
@@ -613,7 +657,7 @@ class HVecMul FINAL : public HVecBinaryOperation {
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecMul);
@@ -623,7 +667,7 @@ class HVecMul FINAL : public HVecBinaryOperation {
// Divides every component in the two vectors,
// viz. [ x1, .. , xn ] / [ y1, .. , yn ] = [ x1 / y1, .. , xn / yn ].
-class HVecDiv FINAL : public HVecBinaryOperation {
+class HVecDiv final : public HVecBinaryOperation {
public:
HVecDiv(ArenaAllocator* allocator,
HInstruction* left,
@@ -636,7 +680,7 @@ class HVecDiv FINAL : public HVecBinaryOperation {
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecDiv);
@@ -647,7 +691,7 @@ class HVecDiv FINAL : public HVecBinaryOperation {
// Takes minimum of every component in the two vectors,
// viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ]
// for either both signed or both unsigned operands x, y (reflected in packed_type).
-class HVecMin FINAL : public HVecBinaryOperation {
+class HVecMin final : public HVecBinaryOperation {
public:
HVecMin(ArenaAllocator* allocator,
HInstruction* left,
@@ -660,7 +704,7 @@ class HVecMin FINAL : public HVecBinaryOperation {
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecMin);
@@ -671,7 +715,7 @@ class HVecMin FINAL : public HVecBinaryOperation {
// Takes maximum of every component in the two vectors,
// viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ]
// for either both signed or both unsigned operands x, y (reflected in packed_type).
-class HVecMax FINAL : public HVecBinaryOperation {
+class HVecMax final : public HVecBinaryOperation {
public:
HVecMax(ArenaAllocator* allocator,
HInstruction* left,
@@ -684,7 +728,7 @@ class HVecMax FINAL : public HVecBinaryOperation {
DCHECK(HasConsistentPackedTypes(right, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecMax);
@@ -694,7 +738,7 @@ class HVecMax FINAL : public HVecBinaryOperation {
// Bitwise-ands every component in the two vectors,
// viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ].
-class HVecAnd FINAL : public HVecBinaryOperation {
+class HVecAnd final : public HVecBinaryOperation {
public:
HVecAnd(ArenaAllocator* allocator,
HInstruction* left,
@@ -706,7 +750,7 @@ class HVecAnd FINAL : public HVecBinaryOperation {
DCHECK(left->IsVecOperation() && right->IsVecOperation());
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecAnd);
@@ -716,7 +760,7 @@ class HVecAnd FINAL : public HVecBinaryOperation {
// Bitwise-and-nots every component in the two vectors,
// viz. [ x1, .. , xn ] and-not [ y1, .. , yn ] = [ ~x1 & y1, .. , ~xn & yn ].
-class HVecAndNot FINAL : public HVecBinaryOperation {
+class HVecAndNot final : public HVecBinaryOperation {
public:
HVecAndNot(ArenaAllocator* allocator,
HInstruction* left,
@@ -729,7 +773,7 @@ class HVecAndNot FINAL : public HVecBinaryOperation {
DCHECK(left->IsVecOperation() && right->IsVecOperation());
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecAndNot);
@@ -739,7 +783,7 @@ class HVecAndNot FINAL : public HVecBinaryOperation {
// Bitwise-ors every component in the two vectors,
// viz. [ x1, .. , xn ] | [ y1, .. , yn ] = [ x1 | y1, .. , xn | yn ].
-class HVecOr FINAL : public HVecBinaryOperation {
+class HVecOr final : public HVecBinaryOperation {
public:
HVecOr(ArenaAllocator* allocator,
HInstruction* left,
@@ -751,7 +795,7 @@ class HVecOr FINAL : public HVecBinaryOperation {
DCHECK(left->IsVecOperation() && right->IsVecOperation());
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecOr);
@@ -761,7 +805,7 @@ class HVecOr FINAL : public HVecBinaryOperation {
// Bitwise-xors every component in the two vectors,
// viz. [ x1, .. , xn ] ^ [ y1, .. , yn ] = [ x1 ^ y1, .. , xn ^ yn ].
-class HVecXor FINAL : public HVecBinaryOperation {
+class HVecXor final : public HVecBinaryOperation {
public:
HVecXor(ArenaAllocator* allocator,
HInstruction* left,
@@ -773,7 +817,7 @@ class HVecXor FINAL : public HVecBinaryOperation {
DCHECK(left->IsVecOperation() && right->IsVecOperation());
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecXor);
@@ -783,7 +827,7 @@ class HVecXor FINAL : public HVecBinaryOperation {
// Logically shifts every component in the vector left by the given distance,
// viz. [ x1, .. , xn ] << d = [ x1 << d, .. , xn << d ].
-class HVecShl FINAL : public HVecBinaryOperation {
+class HVecShl final : public HVecBinaryOperation {
public:
HVecShl(ArenaAllocator* allocator,
HInstruction* left,
@@ -795,7 +839,7 @@ class HVecShl FINAL : public HVecBinaryOperation {
DCHECK(HasConsistentPackedTypes(left, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecShl);
@@ -805,7 +849,7 @@ class HVecShl FINAL : public HVecBinaryOperation {
// Arithmetically shifts every component in the vector right by the given distance,
// viz. [ x1, .. , xn ] >> d = [ x1 >> d, .. , xn >> d ].
-class HVecShr FINAL : public HVecBinaryOperation {
+class HVecShr final : public HVecBinaryOperation {
public:
HVecShr(ArenaAllocator* allocator,
HInstruction* left,
@@ -817,7 +861,7 @@ class HVecShr FINAL : public HVecBinaryOperation {
DCHECK(HasConsistentPackedTypes(left, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecShr);
@@ -827,7 +871,7 @@ class HVecShr FINAL : public HVecBinaryOperation {
// Logically shifts every component in the vector right by the given distance,
// viz. [ x1, .. , xn ] >>> d = [ x1 >>> d, .. , xn >>> d ].
-class HVecUShr FINAL : public HVecBinaryOperation {
+class HVecUShr final : public HVecBinaryOperation {
public:
HVecUShr(ArenaAllocator* allocator,
HInstruction* left,
@@ -839,7 +883,7 @@ class HVecUShr FINAL : public HVecBinaryOperation {
DCHECK(HasConsistentPackedTypes(left, packed_type));
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(VecUShr);
@@ -854,7 +898,7 @@ class HVecUShr FINAL : public HVecBinaryOperation {
// Assigns the given scalar elements to a vector,
// viz. set( array(x1, .. , xn) ) = [ x1, .. , xn ] if n == m,
// set( array(x1, .. , xm) ) = [ x1, .. , xm, 0, .. , 0 ] if m < n.
-class HVecSetScalars FINAL : public HVecOperation {
+class HVecSetScalars final : public HVecOperation {
public:
HVecSetScalars(ArenaAllocator* allocator,
HInstruction* scalars[],
@@ -877,7 +921,7 @@ class HVecSetScalars FINAL : public HVecOperation {
// Setting scalars needs to stay in place, since SIMD registers are not
// kept alive across vector loop boundaries (yet).
- bool CanBeMoved() const OVERRIDE { return false; }
+ bool CanBeMoved() const override { return false; }
DECLARE_INSTRUCTION(VecSetScalars);
@@ -887,7 +931,10 @@ class HVecSetScalars FINAL : public HVecOperation {
// Multiplies every component in the two vectors, adds the result vector to the accumulator vector,
// viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ].
-class HVecMultiplyAccumulate FINAL : public HVecOperation {
+// For floating point types, Java rounding behavior must be preserved; the products are rounded to
+// the proper precision before being added. "Fused" multiply-add operations available on several
+// architectures are not usable since they would violate Java language rules.
+class HVecMultiplyAccumulate final : public HVecOperation {
public:
HVecMultiplyAccumulate(ArenaAllocator* allocator,
InstructionKind op,
@@ -901,7 +948,7 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation {
allocator,
packed_type,
SideEffects::None(),
- /* number_of_inputs */ 3,
+ /* number_of_inputs= */ 3,
vector_length,
dex_pc),
op_kind_(op) {
@@ -909,14 +956,17 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation {
DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
DCHECK(HasConsistentPackedTypes(mul_left, packed_type));
DCHECK(HasConsistentPackedTypes(mul_right, packed_type));
+ // Remove the following if we add an architecture that supports floating point multiply-add
+ // with Java-compatible rounding.
+ DCHECK(DataType::IsIntegralType(packed_type));
SetRawInputAt(0, accumulator);
SetRawInputAt(1, mul_left);
SetRawInputAt(2, mul_right);
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsVecMultiplyAccumulate());
const HVecMultiplyAccumulate* o = other->AsVecMultiplyAccumulate();
return HVecOperation::InstructionDataEquals(o) && GetOpKind() == o->GetOpKind();
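
The Java-rounding restriction noted above can be seen with plain doubles: a separate multiply rounds the product before the add, while a fused multiply-add rounds only once at the end, so the two can differ in the last bit (illustrative values, not ART code):

#include <cmath>
#include <cstdio>

int main() {
  double a = 1.0 + 0x1.0p-27;  // chosen so a * b is inexact in double
  double b = 1.0 + 0x1.0p-27;
  double c = -1.0;
  double separate = a * b + c;       // product rounded first (Java semantics)
  double fused = std::fma(a, b, c);  // single rounding at the end
  std::printf("%a vs %a\n", separate, fused);  // low bits differ
  return 0;
}
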
@@ -939,7 +989,7 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation {
// viz. SAD([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ]) =
// [ a1 + sum abs(xi-yi), .. , am + sum abs(xj-yj) ],
// for m <= n, non-overlapping sums, and signed operands x, y.
-class HVecSADAccumulate FINAL : public HVecOperation {
+class HVecSADAccumulate final : public HVecOperation {
public:
HVecSADAccumulate(ArenaAllocator* allocator,
HInstruction* accumulator,
@@ -952,7 +1002,7 @@ class HVecSADAccumulate FINAL : public HVecOperation {
allocator,
packed_type,
SideEffects::None(),
- /* number_of_inputs */ 3,
+ /* number_of_inputs= */ 3,
vector_length,
dex_pc) {
DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
@@ -971,9 +1021,69 @@ class HVecSADAccumulate FINAL : public HVecOperation {
DEFAULT_COPY_CONSTRUCTOR(VecSADAccumulate);
};
+// Performs dot product of two vectors and adds the result to wider precision components in
+// the accumulator.
+//
+// viz. DOT_PRODUCT([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ]) =
+// [ a1 + sum(xi * yi), .. , am + sum(xj * yj) ],
+// for m <= n, non-overlapping sums,
+// for either both signed or both unsigned operands x, y.
+//
+// Notes:
+// - packed type reflects the type of sum reduction, not the type of the operands.
+// - IsZeroExtending() is used to determine the kind of signed/zero extension to be
+// performed for the operands.
+//
+// TODO: Support types other than kInt32 for packed type.
+class HVecDotProd final : public HVecOperation {
+ public:
+ HVecDotProd(ArenaAllocator* allocator,
+ HInstruction* accumulator,
+ HInstruction* left,
+ HInstruction* right,
+ DataType::Type packed_type,
+ bool is_zero_extending,
+ size_t vector_length,
+ uint32_t dex_pc)
+ : HVecOperation(kVecDotProd,
+ allocator,
+ packed_type,
+ SideEffects::None(),
+ /* number_of_inputs= */ 3,
+ vector_length,
+ dex_pc) {
+ DCHECK(HasConsistentPackedTypes(accumulator, packed_type));
+ DCHECK(DataType::IsIntegralType(packed_type));
+ DCHECK(left->IsVecOperation());
+ DCHECK(right->IsVecOperation());
+ DCHECK_EQ(ToSignedType(left->AsVecOperation()->GetPackedType()),
+ ToSignedType(right->AsVecOperation()->GetPackedType()));
+ SetRawInputAt(0, accumulator);
+ SetRawInputAt(1, left);
+ SetRawInputAt(2, right);
+ SetPackedFlag<kFieldHDotProdIsZeroExtending>(is_zero_extending);
+ }
+
+ bool IsZeroExtending() const { return GetPackedFlag<kFieldHDotProdIsZeroExtending>(); }
+
+ bool CanBeMoved() const override { return true; }
+
+ DECLARE_INSTRUCTION(VecDotProd);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(VecDotProd);
+
+ private:
+ // Additional packed bits.
+ static constexpr size_t kFieldHDotProdIsZeroExtending =
+ HVecOperation::kNumberOfVectorOpPackedBits;
+ static constexpr size_t kNumberOfHDotProdPackedBits = kFieldHDotProdIsZeroExtending + 1;
+ static_assert(kNumberOfHDotProdPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+};
+
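
A scalar model of the dot-product semantics (hedged sketch; the int8 lane width and the zero-extending variant are assumptions based on the notes above):

#include <cstddef>
#include <cstdint>
#include <vector>

// Multiply narrow lanes, widen each product to the accumulator precision,
// and add into the accumulator; with IsZeroExtending() the lanes would be
// treated as uint8_t instead.
int32_t DotProdAccumulate(int32_t acc,
                          const std::vector<int8_t>& x,
                          const std::vector<int8_t>& y) {
  for (std::size_t i = 0; i < x.size(); ++i) {
    acc += static_cast<int32_t>(x[i]) * static_cast<int32_t>(y[i]);
  }
  return acc;
}
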
// Loads a vector from memory, viz. load(mem, 1)
// yields the vector [ mem(1), .. , mem(n) ].
-class HVecLoad FINAL : public HVecMemoryOperation {
+class HVecLoad final : public HVecMemoryOperation {
public:
HVecLoad(ArenaAllocator* allocator,
HInstruction* base,
@@ -987,7 +1097,7 @@ class HVecLoad FINAL : public HVecMemoryOperation {
allocator,
packed_type,
side_effects,
- /* number_of_inputs */ 2,
+ /* number_of_inputs= */ 2,
vector_length,
dex_pc) {
SetRawInputAt(0, base);
@@ -997,9 +1107,9 @@ class HVecLoad FINAL : public HVecMemoryOperation {
bool IsStringCharAt() const { return GetPackedFlag<kFieldIsStringCharAt>(); }
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
- bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+ bool InstructionDataEquals(const HInstruction* other) const override {
DCHECK(other->IsVecLoad());
const HVecLoad* o = other->AsVecLoad();
return HVecMemoryOperation::InstructionDataEquals(o) && IsStringCharAt() == o->IsStringCharAt();
@@ -1019,7 +1129,7 @@ class HVecLoad FINAL : public HVecMemoryOperation {
// Stores a vector to memory, viz. store(mem, 1, [ x1, .. , xn ])
// sets mem(1) = x1, .. , mem(n) = xn.
-class HVecStore FINAL : public HVecMemoryOperation {
+class HVecStore final : public HVecMemoryOperation {
public:
HVecStore(ArenaAllocator* allocator,
HInstruction* base,
@@ -1033,7 +1143,7 @@ class HVecStore FINAL : public HVecMemoryOperation {
allocator,
packed_type,
side_effects,
- /* number_of_inputs */ 3,
+ /* number_of_inputs= */ 3,
vector_length,
dex_pc) {
DCHECK(HasConsistentPackedTypes(value, packed_type));
@@ -1043,7 +1153,7 @@ class HVecStore FINAL : public HVecMemoryOperation {
}
// A store needs to stay in place.
- bool CanBeMoved() const OVERRIDE { return false; }
+ bool CanBeMoved() const override { return false; }
DECLARE_INSTRUCTION(VecStore);
diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc
index af13449646..b0a665d704 100644
--- a/compiler/optimizing/nodes_vector_test.cc
+++ b/compiler/optimizing/nodes_vector_test.cc
@@ -401,9 +401,9 @@ TEST_F(NodesVectorTest, VectorKindMattersOnReduce) {
EXPECT_TRUE(v2->CanBeMoved());
EXPECT_TRUE(v3->CanBeMoved());
- EXPECT_EQ(HVecReduce::kSum, v1->GetKind());
- EXPECT_EQ(HVecReduce::kMin, v2->GetKind());
- EXPECT_EQ(HVecReduce::kMax, v3->GetKind());
+ EXPECT_EQ(HVecReduce::kSum, v1->GetReductionKind());
+ EXPECT_EQ(HVecReduce::kMin, v2->GetReductionKind());
+ EXPECT_EQ(HVecReduce::kMax, v3->GetReductionKind());
EXPECT_TRUE(v1->Equals(v1));
EXPECT_TRUE(v2->Equals(v2));
diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h
index 4c32be7d15..8e8fbc1581 100644
--- a/compiler/optimizing/nodes_x86.h
+++ b/compiler/optimizing/nodes_x86.h
@@ -20,7 +20,7 @@
namespace art {
// Compute the address of the method for X86 Constant area support.
-class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> {
+class HX86ComputeBaseMethodAddress final : public HExpression<0> {
public:
// Treat the value as an int32_t, but it is really a 32 bit native pointer.
HX86ComputeBaseMethodAddress()
@@ -30,7 +30,7 @@ class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> {
kNoDexPc) {
}
- bool CanBeMoved() const OVERRIDE { return true; }
+ bool CanBeMoved() const override { return true; }
DECLARE_INSTRUCTION(X86ComputeBaseMethodAddress);
@@ -39,7 +39,7 @@ class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> {
};
// Load a constant value from the constant table.
-class HX86LoadFromConstantTable FINAL : public HExpression<2> {
+class HX86LoadFromConstantTable final : public HExpression<2> {
public:
HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base,
HConstant* constant)
@@ -66,7 +66,7 @@ class HX86LoadFromConstantTable FINAL : public HExpression<2> {
};
// Version of HNeg with access to the constant table for FP types.
-class HX86FPNeg FINAL : public HExpression<2> {
+class HX86FPNeg final : public HExpression<2> {
public:
HX86FPNeg(DataType::Type result_type,
HInstruction* input,
@@ -89,21 +89,21 @@ class HX86FPNeg FINAL : public HExpression<2> {
};
// X86 version of HPackedSwitch that holds a pointer to the base method address.
-class HX86PackedSwitch FINAL : public HTemplateInstruction<2> {
+class HX86PackedSwitch final : public HExpression<2> {
public:
HX86PackedSwitch(int32_t start_value,
int32_t num_entries,
HInstruction* input,
HX86ComputeBaseMethodAddress* method_base,
uint32_t dex_pc)
- : HTemplateInstruction(kX86PackedSwitch, SideEffects::None(), dex_pc),
+ : HExpression(kX86PackedSwitch, SideEffects::None(), dex_pc),
start_value_(start_value),
num_entries_(num_entries) {
SetRawInputAt(0, input);
SetRawInputAt(1, method_base);
}
- bool IsControlFlow() const OVERRIDE { return true; }
+ bool IsControlFlow() const override { return true; }
int32_t GetStartValue() const { return start_value_; }
@@ -128,6 +128,92 @@ class HX86PackedSwitch FINAL : public HTemplateInstruction<2> {
const int32_t num_entries_;
};
+class HX86AndNot final : public HBinaryOperation {
+ public:
+ HX86AndNot(DataType::Type result_type,
+ HInstruction* left,
+ HInstruction* right,
+ uint32_t dex_pc = kNoDexPc)
+ : HBinaryOperation(kX86AndNot, result_type, left, right, SideEffects::None(), dex_pc) {
+ }
+
+ bool IsCommutative() const override { return false; }
+
+ template <typename T> static T Compute(T x, T y) { return ~x & y; }
+
+ HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const override {
+ return GetBlock()->GetGraph()->GetIntConstant(
+ Compute(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x, HLongConstant* y) const override {
+ return GetBlock()->GetGraph()->GetLongConstant(
+ Compute(x->GetValue(), y->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED,
+ HFloatConstant* y ATTRIBUTE_UNUSED) const override {
+ LOG(FATAL) << DebugName() << " is not defined for float values";
+ UNREACHABLE();
+ }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED,
+ HDoubleConstant* y ATTRIBUTE_UNUSED) const override {
+ LOG(FATAL) << DebugName() << " is not defined for double values";
+ UNREACHABLE();
+ }
+
+ DECLARE_INSTRUCTION(X86AndNot);
+
+ protected:
+ DEFAULT_COPY_CONSTRUCTOR(X86AndNot);
+};
+
+class HX86MaskOrResetLeastSetBit final : public HUnaryOperation {
+ public:
+ HX86MaskOrResetLeastSetBit(DataType::Type result_type, InstructionKind op,
+ HInstruction* input, uint32_t dex_pc = kNoDexPc)
+ : HUnaryOperation(kX86MaskOrResetLeastSetBit, result_type, input, dex_pc),
+ op_kind_(op) {
+ DCHECK_EQ(result_type, DataType::Kind(input->GetType()));
+ DCHECK(op == HInstruction::kAnd || op == HInstruction::kXor) << op;
+ }
+ template <typename T>
+ auto Compute(T x) const -> decltype(x & (x-1)) {
+ static_assert(std::is_same<decltype(x & (x-1)), decltype(x ^ (x-1))>::value,
+ "Inconsistent bitwise types");
+ switch (op_kind_) {
+ case HInstruction::kAnd:
+ return x & (x-1);
+ case HInstruction::kXor:
+ return x ^ (x-1);
+ default:
+ LOG(FATAL) << "Unreachable";
+ UNREACHABLE();
+ }
+ }
+
+ HConstant* Evaluate(HIntConstant* x) const override {
+ return GetBlock()->GetGraph()->GetIntConstant(Compute(x->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HLongConstant* x) const override {
+ return GetBlock()->GetGraph()->GetLongConstant(Compute(x->GetValue()), GetDexPc());
+ }
+ HConstant* Evaluate(HFloatConstant* x ATTRIBUTE_UNUSED) const override {
+ LOG(FATAL) << DebugName() << " is not defined for float values";
+ UNREACHABLE();
+ }
+ HConstant* Evaluate(HDoubleConstant* x ATTRIBUTE_UNUSED) const override {
+ LOG(FATAL) << DebugName() << " is not defined for double values";
+ UNREACHABLE();
+ }
+ InstructionKind GetOpKind() const { return op_kind_; }
+
+ DECLARE_INSTRUCTION(X86MaskOrResetLeastSetBit);
+
+ protected:
+ const InstructionKind op_kind_;
+
+ DEFAULT_COPY_CONSTRUCTOR(X86MaskOrResetLeastSetBit);
+};
+
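
A worked example of the two op kinds (plain C++; the intended lowering to the x86 BMI instructions BLSR and BLSMSK is an assumption):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t x = 0xB4;                  // 0b10110100
  std::printf("%#x\n", x & (x - 1));  // 0xb0: lowest set bit cleared (kAnd)
  std::printf("%#x\n", x ^ (x - 1));  // 0x7: mask through lowest set bit (kXor)
  return 0;
}
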
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_X86_H_
diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc
index 57db7a634c..8864a12301 100644
--- a/compiler/optimizing/optimization.cc
+++ b/compiler/optimizing/optimization.cc
@@ -28,10 +28,14 @@
#endif
#ifdef ART_ENABLE_CODEGEN_x86
#include "pc_relative_fixups_x86.h"
+#include "instruction_simplifier_x86.h"
#endif
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
#include "x86_memory_gen.h"
#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+#include "instruction_simplifier_x86_64.h"
+#endif
#include "bounds_check_elimination.h"
#include "cha_guard_optimization.h"
@@ -40,6 +44,7 @@
#include "constructor_fence_redundancy_elimination.h"
#include "dead_code_elimination.h"
#include "dex/code_item_accessors-inl.h"
+#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
#include "gvn.h"
#include "induction_var_analysis.h"
@@ -83,14 +88,10 @@ const char* OptimizationPassName(OptimizationPass pass) {
return HDeadCodeElimination::kDeadCodeEliminationPassName;
case OptimizationPass::kInliner:
return HInliner::kInlinerPassName;
- case OptimizationPass::kSharpening:
- return HSharpening::kSharpeningPassName;
case OptimizationPass::kSelectGenerator:
return HSelectGenerator::kSelectGeneratorPassName;
case OptimizationPass::kInstructionSimplifier:
return InstructionSimplifier::kInstructionSimplifierPassName;
- case OptimizationPass::kIntrinsicsRecognizer:
- return IntrinsicsRecognizer::kIntrinsicsRecognizerPassName;
case OptimizationPass::kCHAGuardOptimization:
return CHAGuardOptimization::kCHAGuardOptimizationPassName;
case OptimizationPass::kCodeSinking:
@@ -116,17 +117,26 @@ const char* OptimizationPassName(OptimizationPass pass) {
#ifdef ART_ENABLE_CODEGEN_x86
case OptimizationPass::kPcRelativeFixupsX86:
return x86::PcRelativeFixups::kPcRelativeFixupsX86PassName;
+ case OptimizationPass::kInstructionSimplifierX86:
+ return x86::InstructionSimplifierX86::kInstructionSimplifierX86PassName;
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ case OptimizationPass::kInstructionSimplifierX86_64:
+ return x86_64::InstructionSimplifierX86_64::kInstructionSimplifierX86_64PassName;
#endif
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
case OptimizationPass::kX86MemoryOperandGeneration:
return x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName;
#endif
+ case OptimizationPass::kNone:
+ LOG(FATAL) << "kNone does not represent an actual pass";
+ UNREACHABLE();
}
}
-#define X(x) if (name == OptimizationPassName((x))) return (x)
+#define X(x) if (pass_name == OptimizationPassName((x))) return (x)
-OptimizationPass OptimizationPassByName(const std::string& name) {
+OptimizationPass OptimizationPassByName(const std::string& pass_name) {
X(OptimizationPass::kBoundsCheckElimination);
X(OptimizationPass::kCHAGuardOptimization);
X(OptimizationPass::kCodeSinking);
@@ -137,14 +147,12 @@ OptimizationPass OptimizationPassByName(const std::string& name) {
X(OptimizationPass::kInductionVarAnalysis);
X(OptimizationPass::kInliner);
X(OptimizationPass::kInstructionSimplifier);
- X(OptimizationPass::kIntrinsicsRecognizer);
X(OptimizationPass::kInvariantCodeMotion);
X(OptimizationPass::kLoadStoreAnalysis);
X(OptimizationPass::kLoadStoreElimination);
X(OptimizationPass::kLoopOptimization);
X(OptimizationPass::kScheduling);
X(OptimizationPass::kSelectGenerator);
- X(OptimizationPass::kSharpening);
X(OptimizationPass::kSideEffectsAnalysis);
#ifdef ART_ENABLE_CODEGEN_arm
X(OptimizationPass::kInstructionSimplifierArm);
@@ -160,7 +168,7 @@ OptimizationPass OptimizationPassByName(const std::string& name) {
X(OptimizationPass::kPcRelativeFixupsX86);
X(OptimizationPass::kX86MemoryOperandGeneration);
#endif
- LOG(FATAL) << "Cannot find optimization " << name;
+ LOG(FATAL) << "Cannot find optimization " << pass_name;
UNREACHABLE();
}
@@ -173,7 +181,6 @@ ArenaVector<HOptimization*> ConstructOptimizations(
HGraph* graph,
OptimizingCompilerStats* stats,
CodeGenerator* codegen,
- CompilerDriver* driver,
const DexCompilationUnit& dex_compilation_unit,
VariableSizedHandleScope* handles) {
ArenaVector<HOptimization*> optimizations(allocator->Adapter());
@@ -187,9 +194,9 @@ ArenaVector<HOptimization*> ConstructOptimizations(
// Loop over the requested optimizations.
for (size_t i = 0; i < length; i++) {
- OptimizationPass pass = definitions[i].first;
- const char* alt_name = definitions[i].second;
- const char* name = alt_name != nullptr
+ OptimizationPass pass = definitions[i].pass;
+ const char* alt_name = definitions[i].pass_name;
+ const char* pass_name = alt_name != nullptr
? alt_name
: OptimizationPassName(pass);
HOptimization* opt = nullptr;
@@ -199,47 +206,48 @@ ArenaVector<HOptimization*> ConstructOptimizations(
// Analysis passes (kept in most recent for subsequent passes).
//
case OptimizationPass::kSideEffectsAnalysis:
- opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, name);
+ opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, pass_name);
break;
case OptimizationPass::kInductionVarAnalysis:
- opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, name);
+ opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, pass_name);
break;
case OptimizationPass::kLoadStoreAnalysis:
- opt = most_recent_lsa = new (allocator) LoadStoreAnalysis(graph, name);
+ opt = most_recent_lsa = new (allocator) LoadStoreAnalysis(graph, pass_name);
break;
//
// Passes that need prior analysis.
//
case OptimizationPass::kGlobalValueNumbering:
CHECK(most_recent_side_effects != nullptr);
- opt = new (allocator) GVNOptimization(graph, *most_recent_side_effects, name);
+ opt = new (allocator) GVNOptimization(graph, *most_recent_side_effects, pass_name);
break;
case OptimizationPass::kInvariantCodeMotion:
CHECK(most_recent_side_effects != nullptr);
- opt = new (allocator) LICM(graph, *most_recent_side_effects, stats, name);
+ opt = new (allocator) LICM(graph, *most_recent_side_effects, stats, pass_name);
break;
case OptimizationPass::kLoopOptimization:
CHECK(most_recent_induction != nullptr);
- opt = new (allocator) HLoopOptimization(graph, driver, most_recent_induction, stats, name);
+ opt = new (allocator) HLoopOptimization(
+ graph, &codegen->GetCompilerOptions(), most_recent_induction, stats, pass_name);
break;
case OptimizationPass::kBoundsCheckElimination:
CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
opt = new (allocator) BoundsCheckElimination(
- graph, *most_recent_side_effects, most_recent_induction, name);
+ graph, *most_recent_side_effects, most_recent_induction, pass_name);
break;
case OptimizationPass::kLoadStoreElimination:
CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
opt = new (allocator) LoadStoreElimination(
- graph, *most_recent_side_effects, *most_recent_lsa, stats, name);
+ graph, *most_recent_side_effects, *most_recent_lsa, stats, pass_name);
break;
//
// Regular passes.
//
case OptimizationPass::kConstantFolding:
- opt = new (allocator) HConstantFolding(graph, name);
+ opt = new (allocator) HConstantFolding(graph, pass_name);
break;
case OptimizationPass::kDeadCodeElimination:
- opt = new (allocator) HDeadCodeElimination(graph, stats, name);
+ opt = new (allocator) HDeadCodeElimination(graph, stats, pass_name);
break;
case OptimizationPass::kInliner: {
CodeItemDataAccessor accessor(*dex_compilation_unit.GetDexFile(),
@@ -249,40 +257,33 @@ ArenaVector<HOptimization*> ConstructOptimizations(
codegen,
dex_compilation_unit, // outer_compilation_unit
dex_compilation_unit, // outermost_compilation_unit
- driver,
handles,
stats,
accessor.RegistersSize(),
- /* total_number_of_instructions */ 0,
- /* parent */ nullptr,
- /* depth */ 0,
- name);
+ /* total_number_of_instructions= */ 0,
+ /* parent= */ nullptr,
+ /* depth= */ 0,
+ pass_name);
break;
}
- case OptimizationPass::kSharpening:
- opt = new (allocator) HSharpening(graph, codegen, driver, name);
- break;
case OptimizationPass::kSelectGenerator:
- opt = new (allocator) HSelectGenerator(graph, handles, stats, name);
+ opt = new (allocator) HSelectGenerator(graph, handles, stats, pass_name);
break;
case OptimizationPass::kInstructionSimplifier:
- opt = new (allocator) InstructionSimplifier(graph, codegen, driver, stats, name);
- break;
- case OptimizationPass::kIntrinsicsRecognizer:
- opt = new (allocator) IntrinsicsRecognizer(graph, stats, name);
+ opt = new (allocator) InstructionSimplifier(graph, codegen, stats, pass_name);
break;
case OptimizationPass::kCHAGuardOptimization:
- opt = new (allocator) CHAGuardOptimization(graph, name);
+ opt = new (allocator) CHAGuardOptimization(graph, pass_name);
break;
case OptimizationPass::kCodeSinking:
- opt = new (allocator) CodeSinking(graph, stats, name);
+ opt = new (allocator) CodeSinking(graph, stats, pass_name);
break;
case OptimizationPass::kConstructorFenceRedundancyElimination:
- opt = new (allocator) ConstructorFenceRedundancyElimination(graph, stats, name);
+ opt = new (allocator) ConstructorFenceRedundancyElimination(graph, stats, pass_name);
break;
case OptimizationPass::kScheduling:
opt = new (allocator) HInstructionScheduling(
- graph, driver->GetInstructionSet(), codegen, name);
+ graph, codegen->GetCompilerOptions().GetInstructionSet(), codegen, pass_name);
break;
//
// Arch-specific passes.
@@ -318,12 +319,23 @@ ArenaVector<HOptimization*> ConstructOptimizations(
DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name";
opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats);
break;
+ case OptimizationPass::kInstructionSimplifierX86:
+ opt = new (allocator) x86::InstructionSimplifierX86(graph, codegen, stats);
+ break;
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ case OptimizationPass::kInstructionSimplifierX86_64:
+ opt = new (allocator) x86_64::InstructionSimplifierX86_64(graph, codegen, stats);
+ break;
#endif
+ case OptimizationPass::kNone:
+ LOG(FATAL) << "kNone does not represent an actual pass";
+ UNREACHABLE();
} // switch
// Add each next optimization to result vector.
CHECK(opt != nullptr);
- DCHECK_STREQ(name, opt->GetPassName()); // sanity
+ DCHECK_STREQ(pass_name, opt->GetPassName()); // sanity
optimizations.push_back(opt);
}
diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h
index c170f155fa..b84e03894c 100644
--- a/compiler/optimizing/optimization.h
+++ b/compiler/optimizing/optimization.h
@@ -24,7 +24,6 @@
namespace art {
class CodeGenerator;
-class CompilerDriver;
class DexCompilationUnit;
/**
@@ -47,8 +46,9 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> {
// 'instruction_simplifier$before_codegen'.
const char* GetPassName() const { return pass_name_; }
- // Perform the analysis itself.
- virtual void Run() = 0;
+ // Perform the pass or analysis. Returns false if no optimizations occurred or no useful
+ // information was computed (this is best-effort; returning true is always acceptable).
+ virtual bool Run() = 0;
protected:
HGraph* const graph_;
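
A hedged sketch of a pass honoring the new bool-returning contract (hypothetical pass; the HOptimization constructor signature is assumed from context):

class HMyPass : public HOptimization {
 public:
  explicit HMyPass(HGraph* graph) : HOptimization(graph, "my_pass") {}

  bool Run() override {
    bool changed = false;
    // ... walk graph_, apply rewrites, set changed = true on success ...
    return changed;  // false when nothing useful happened
  }
};
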
@@ -76,14 +76,12 @@ enum class OptimizationPass {
kInductionVarAnalysis,
kInliner,
kInstructionSimplifier,
- kIntrinsicsRecognizer,
kInvariantCodeMotion,
kLoadStoreAnalysis,
kLoadStoreElimination,
kLoopOptimization,
kScheduling,
kSelectGenerator,
- kSharpening,
kSideEffectsAnalysis,
#ifdef ART_ENABLE_CODEGEN_arm
kInstructionSimplifierArm,
@@ -97,25 +95,40 @@ enum class OptimizationPass {
#endif
#ifdef ART_ENABLE_CODEGEN_x86
kPcRelativeFixupsX86,
+ kInstructionSimplifierX86,
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86_64
+ kInstructionSimplifierX86_64,
#endif
#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64)
kX86MemoryOperandGeneration,
#endif
+ kNone,
+ kLast = kNone
};
// Lookup name of optimization pass.
const char* OptimizationPassName(OptimizationPass pass);
// Lookup optimization pass by name.
-OptimizationPass OptimizationPassByName(const std::string& name);
+OptimizationPass OptimizationPassByName(const std::string& pass_name);
// Optimization definition consisting of an optimization pass
-// and an optional alternative name (nullptr denotes default).
-typedef std::pair<OptimizationPass, const char*> OptimizationDef;
+// plus an optional alternative name (nullptr denotes default), and
+// an optional pass dependence (kNone denotes no dependence).
+struct OptimizationDef {
+ OptimizationDef(OptimizationPass p, const char* pn, OptimizationPass d)
+ : pass(p), pass_name(pn), depends_on(d) {}
+ OptimizationPass pass;
+ const char* pass_name;
+ OptimizationPass depends_on;
+};
// Helper method for optimization definition array entries.
-inline OptimizationDef OptDef(OptimizationPass pass, const char* name = nullptr) {
- return std::make_pair(pass, name);
+inline OptimizationDef OptDef(OptimizationPass pass,
+ const char* pass_name = nullptr,
+ OptimizationPass depends_on = OptimizationPass::kNone) {
+ return OptimizationDef(pass, pass_name, depends_on);
}
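
A hypothetical usage sketch of the extended definition (the real pass lists live in optimizing_compiler.cc; names here follow the enum above):

OptimizationDef defs[] = {
    OptDef(OptimizationPass::kSideEffectsAnalysis),
    OptDef(OptimizationPass::kGlobalValueNumbering,
           /* pass_name= */ nullptr,
           /* depends_on= */ OptimizationPass::kSideEffectsAnalysis),
};
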
// Helper method to construct series of optimization passes.
@@ -133,7 +146,6 @@ ArenaVector<HOptimization*> ConstructOptimizations(
HGraph* graph,
OptimizingCompilerStats* stats,
CodeGenerator* codegen,
- CompilerDriver* driver,
const DexCompilationUnit& dex_compilation_unit,
VariableSizedHandleScope* handles);
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index d20b681b49..a52031cced 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -34,8 +34,6 @@
namespace vixl32 = vixl::aarch32;
-using vixl32::r0;
-
namespace art {
// Run the tests only on host.
@@ -47,25 +45,20 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
static constexpr bool kGenerateExpected = false;
OptimizingCFITest()
- : pool_and_allocator_(),
- opts_(),
- isa_features_(),
- graph_(nullptr),
+ : graph_(nullptr),
code_gen_(),
blocks_(GetAllocator()->Adapter()) {}
- ArenaAllocator* GetAllocator() { return pool_and_allocator_.GetAllocator(); }
-
void SetUpFrame(InstructionSet isa) {
+ OverrideInstructionSetFeatures(isa, "default");
+
// Ensure that slow-debug is off, so that there is no unexpected read-barrier check emitted.
SetRuntimeDebugFlagsEnabled(false);
// Setup simple context.
- std::string error;
- isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error);
graph_ = CreateGraph();
// Generate simple frame with some spills.
- code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_);
+ code_gen_ = CodeGenerator::Create(graph_, *compiler_options_);
code_gen_->GetAssembler()->cfi().SetEnabled(true);
code_gen_->InitializeCodeGenerationData();
const int frame_size = 64;
@@ -105,15 +98,15 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
const std::vector<uint8_t>& expected_asm,
const std::vector<uint8_t>& expected_cfi) {
// Get the outputs.
- const std::vector<uint8_t>& actual_asm = code_allocator_.GetMemory();
+ ArrayRef<const uint8_t> actual_asm = code_allocator_.GetMemory();
Assembler* opt_asm = code_gen_->GetAssembler();
- const std::vector<uint8_t>& actual_cfi = *(opt_asm->cfi().data());
+ ArrayRef<const uint8_t> actual_cfi(*(opt_asm->cfi().data()));
if (kGenerateExpected) {
GenerateExpected(stdout, isa, isa_str, actual_asm, actual_cfi);
} else {
- EXPECT_EQ(expected_asm, actual_asm);
- EXPECT_EQ(expected_cfi, actual_cfi);
+ EXPECT_EQ(ArrayRef<const uint8_t>(expected_asm), actual_asm);
+ EXPECT_EQ(ArrayRef<const uint8_t>(expected_cfi), actual_cfi);
}
}
@@ -135,12 +128,12 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
public:
InternalCodeAllocator() {}
- virtual uint8_t* Allocate(size_t size) {
+ uint8_t* Allocate(size_t size) override {
memory_.resize(size);
return memory_.data();
}
- const std::vector<uint8_t>& GetMemory() { return memory_; }
+ ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); }
private:
std::vector<uint8_t> memory_;
@@ -148,9 +141,6 @@ class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper {
DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator);
};
- ArenaPoolAndAllocator pool_and_allocator_;
- CompilerOptions opts_;
- std::unique_ptr<const InstructionSetFeatures> isa_features_;
HGraph* graph_;
std::unique_ptr<CodeGenerator> code_gen_;
ArenaVector<HBasicBlock*> blocks_;
@@ -202,6 +192,7 @@ TEST_ISA(kMips64)
#ifdef ART_ENABLE_CODEGEN_arm
TEST_F(OptimizingCFITest, kThumb2Adjust) {
+ using vixl32::r0;
std::vector<uint8_t> expected_asm(
expected_asm_kThumb2_adjust,
expected_asm_kThumb2_adjust + arraysize(expected_asm_kThumb2_adjust));
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index e42dfc10ba..f4bf11d3d3 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -26,11 +26,13 @@
#include "base/arena_allocator.h"
#include "base/arena_containers.h"
#include "base/dumpable.h"
+#include "base/logging.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "base/scoped_arena_allocator.h"
#include "base/timing_logger.h"
#include "builder.h"
+#include "class_root.h"
#include "code_generator.h"
#include "compiled_method.h"
#include "compiler.h"
@@ -39,7 +41,7 @@
#include "dex/dex_file_types.h"
#include "dex/verification_results.h"
#include "dex/verified_method.h"
-#include "driver/compiler_driver-inl.h"
+#include "driver/compiled_method_storage.h"
#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
#include "graph_checker.h"
@@ -60,6 +62,7 @@
#include "ssa_builder.h"
#include "ssa_liveness_analysis.h"
#include "ssa_phi_elimination.h"
+#include "stack_map_stream.h"
#include "utils/assembler.h"
#include "verifier/verifier_compiler_binding.h"
@@ -72,25 +75,21 @@ static constexpr const char* kPassNameSeparator = "$";
/**
 * Used by the code generator to allocate the code in a vector.
*/
-class CodeVectorAllocator FINAL : public CodeAllocator {
+class CodeVectorAllocator final : public CodeAllocator {
public:
explicit CodeVectorAllocator(ArenaAllocator* allocator)
- : memory_(allocator->Adapter(kArenaAllocCodeBuffer)),
- size_(0) {}
+ : memory_(allocator->Adapter(kArenaAllocCodeBuffer)) {}
- virtual uint8_t* Allocate(size_t size) {
- size_ = size;
+ uint8_t* Allocate(size_t size) override {
memory_.resize(size);
return &memory_[0];
}
- size_t GetSize() const { return size_; }
- const ArenaVector<uint8_t>& GetMemory() const { return memory_; }
+ ArrayRef<const uint8_t> GetMemory() const override { return ArrayRef<const uint8_t>(memory_); }
uint8_t* GetData() { return memory_.data(); }
private:
ArenaVector<uint8_t> memory_;
- size_t size_;
DISALLOW_COPY_AND_ASSIGN(CodeVectorAllocator);
};
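Since the separate size_ field is gone, callers now take both the bytes and the length from the single ArrayRef view. A minimal sketch of the updated access pattern, mirroring the JIT call sites later in this change:

    ArrayRef<const uint8_t> code = code_allocator.GetMemory();
    const uint8_t* data = code.data();  // previously GetMemory().data()
    size_t code_size = code.size();     // previously GetSize()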
@@ -108,21 +107,22 @@ class PassObserver : public ValueObject {
PassObserver(HGraph* graph,
CodeGenerator* codegen,
std::ostream* visualizer_output,
- CompilerDriver* compiler_driver,
+ const CompilerOptions& compiler_options,
Mutex& dump_mutex)
: graph_(graph),
+ last_seen_graph_size_(0),
cached_method_name_(),
- timing_logger_enabled_(compiler_driver->GetCompilerOptions().GetDumpTimings()),
+ timing_logger_enabled_(compiler_options.GetDumpPassTimings()),
timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
disasm_info_(graph->GetAllocator()),
visualizer_oss_(),
visualizer_output_(visualizer_output),
- visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
+ visualizer_enabled_(!compiler_options.GetDumpCfgFileName().empty()),
visualizer_(&visualizer_oss_, graph, *codegen),
visualizer_dump_mutex_(dump_mutex),
graph_in_bad_state_(false) {
if (timing_logger_enabled_ || visualizer_enabled_) {
- if (!IsVerboseMethod(compiler_driver, GetMethodName())) {
+ if (!IsVerboseMethod(compiler_options, GetMethodName())) {
timing_logger_enabled_ = visualizer_enabled_ = false;
}
if (visualizer_enabled_) {
@@ -162,7 +162,7 @@ class PassObserver : public ValueObject {
VLOG(compiler) << "Starting pass: " << pass_name;
// Dump graph first, then start timer.
if (visualizer_enabled_) {
- visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_);
+ visualizer_.DumpGraph(pass_name, /* is_after_pass= */ false, graph_in_bad_state_);
FlushVisualizer();
}
if (timing_logger_enabled_) {
@@ -178,13 +178,13 @@ class PassObserver : public ValueObject {
visualizer_oss_.clear();
}
- void EndPass(const char* pass_name) REQUIRES(!visualizer_dump_mutex_) {
+ void EndPass(const char* pass_name, bool pass_change) REQUIRES(!visualizer_dump_mutex_) {
// Pause timer first, then dump graph.
if (timing_logger_enabled_) {
timing_logger_.EndTiming();
}
if (visualizer_enabled_) {
- visualizer_.DumpGraph(pass_name, /* is_after_pass */ true, graph_in_bad_state_);
+ visualizer_.DumpGraph(pass_name, /* is_after_pass= */ true, graph_in_bad_state_);
FlushVisualizer();
}
@@ -192,7 +192,7 @@ class PassObserver : public ValueObject {
if (kIsDebugBuild) {
if (!graph_in_bad_state_) {
GraphChecker checker(graph_);
- checker.Run();
+ last_seen_graph_size_ = checker.Run(pass_change, last_seen_graph_size_);
if (!checker.IsValid()) {
LOG(FATAL) << "Error after " << pass_name << ": " << Dumpable<GraphChecker>(checker);
}
@@ -200,11 +200,11 @@ class PassObserver : public ValueObject {
}
}
- static bool IsVerboseMethod(CompilerDriver* compiler_driver, const char* method_name) {
+ static bool IsVerboseMethod(const CompilerOptions& compiler_options, const char* method_name) {
// Test an exact match to --verbose-methods. If verbose-methods is set, this overrides an
// empty kStringFilter matching all methods.
- if (compiler_driver->GetCompilerOptions().HasVerboseMethods()) {
- return compiler_driver->GetCompilerOptions().IsVerboseMethod(method_name);
+ if (compiler_options.HasVerboseMethods()) {
+ return compiler_options.IsVerboseMethod(method_name);
}
// Test the kStringFilter sub-string. constexpr helper variable to silence unreachable-code
@@ -218,6 +218,7 @@ class PassObserver : public ValueObject {
}
HGraph* const graph_;
+ size_t last_seen_graph_size_;
std::string cached_method_name_;
@@ -245,60 +246,64 @@ class PassScope : public ValueObject {
public:
PassScope(const char *pass_name, PassObserver* pass_observer)
: pass_name_(pass_name),
+ pass_change_(true), // assume change
pass_observer_(pass_observer) {
pass_observer_->StartPass(pass_name_);
}
+ void SetPassNotChanged() {
+ pass_change_ = false;
+ }
+
~PassScope() {
- pass_observer_->EndPass(pass_name_);
+ pass_observer_->EndPass(pass_name_, pass_change_);
}
private:
const char* const pass_name_;
+ bool pass_change_;
PassObserver* const pass_observer_;
};
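The intended protocol: a PassScope assumes a change on construction, the caller downgrades that assumption when the pass reports no change, and the destructor delivers the verdict to EndPass(). A minimal usage sketch, assuming an HOptimization* named optimization whose Run() returns bool as introduced by this change:

    {
      PassScope scope(optimization->GetPassName(), pass_observer);
      if (!optimization->Run()) {
        scope.SetPassNotChanged();
      }
    }  // ~PassScope() calls EndPass(pass_name_, pass_change_).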
-class OptimizingCompiler FINAL : public Compiler {
+class OptimizingCompiler final : public Compiler {
public:
- explicit OptimizingCompiler(CompilerDriver* driver);
- ~OptimizingCompiler() OVERRIDE;
+ explicit OptimizingCompiler(const CompilerOptions& compiler_options,
+ CompiledMethodStorage* storage);
+ ~OptimizingCompiler() override;
- bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const OVERRIDE;
+ bool CanCompileMethod(uint32_t method_idx, const DexFile& dex_file) const override;
- CompiledMethod* Compile(const DexFile::CodeItem* code_item,
+ CompiledMethod* Compile(const dex::CodeItem* code_item,
uint32_t access_flags,
InvokeType invoke_type,
uint16_t class_def_idx,
uint32_t method_idx,
Handle<mirror::ClassLoader> class_loader,
const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const OVERRIDE;
+ Handle<mirror::DexCache> dex_cache) const override;
CompiledMethod* JniCompile(uint32_t access_flags,
uint32_t method_idx,
const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const OVERRIDE;
+ Handle<mirror::DexCache> dex_cache) const override;
- uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE
+ uintptr_t GetEntryPointOf(ArtMethod* method) const override
REQUIRES_SHARED(Locks::mutator_lock_) {
return reinterpret_cast<uintptr_t>(method->GetEntryPointFromQuickCompiledCodePtrSize(
- InstructionSetPointerSize(GetCompilerDriver()->GetInstructionSet())));
+ InstructionSetPointerSize(GetCompilerOptions().GetInstructionSet())));
}
- void Init() OVERRIDE;
-
- void UnInit() const OVERRIDE;
-
bool JitCompile(Thread* self,
jit::JitCodeCache* code_cache,
ArtMethod* method,
+ bool baseline,
bool osr,
jit::JitLogger* jit_logger)
- OVERRIDE
+ override
REQUIRES_SHARED(Locks::mutator_lock_);
private:
- void RunOptimizations(HGraph* graph,
+ bool RunOptimizations(HGraph* graph,
CodeGenerator* codegen,
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer,
@@ -313,25 +318,41 @@ class OptimizingCompiler FINAL : public Compiler {
graph,
compilation_stats_.get(),
codegen,
- GetCompilerDriver(),
dex_compilation_unit,
handles);
DCHECK_EQ(length, optimizations.size());
- // Run the optimization passes one by one.
+ // Run the optimization passes one by one. Any "depends_on" pass refers back to
+ // the most recent occurrence of that pass, skipped or executed.
+ std::bitset<static_cast<size_t>(OptimizationPass::kLast) + 1u> pass_changes;
+ pass_changes[static_cast<size_t>(OptimizationPass::kNone)] = true;
+ bool change = false;
for (size_t i = 0; i < length; ++i) {
- PassScope scope(optimizations[i]->GetPassName(), pass_observer);
- optimizations[i]->Run();
+ if (pass_changes[static_cast<size_t>(definitions[i].depends_on)]) {
+ // Execute the pass and record whether it changed anything.
+ PassScope scope(optimizations[i]->GetPassName(), pass_observer);
+ bool pass_change = optimizations[i]->Run();
+ pass_changes[static_cast<size_t>(definitions[i].pass)] = pass_change;
+ if (pass_change) {
+ change = true;
+ } else {
+ scope.SetPassNotChanged();
+ }
+ } else {
+ // Skip the pass and record that nothing changed.
+ pass_changes[static_cast<size_t>(definitions[i].pass)] = false;
+ }
}
+ return change;
}
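To make the bookkeeping concrete, a standalone sketch of the same mechanism with a hypothetical two-pass enum (not the real OptimizationPass; requires <bitset>):

    enum class P : size_t { kNone = 0, kA = 1, kB = 2, kLast = kB };
    std::bitset<static_cast<size_t>(P::kLast) + 1u> pass_changes;
    pass_changes[static_cast<size_t>(P::kNone)] = true;  // no dependence: always run
    // A pass declared with depends_on = P::kA runs only while pass_changes[kA]
    // holds true, i.e. while the most recent occurrence of kA, executed or
    // skipped, is recorded as having changed the graph.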
- template <size_t length> void RunOptimizations(
+ template <size_t length> bool RunOptimizations(
HGraph* graph,
CodeGenerator* codegen,
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer,
VariableSizedHandleScope* handles,
const OptimizationDef (&definitions)[length]) const {
- RunOptimizations(
+ return RunOptimizations(
graph, codegen, dex_compilation_unit, pass_observer, handles, definitions, length);
}
@@ -346,7 +367,7 @@ class OptimizingCompiler FINAL : public Compiler {
CompiledMethod* Emit(ArenaAllocator* allocator,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
- const DexFile::CodeItem* item) const;
+ const dex::CodeItem* item) const;
// Try compiling a method and return the code generator used for
// compiling it.
@@ -360,6 +381,7 @@ class OptimizingCompiler FINAL : public Compiler {
CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
+ bool baseline,
bool osr,
VariableSizedHandleScope* handles) const;
@@ -370,19 +392,20 @@ class OptimizingCompiler FINAL : public Compiler {
ArtMethod* method,
VariableSizedHandleScope* handles) const;
- void MaybeRunInliner(HGraph* graph,
- CodeGenerator* codegen,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer,
- VariableSizedHandleScope* handles) const;
-
- void RunArchOptimizations(HGraph* graph,
+ bool RunArchOptimizations(HGraph* graph,
CodeGenerator* codegen,
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer,
VariableSizedHandleScope* handles) const;
- void GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo method_debug_info)
+ bool RunBaselineOptimizations(HGraph* graph,
+ CodeGenerator* codegen,
+ const DexCompilationUnit& dex_compilation_unit,
+ PassObserver* pass_observer,
+ VariableSizedHandleScope* handles) const;
+
+ void GenerateJitDebugInfo(ArtMethod* method,
+ const debug::MethodDebugInfo& method_debug_info)
REQUIRES_SHARED(Locks::mutator_lock_);
std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
@@ -396,28 +419,22 @@ class OptimizingCompiler FINAL : public Compiler {
static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
-OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
- : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
- dump_mutex_("Visualizer dump lock") {}
-
-void OptimizingCompiler::Init() {
- // Enable C1visualizer output. Must be done in Init() because the compiler
- // driver is not fully initialized when passed to the compiler's constructor.
- CompilerDriver* driver = GetCompilerDriver();
- const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName();
+OptimizingCompiler::OptimizingCompiler(const CompilerOptions& compiler_options,
+ CompiledMethodStorage* storage)
+ : Compiler(compiler_options, storage, kMaximumCompilationTimeBeforeWarning),
+ dump_mutex_("Visualizer dump lock") {
+ // Enable C1visualizer output.
+ const std::string& cfg_file_name = compiler_options.GetDumpCfgFileName();
if (!cfg_file_name.empty()) {
std::ios_base::openmode cfg_file_mode =
- driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
+ compiler_options.GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
}
- if (driver->GetCompilerOptions().GetDumpStats()) {
+ if (compiler_options.GetDumpStats()) {
compilation_stats_.reset(new OptimizingCompilerStats());
}
}
-void OptimizingCompiler::UnInit() const {
-}
-
OptimizingCompiler::~OptimizingCompiler() {
if (compilation_stats_.get() != nullptr) {
compilation_stats_->Log();
@@ -439,33 +456,54 @@ static bool IsInstructionSetSupported(InstructionSet instruction_set) {
|| instruction_set == InstructionSet::kX86_64;
}
-void OptimizingCompiler::MaybeRunInliner(HGraph* graph,
- CodeGenerator* codegen,
- const DexCompilationUnit& dex_compilation_unit,
- PassObserver* pass_observer,
- VariableSizedHandleScope* handles) const {
- const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
- bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0);
- if (!should_inline) {
- return;
+bool OptimizingCompiler::RunBaselineOptimizations(HGraph* graph,
+ CodeGenerator* codegen,
+ const DexCompilationUnit& dex_compilation_unit,
+ PassObserver* pass_observer,
+ VariableSizedHandleScope* handles) const {
+ switch (codegen->GetCompilerOptions().GetInstructionSet()) {
+#ifdef ART_ENABLE_CODEGEN_mips
+ case InstructionSet::kMips: {
+ OptimizationDef mips_optimizations[] = {
+ OptDef(OptimizationPass::kPcRelativeFixupsMips)
+ };
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ handles,
+ mips_optimizations);
+ }
+#endif
+#ifdef ART_ENABLE_CODEGEN_x86
+ case InstructionSet::kX86: {
+ OptimizationDef x86_optimizations[] = {
+ OptDef(OptimizationPass::kPcRelativeFixupsX86),
+ };
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ handles,
+ x86_optimizations);
+ }
+#endif
+ default:
+ UNUSED(graph);
+ UNUSED(codegen);
+ UNUSED(dex_compilation_unit);
+ UNUSED(pass_observer);
+ UNUSED(handles);
+ return false;
}
- OptimizationDef optimizations[] = {
- OptDef(OptimizationPass::kInliner)
- };
- RunOptimizations(graph,
- codegen,
- dex_compilation_unit,
- pass_observer,
- handles,
- optimizations);
}
-void OptimizingCompiler::RunArchOptimizations(HGraph* graph,
+bool OptimizingCompiler::RunArchOptimizations(HGraph* graph,
CodeGenerator* codegen,
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer,
VariableSizedHandleScope* handles) const {
- switch (GetCompilerDriver()->GetInstructionSet()) {
+ switch (codegen->GetCompilerOptions().GetInstructionSet()) {
#if defined(ART_ENABLE_CODEGEN_arm)
case InstructionSet::kThumb2:
case InstructionSet::kArm: {
@@ -475,13 +513,12 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph,
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
OptDef(OptimizationPass::kScheduling)
};
- RunOptimizations(graph,
- codegen,
- dex_compilation_unit,
- pass_observer,
- handles,
- arm_optimizations);
- break;
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ handles,
+ arm_optimizations);
}
#endif
#ifdef ART_ENABLE_CODEGEN_arm64
@@ -492,13 +529,12 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph,
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
OptDef(OptimizationPass::kScheduling)
};
- RunOptimizations(graph,
- codegen,
- dex_compilation_unit,
- pass_observer,
- handles,
- arm64_optimizations);
- break;
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ handles,
+ arm64_optimizations);
}
#endif
#ifdef ART_ENABLE_CODEGEN_mips
@@ -509,13 +545,12 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph,
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
OptDef(OptimizationPass::kPcRelativeFixupsMips)
};
- RunOptimizations(graph,
- codegen,
- dex_compilation_unit,
- pass_observer,
- handles,
- mips_optimizations);
- break;
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ handles,
+ mips_optimizations);
}
#endif
#ifdef ART_ENABLE_CODEGEN_mips64
@@ -524,50 +559,49 @@ void OptimizingCompiler::RunArchOptimizations(HGraph* graph,
OptDef(OptimizationPass::kSideEffectsAnalysis),
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch")
};
- RunOptimizations(graph,
- codegen,
- dex_compilation_unit,
- pass_observer,
- handles,
- mips64_optimizations);
- break;
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ handles,
+ mips64_optimizations);
}
#endif
#ifdef ART_ENABLE_CODEGEN_x86
case InstructionSet::kX86: {
OptimizationDef x86_optimizations[] = {
+ OptDef(OptimizationPass::kInstructionSimplifierX86),
OptDef(OptimizationPass::kSideEffectsAnalysis),
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
OptDef(OptimizationPass::kPcRelativeFixupsX86),
OptDef(OptimizationPass::kX86MemoryOperandGeneration)
};
- RunOptimizations(graph,
- codegen,
- dex_compilation_unit,
- pass_observer,
- handles,
- x86_optimizations);
- break;
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ handles,
+ x86_optimizations);
}
#endif
#ifdef ART_ENABLE_CODEGEN_x86_64
case InstructionSet::kX86_64: {
OptimizationDef x86_64_optimizations[] = {
+ OptDef(OptimizationPass::kInstructionSimplifierX86_64),
OptDef(OptimizationPass::kSideEffectsAnalysis),
OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"),
OptDef(OptimizationPass::kX86MemoryOperandGeneration)
};
- RunOptimizations(graph,
- codegen,
- dex_compilation_unit,
- pass_observer,
- handles,
- x86_64_optimizations);
- break;
+ return RunOptimizations(graph,
+ codegen,
+ dex_compilation_unit,
+ pass_observer,
+ handles,
+ x86_64_optimizations);
}
#endif
default:
- break;
+ return false;
}
}
@@ -580,7 +614,7 @@ static void AllocateRegisters(HGraph* graph,
{
PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName,
pass_observer);
- PrepareForRegisterAllocation(graph, stats).Run();
+ PrepareForRegisterAllocation(graph, codegen->GetCompilerOptions(), stats).Run();
}
// Use local allocator shared by SSA liveness analysis and register allocator.
// (Register allocator creates new objects in the liveness data.)
@@ -609,16 +643,16 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
const DexCompilationUnit& dex_compilation_unit,
PassObserver* pass_observer,
VariableSizedHandleScope* handles) const {
- const std::vector<std::string>* pass_names =
- GetCompilerDriver()->GetCompilerOptions().GetPassesToRun();
+ const std::vector<std::string>* pass_names = GetCompilerOptions().GetPassesToRun();
if (pass_names != nullptr) {
// If passes were defined on command-line, build the optimization
// passes and run these instead of the built-in optimizations.
+ // TODO: provide a way to define depends_on via the command line?
const size_t length = pass_names->size();
std::vector<OptimizationDef> optimizations;
for (const std::string& pass_name : *pass_names) {
std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
- optimizations.push_back(OptDef(OptimizationPassByName(opt_name.c_str()), pass_name.c_str()));
+ optimizations.push_back(OptDef(OptimizationPassByName(opt_name), pass_name.c_str()));
}
RunOptimizations(graph,
codegen,
@@ -630,49 +664,62 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
return;
}
- OptimizationDef optimizations1[] = {
- OptDef(OptimizationPass::kIntrinsicsRecognizer),
- OptDef(OptimizationPass::kSharpening),
+ OptimizationDef optimizations[] = {
+ // Initial optimizations.
OptDef(OptimizationPass::kConstantFolding),
OptDef(OptimizationPass::kInstructionSimplifier),
- OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$initial")
- };
- RunOptimizations(graph,
- codegen,
- dex_compilation_unit,
- pass_observer,
- handles,
- optimizations1);
-
- MaybeRunInliner(graph, codegen, dex_compilation_unit, pass_observer, handles);
-
- OptimizationDef optimizations2[] = {
- // SelectGenerator depends on the InstructionSimplifier removing
- // redundant suspend checks to recognize empty blocks.
- OptDef(OptimizationPass::kSelectGenerator),
- // TODO: if we don't inline we can also skip fold2.
- OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_inlining"),
- OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_inlining"),
- OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$after_inlining"),
- OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$initial"),
+ // Inlining.
+ OptDef(OptimizationPass::kInliner),
+ // Simplification (only if inlining occurred).
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$after_inlining",
+ OptimizationPass::kInliner),
+ OptDef(OptimizationPass::kInstructionSimplifier,
+ "instruction_simplifier$after_inlining",
+ OptimizationPass::kInliner),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_inlining",
+ OptimizationPass::kInliner),
+ // GVN.
+ OptDef(OptimizationPass::kSideEffectsAnalysis,
+ "side_effects$before_gvn"),
OptDef(OptimizationPass::kGlobalValueNumbering),
+ // Simplification (TODO: only if GVN occurred).
+ OptDef(OptimizationPass::kSelectGenerator),
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$after_gvn"),
+ OptDef(OptimizationPass::kInstructionSimplifier,
+ "instruction_simplifier$after_gvn"),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$after_gvn"),
+ // High-level optimizations.
+ OptDef(OptimizationPass::kSideEffectsAnalysis,
+ "side_effects$before_licm"),
OptDef(OptimizationPass::kInvariantCodeMotion),
OptDef(OptimizationPass::kInductionVarAnalysis),
OptDef(OptimizationPass::kBoundsCheckElimination),
OptDef(OptimizationPass::kLoopOptimization),
- // Evaluates code generated by dynamic bce.
- OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_bce"),
- OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_bce"),
- OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_lse"),
+ // Simplification.
+ OptDef(OptimizationPass::kConstantFolding,
+ "constant_folding$after_bce"),
+ OptDef(OptimizationPass::kInstructionSimplifier,
+ "instruction_simplifier$after_bce"),
+ // Other high-level optimizations.
+ OptDef(OptimizationPass::kSideEffectsAnalysis,
+ "side_effects$before_lse"),
OptDef(OptimizationPass::kLoadStoreAnalysis),
OptDef(OptimizationPass::kLoadStoreElimination),
OptDef(OptimizationPass::kCHAGuardOptimization),
- OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$final"),
+ OptDef(OptimizationPass::kDeadCodeElimination,
+ "dead_code_elimination$final"),
OptDef(OptimizationPass::kCodeSinking),
// The codegen has a few assumptions that only the instruction simplifier
// can satisfy. For example, the code generator does not expect to see a
// HTypeConversion from a type to the same type.
- OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$before_codegen"),
+ OptDef(OptimizationPass::kInstructionSimplifier,
+ "instruction_simplifier$before_codegen"),
// Eliminate constructor fences after code sinking to avoid
// complicated sinking logic to split a fence with many inputs.
OptDef(OptimizationPass::kConstructorFenceRedundancyElimination)
@@ -682,7 +729,7 @@ void OptimizingCompiler::RunOptimizations(HGraph* graph,
dex_compilation_unit,
pass_observer,
handles,
- optimizations2);
+ optimizations);
RunArchOptimizations(graph, codegen, dex_compilation_unit, pass_observer, handles);
}
@@ -703,34 +750,28 @@ static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator*
CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
- const DexFile::CodeItem* code_item_for_osr_check) const {
+ const dex::CodeItem* code_item_for_osr_check) const {
ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
- ArenaVector<uint8_t> stack_map(allocator->Adapter(kArenaAllocStackMaps));
- ArenaVector<uint8_t> method_info(allocator->Adapter(kArenaAllocStackMaps));
- size_t stack_map_size = 0;
- size_t method_info_size = 0;
- codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
- stack_map.resize(stack_map_size);
- method_info.resize(method_info_size);
- codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()),
- MemoryRegion(method_info.data(), method_info.size()),
- code_item_for_osr_check);
+ ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item_for_osr_check);
+ CompiledMethodStorage* storage = GetCompiledMethodStorage();
CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
- GetCompilerDriver(),
+ storage,
codegen->GetInstructionSet(),
- ArrayRef<const uint8_t>(code_allocator->GetMemory()),
- // Follow Quick's behavior and set the frame size to zero if it is
- // considered "empty" (see the definition of
- // art::CodeGenerator::HasEmptyFrame).
- codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
- codegen->GetCoreSpillMask(),
- codegen->GetFpuSpillMask(),
- ArrayRef<const uint8_t>(method_info),
+ code_allocator->GetMemory(),
ArrayRef<const uint8_t>(stack_map),
ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
ArrayRef<const linker::LinkerPatch>(linker_patches));
+ for (const linker::LinkerPatch& patch : linker_patches) {
+ if (codegen->NeedsThunkCode(patch) && storage->GetThunkCode(patch).empty()) {
+ ArenaVector<uint8_t> code(allocator->Adapter());
+ std::string debug_name;
+ codegen->EmitThunkCode(patch, &code, &debug_name);
+ storage->SetThunkCode(patch, ArrayRef<const uint8_t>(code), debug_name);
+ }
+ }
+
return compiled_method;
}
@@ -739,14 +780,15 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
CodeVectorAllocator* code_allocator,
const DexCompilationUnit& dex_compilation_unit,
ArtMethod* method,
+ bool baseline,
bool osr,
VariableSizedHandleScope* handles) const {
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptBytecodeCompilation);
- CompilerDriver* compiler_driver = GetCompilerDriver();
- InstructionSet instruction_set = compiler_driver->GetInstructionSet();
+ const CompilerOptions& compiler_options = GetCompilerOptions();
+ InstructionSet instruction_set = compiler_options.GetInstructionSet();
const DexFile& dex_file = *dex_compilation_unit.GetDexFile();
uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex();
- const DexFile::CodeItem* code_item = dex_compilation_unit.GetCodeItem();
+ const dex::CodeItem* code_item = dex_compilation_unit.GetCodeItem();
// Always use the Thumb-2 assembler: some runtime functionality
// (like implicit stack overflow checks) assume Thumb-2.
@@ -767,7 +809,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
// Implementation of the space filter: do not compile a code item whose size in
// code units is bigger than 128.
static constexpr size_t kSpaceFilterOptimizingThreshold = 128;
- const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions();
if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace)
&& (CodeItemInstructionAccessor(dex_file, code_item).InsnsSizeInCodeUnits() >
kSpaceFilterOptimizingThreshold)) {
@@ -776,43 +817,58 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
}
CodeItemDebugInfoAccessor code_item_accessor(dex_file, code_item, method_idx);
+
+ bool dead_reference_safe;
+ ArrayRef<const uint8_t> interpreter_metadata;
+ // For AOT compilation, we may not get a method, for example if its class is erroneous,
+ // possibly due to an unavailable superclass. JIT should always have a method.
+ DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr);
+ if (method != nullptr) {
+ const dex::ClassDef* containing_class;
+ {
+ ScopedObjectAccess soa(Thread::Current());
+ containing_class = &method->GetClassDef();
+ interpreter_metadata = method->GetQuickenedInfo();
+ }
+ // MethodContainsRSensitiveAccess is currently slow, but HasDeadReferenceSafeAnnotation()
+ // is rarely true, so the conjunction below usually short-circuits before the slow check.
+ dead_reference_safe =
+ annotations::HasDeadReferenceSafeAnnotation(dex_file, *containing_class)
+ && !annotations::MethodContainsRSensitiveAccess(dex_file, *containing_class, method_idx);
+ } else {
+ // If we could not resolve the class, conservatively assume it's dead-reference unsafe.
+ dead_reference_safe = false;
+ }
+
HGraph* graph = new (allocator) HGraph(
allocator,
arena_stack,
dex_file,
method_idx,
- compiler_driver->GetInstructionSet(),
+ compiler_options.GetInstructionSet(),
kInvalidInvokeType,
- compiler_driver->GetCompilerOptions().GetDebuggable(),
- osr);
+ dead_reference_safe,
+ compiler_options.GetDebuggable(),
+ /* osr= */ osr);
- ArrayRef<const uint8_t> interpreter_metadata;
- // For AOT compilation, we may not get a method, for example if its class is erroneous.
- // JIT should always have a method.
- DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr);
if (method != nullptr) {
graph->SetArtMethod(method);
- ScopedObjectAccess soa(Thread::Current());
- interpreter_metadata = method->GetQuickenedInfo();
}
std::unique_ptr<CodeGenerator> codegen(
CodeGenerator::Create(graph,
- instruction_set,
- *compiler_driver->GetInstructionSetFeatures(),
- compiler_driver->GetCompilerOptions(),
+ compiler_options,
compilation_stats_.get()));
if (codegen.get() == nullptr) {
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledNoCodegen);
return nullptr;
}
- codegen->GetAssembler()->cfi().SetEnabled(
- compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo());
+ codegen->GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo());
PassObserver pass_observer(graph,
codegen.get(),
visualizer_output_.get(),
- compiler_driver,
+ compiler_options,
dump_mutex_);
{
@@ -822,7 +878,6 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
code_item_accessor,
&dex_compilation_unit,
&dex_compilation_unit,
- compiler_driver,
codegen.get(),
compilation_stats_.get(),
interpreter_metadata,
@@ -833,23 +888,28 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
case kAnalysisSkipped: {
MaybeRecordStat(compilation_stats_.get(),
MethodCompilationStat::kNotCompiledSkipped);
- }
break;
+ }
case kAnalysisInvalidBytecode: {
MaybeRecordStat(compilation_stats_.get(),
MethodCompilationStat::kNotCompiledInvalidBytecode);
- }
break;
+ }
case kAnalysisFailThrowCatchLoop: {
MaybeRecordStat(compilation_stats_.get(),
MethodCompilationStat::kNotCompiledThrowCatchLoop);
- }
break;
+ }
case kAnalysisFailAmbiguousArrayOp: {
MaybeRecordStat(compilation_stats_.get(),
MethodCompilationStat::kNotCompiledAmbiguousArrayOp);
+ break;
}
+ case kAnalysisFailIrreducibleLoopAndStringInit: {
+ MaybeRecordStat(compilation_stats_.get(),
+ MethodCompilationStat::kNotCompiledIrreducibleLoopAndStringInit);
break;
+ }
case kAnalysisSuccess:
UNREACHABLE();
}
@@ -858,11 +918,11 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
}
}
- RunOptimizations(graph,
- codegen.get(),
- dex_compilation_unit,
- &pass_observer,
- handles);
+ if (baseline) {
+ RunBaselineOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer, handles);
+ } else {
+ RunOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer, handles);
+ }
RegisterAllocator::Strategy regalloc_strategy =
compiler_options.GetRegisterAllocationStrategy();
@@ -887,8 +947,8 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
ArtMethod* method,
VariableSizedHandleScope* handles) const {
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptIntrinsicCompilation);
- CompilerDriver* compiler_driver = GetCompilerDriver();
- InstructionSet instruction_set = compiler_driver->GetInstructionSet();
+ const CompilerOptions& compiler_options = GetCompilerOptions();
+ InstructionSet instruction_set = compiler_options.GetInstructionSet();
const DexFile& dex_file = *dex_compilation_unit.GetDexFile();
uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex();
@@ -906,10 +966,11 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
arena_stack,
dex_file,
method_idx,
- compiler_driver->GetInstructionSet(),
+ compiler_options.GetInstructionSet(),
kInvalidInvokeType,
- compiler_driver->GetCompilerOptions().GetDebuggable(),
- /* osr */ false);
+ /* dead_reference_safe= */ true, // Intrinsics don't affect dead reference safety.
+ compiler_options.GetDebuggable(),
+ /* osr= */ false);
DCHECK(Runtime::Current()->IsAotCompiler());
DCHECK(method != nullptr);
@@ -917,20 +978,17 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
std::unique_ptr<CodeGenerator> codegen(
CodeGenerator::Create(graph,
- instruction_set,
- *compiler_driver->GetInstructionSetFeatures(),
- compiler_driver->GetCompilerOptions(),
+ compiler_options,
compilation_stats_.get()));
if (codegen.get() == nullptr) {
return nullptr;
}
- codegen->GetAssembler()->cfi().SetEnabled(
- compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo());
+ codegen->GetAssembler()->cfi().SetEnabled(compiler_options.GenerateAnyDebugInfo());
PassObserver pass_observer(graph,
codegen.get(),
visualizer_output_.get(),
- compiler_driver,
+ compiler_options,
dump_mutex_);
{
@@ -940,18 +998,16 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
CodeItemDebugInfoAccessor(), // Null code item.
&dex_compilation_unit,
&dex_compilation_unit,
- compiler_driver,
codegen.get(),
compilation_stats_.get(),
- /* interpreter_metadata */ ArrayRef<const uint8_t>(),
+ /* interpreter_metadata= */ ArrayRef<const uint8_t>(),
handles);
builder.BuildIntrinsicGraph(method);
}
OptimizationDef optimizations[] = {
- OptDef(OptimizationPass::kIntrinsicsRecognizer),
- // Some intrinsics are converted to HIR by the simplifier and the codegen also
- // has a few assumptions that only the instruction simplifier can satisfy.
+ // The codegen has a few assumptions that only the instruction simplifier
+ // can satisfy.
OptDef(OptimizationPass::kInstructionSimplifier),
};
RunOptimizations(graph,
@@ -966,7 +1022,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
AllocateRegisters(graph,
codegen.get(),
&pass_observer,
- compiler_driver->GetCompilerOptions().GetRegisterAllocationStrategy(),
+ compiler_options.GetRegisterAllocationStrategy(),
compilation_stats_.get());
if (!codegen->IsLeafMethod()) {
VLOG(compiler) << "Intrinsic method is not leaf: " << method->GetIntrinsic()
@@ -983,7 +1039,7 @@ CodeGenerator* OptimizingCompiler::TryCompileIntrinsic(
return codegen.release();
}
-CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
+CompiledMethod* OptimizingCompiler::Compile(const dex::CodeItem* code_item,
uint32_t access_flags,
InvokeType invoke_type,
uint16_t class_def_idx,
@@ -991,13 +1047,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
Handle<mirror::ClassLoader> jclass_loader,
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache) const {
- CompilerDriver* compiler_driver = GetCompilerDriver();
+ const CompilerOptions& compiler_options = GetCompilerOptions();
CompiledMethod* compiled_method = nullptr;
Runtime* runtime = Runtime::Current();
DCHECK(runtime->IsAotCompiler());
- const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx);
+ const VerifiedMethod* verified_method = compiler_options.GetVerifiedMethod(&dex_file, method_idx);
DCHECK(!verified_method->HasRuntimeThrow());
- if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) ||
+ if (compiler_options.IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) ||
verifier::CanCompilerHandleVerificationFailure(
verified_method->GetEncounteredVerificationFailures())) {
ArenaAllocator allocator(runtime->GetArenaPool());
@@ -1006,6 +1062,15 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
std::unique_ptr<CodeGenerator> codegen;
bool compiled_intrinsic = false;
{
+ ScopedObjectAccess soa(Thread::Current());
+ ArtMethod* method =
+ runtime->GetClassLinker()->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>(
+ method_idx, dex_cache, jclass_loader, /*referrer=*/ nullptr, invoke_type);
+ DCHECK_EQ(method == nullptr, soa.Self()->IsExceptionPending());
+ soa.Self()->ClearException(); // Suppress exception if any.
+ VariableSizedHandleScope handles(soa.Self());
+ Handle<mirror::Class> compiling_class =
+ handles.NewHandle(method != nullptr ? method->GetDeclaringClass() : nullptr);
DexCompilationUnit dex_compilation_unit(
jclass_loader,
runtime->GetClassLinker(),
@@ -1014,16 +1079,13 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
class_def_idx,
method_idx,
access_flags,
- /* verified_method */ nullptr, // Not needed by the Optimizing compiler.
- dex_cache);
- ScopedObjectAccess soa(Thread::Current());
- ArtMethod* method = compiler_driver->ResolveMethod(
- soa, dex_cache, jclass_loader, &dex_compilation_unit, method_idx, invoke_type);
- VariableSizedHandleScope handles(soa.Self());
+ /*verified_method=*/ nullptr, // Not needed by the Optimizing compiler.
+ dex_cache,
+ compiling_class);
// Go to native so that we don't block GC during compilation.
ScopedThreadSuspension sts(soa.Self(), kNative);
if (method != nullptr && UNLIKELY(method->IsIntrinsic())) {
- DCHECK(compiler_driver->GetCompilerOptions().IsBootImage());
+ DCHECK(compiler_options.IsBootImage());
codegen.reset(
TryCompileIntrinsic(&allocator,
&arena_stack,
@@ -1042,7 +1104,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
&code_allocator,
dex_compilation_unit,
method,
- /* osr */ false,
+ compiler_options.IsBaseline(),
+ /* osr= */ false,
&handles));
}
}
@@ -1070,7 +1133,7 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
}
} else {
MethodCompilationStat method_stat;
- if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
+ if (compiler_options.VerifyAtRuntime()) {
method_stat = MethodCompilationStat::kNotCompiledVerifyAtRuntime;
} else {
method_stat = MethodCompilationStat::kNotCompiledVerificationError;
@@ -1079,8 +1142,8 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
}
if (kIsDebugBuild &&
- IsCompilingWithCoreImage() &&
- IsInstructionSetSupported(compiler_driver->GetInstructionSet())) {
+ compiler_options.CompilingWithCoreImage() &&
+ IsInstructionSetSupported(compiler_options.GetInstructionSet())) {
// For testing purposes, we put a special marker on method names
// that should be compiled with this compiler (when the
// instruction set is supported). This makes sure we're not
@@ -1093,31 +1156,50 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
return compiled_method;
}
+static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* allocator,
+ const JniCompiledMethod& jni_compiled_method) {
+ // StackMapStream is quite large, so allocate it using the ScopedArenaAllocator
+ // to stay clear of the frame size limit.
+ std::unique_ptr<StackMapStream> stack_map_stream(
+ new (allocator) StackMapStream(allocator, jni_compiled_method.GetInstructionSet()));
+ stack_map_stream->BeginMethod(
+ jni_compiled_method.GetFrameSize(),
+ jni_compiled_method.GetCoreSpillMask(),
+ jni_compiled_method.GetFpSpillMask(),
+ /* num_dex_registers= */ 0);
+ stack_map_stream->EndMethod();
+ return stack_map_stream->Encode();
+}
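Both JNI call sites below use this helper the same way; a minimal usage sketch:

    ArenaStack arena_stack(runtime->GetArenaPool());
    ScopedArenaAllocator stack_map_allocator(&arena_stack);  // will hold the stack map
    ScopedArenaVector<uint8_t> stack_map =
        CreateJniStackMap(&stack_map_allocator, jni_compiled_method);

The frame size and spill masks that were previously passed to CommitCode() and SwapAllocCompiledMethod() as separate arguments now travel inside the encoded stack map.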
+
CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
uint32_t method_idx,
const DexFile& dex_file,
Handle<mirror::DexCache> dex_cache) const {
- if (GetCompilerDriver()->GetCompilerOptions().IsBootImage()) {
+ Runtime* runtime = Runtime::Current();
+ ArenaAllocator allocator(runtime->GetArenaPool());
+ ArenaStack arena_stack(runtime->GetArenaPool());
+
+ const CompilerOptions& compiler_options = GetCompilerOptions();
+ if (compiler_options.IsBootImage()) {
ScopedObjectAccess soa(Thread::Current());
- Runtime* runtime = Runtime::Current();
ArtMethod* method = runtime->GetClassLinker()->LookupResolvedMethod(
- method_idx, dex_cache.Get(), /* class_loader */ nullptr);
+ method_idx, dex_cache.Get(), /*class_loader=*/ nullptr);
if (method != nullptr && UNLIKELY(method->IsIntrinsic())) {
+ VariableSizedHandleScope handles(soa.Self());
ScopedNullHandle<mirror::ClassLoader> class_loader; // null means boot class path loader.
+ Handle<mirror::Class> compiling_class = handles.NewHandle(method->GetDeclaringClass());
DexCompilationUnit dex_compilation_unit(
class_loader,
runtime->GetClassLinker(),
dex_file,
- /* code_item */ nullptr,
- /* class_def_idx */ DexFile::kDexNoIndex16,
+ /*code_item=*/ nullptr,
+ /*class_def_idx=*/ DexFile::kDexNoIndex16,
method_idx,
access_flags,
- /* verified_method */ nullptr,
- dex_cache);
- ArenaAllocator allocator(runtime->GetArenaPool());
- ArenaStack arena_stack(runtime->GetArenaPool());
+ /*verified_method=*/ nullptr,
+ dex_cache,
+ compiling_class);
CodeVectorAllocator code_allocator(&allocator);
- VariableSizedHandleScope handles(soa.Self());
// Go to native so that we don't block GC during compilation.
ScopedThreadSuspension sts(soa.Self(), kNative);
std::unique_ptr<CodeGenerator> codegen(
@@ -1131,7 +1213,7 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
CompiledMethod* compiled_method = Emit(&allocator,
&code_allocator,
codegen.get(),
- /* code_item_for_osr_check */ nullptr);
+ /* item= */ nullptr);
compiled_method->MarkAsIntrinsic();
return compiled_method;
}
@@ -1139,28 +1221,24 @@ CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags,
}
JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod(
- GetCompilerDriver(), access_flags, method_idx, dex_file);
+ compiler_options, access_flags, method_idx, dex_file);
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub);
+
+ ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map.
+ ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(&stack_map_allocator,
+ jni_compiled_method);
return CompiledMethod::SwapAllocCompiledMethod(
- GetCompilerDriver(),
+ GetCompiledMethodStorage(),
jni_compiled_method.GetInstructionSet(),
jni_compiled_method.GetCode(),
- jni_compiled_method.GetFrameSize(),
- jni_compiled_method.GetCoreSpillMask(),
- jni_compiled_method.GetFpSpillMask(),
- /* method_info */ ArrayRef<const uint8_t>(),
- /* vmap_table */ ArrayRef<const uint8_t>(),
+ ArrayRef<const uint8_t>(stack_map),
jni_compiled_method.GetCfi(),
- /* patches */ ArrayRef<const linker::LinkerPatch>());
+ /* patches= */ ArrayRef<const linker::LinkerPatch>());
}
-Compiler* CreateOptimizingCompiler(CompilerDriver* driver) {
- return new OptimizingCompiler(driver);
-}
-
-bool IsCompilingWithCoreImage() {
- const std::string& image = Runtime::Current()->GetImageLocation();
- return CompilerDriver::IsCoreImageFilename(image);
+Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options,
+ CompiledMethodStorage* storage) {
+ return new OptimizingCompiler(compiler_options, storage);
}
bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) {
@@ -1168,23 +1246,10 @@ bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) {
return Runtime::Current() == nullptr || !Runtime::Current()->IsAotCompiler();
}
-bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee) {
- if (!Runtime::Current()->IsAotCompiler()) {
- // JIT can always encode methods in stack maps.
- return true;
- }
- if (IsSameDexFile(caller_dex_file, *callee->GetDexFile())) {
- return true;
- }
- // TODO(ngeoffray): Support more AOT cases for inlining:
- // - methods in multidex
- // - methods in boot image for on-device non-PIC compilation.
- return false;
-}
-
bool OptimizingCompiler::JitCompile(Thread* self,
jit::JitCodeCache* code_cache,
ArtMethod* method,
+ bool baseline,
bool osr,
jit::JitLogger* jit_logger) {
StackHandleScope<3> hs(self);
@@ -1195,7 +1260,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
const DexFile* dex_file = method->GetDexFile();
const uint16_t class_def_idx = method->GetClassDefIndex();
- const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
+ const dex::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
const uint32_t method_idx = method->GetDexMethodIndex();
const uint32_t access_flags = method->GetAccessFlags();
@@ -1203,37 +1268,52 @@ bool OptimizingCompiler::JitCompile(Thread* self,
ArenaAllocator allocator(runtime->GetJitArenaPool());
if (UNLIKELY(method->IsNative())) {
+ const CompilerOptions& compiler_options = GetCompilerOptions();
JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod(
- GetCompilerDriver(), access_flags, method_idx, *dex_file);
- ScopedNullHandle<mirror::ObjectArray<mirror::Object>> roots;
+ compiler_options, access_flags, method_idx, *dex_file);
+ std::vector<Handle<mirror::Object>> roots;
ArenaSet<ArtMethod*, std::less<ArtMethod*>> cha_single_implementation_list(
allocator.Adapter(kArenaAllocCHA));
+ ArenaStack arena_stack(runtime->GetJitArenaPool());
+ // StackMapStream is large and does not fit into this frame, so we need a helper method.
+ ScopedArenaAllocator stack_map_allocator(&arena_stack); // Will hold the stack map.
+ ScopedArenaVector<uint8_t> stack_map = CreateJniStackMap(&stack_map_allocator,
+ jni_compiled_method);
+ uint8_t* stack_map_data = nullptr;
+ uint8_t* roots_data = nullptr;
+ uint32_t data_size = code_cache->ReserveData(self,
+ stack_map.size(),
+ /* number_of_roots= */ 0,
+ method,
+ &stack_map_data,
+ &roots_data);
+ if (stack_map_data == nullptr || roots_data == nullptr) {
+ MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
+ return false;
+ }
+ memcpy(stack_map_data, stack_map.data(), stack_map.size());
+
const void* code = code_cache->CommitCode(
self,
method,
- /* stack_map_data */ nullptr,
- /* method_info_data */ nullptr,
- /* roots_data */ nullptr,
- jni_compiled_method.GetFrameSize(),
- jni_compiled_method.GetCoreSpillMask(),
- jni_compiled_method.GetFpSpillMask(),
+ stack_map_data,
+ roots_data,
jni_compiled_method.GetCode().data(),
jni_compiled_method.GetCode().size(),
- /* data_size */ 0u,
+ data_size,
osr,
roots,
- /* has_should_deoptimize_flag */ false,
+ /* has_should_deoptimize_flag= */ false,
cha_single_implementation_list);
if (code == nullptr) {
return false;
}
- const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
if (compiler_options.GenerateAnyDebugInfo()) {
const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
debug::MethodDebugInfo info = {};
- DCHECK(info.custom_name.empty());
+ info.custom_name = "art_jni_trampoline";
info.dex_file = dex_file;
info.class_def_index = class_def_idx;
info.dex_method_index = method_idx;
@@ -1265,6 +1345,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
std::unique_ptr<CodeGenerator> codegen;
{
+ Handle<mirror::Class> compiling_class = handles.NewHandle(method->GetDeclaringClass());
DexCompilationUnit dex_compilation_unit(
class_loader,
runtime->GetClassLinker(),
@@ -1273,8 +1354,9 @@ bool OptimizingCompiler::JitCompile(Thread* self,
class_def_idx,
method_idx,
access_flags,
- /* verified_method */ nullptr,
- dex_cache);
+ /*verified_method=*/ nullptr,
+ dex_cache,
+ compiling_class);
// Go to native so that we don't block GC during compilation.
ScopedThreadSuspension sts(self, kNative);
@@ -1284,6 +1366,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
&code_allocator,
dex_compilation_unit,
method,
+ baseline,
osr,
&handles));
if (codegen.get() == nullptr) {
@@ -1291,55 +1374,37 @@ bool OptimizingCompiler::JitCompile(Thread* self,
}
}
- size_t stack_map_size = 0;
- size_t method_info_size = 0;
- codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
+ ScopedArenaVector<uint8_t> stack_map = codegen->BuildStackMaps(code_item);
size_t number_of_roots = codegen->GetNumberOfJitRoots();
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- // We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots
- // will be visible by the GC between EmitLiterals and CommitCode. Once CommitCode is
- // executed, this array is not needed.
- Handle<mirror::ObjectArray<mirror::Object>> roots(
- hs.NewHandle(mirror::ObjectArray<mirror::Object>::Alloc(
- self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots)));
- if (roots == nullptr) {
- // Out of memory, just clear the exception to avoid any Java exception uncaught problems.
- MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
- DCHECK(self->IsExceptionPending());
- self->ClearException();
- return false;
- }
uint8_t* stack_map_data = nullptr;
- uint8_t* method_info_data = nullptr;
uint8_t* roots_data = nullptr;
uint32_t data_size = code_cache->ReserveData(self,
- stack_map_size,
- method_info_size,
+ stack_map.size(),
number_of_roots,
method,
&stack_map_data,
- &method_info_data,
&roots_data);
if (stack_map_data == nullptr || roots_data == nullptr) {
MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit);
return false;
}
- codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size),
- MemoryRegion(method_info_data, method_info_size),
- code_item);
- codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data);
+ memcpy(stack_map_data, stack_map.data(), stack_map.size());
+ std::vector<Handle<mirror::Object>> roots;
+ codegen->EmitJitRoots(code_allocator.GetData(), roots_data, &roots);
+ // The root Handle<>s filled by the codegen reference entries in the VariableSizedHandleScope.
+ DCHECK(std::all_of(roots.begin(),
+ roots.end(),
+ [&handles](Handle<mirror::Object> root){
+ return handles.Contains(root.GetReference());
+ }));
const void* code = code_cache->CommitCode(
self,
method,
stack_map_data,
- method_info_data,
roots_data,
- codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
- codegen->GetCoreSpillMask(),
- codegen->GetFpuSpillMask(),
code_allocator.GetMemory().data(),
- code_allocator.GetSize(),
+ code_allocator.GetMemory().size(),
data_size,
osr,
roots,
@@ -1352,7 +1417,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
return false;
}
- const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
+ const CompilerOptions& compiler_options = GetCompilerOptions();
if (compiler_options.GenerateAnyDebugInfo()) {
const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code);
const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode());
@@ -1369,16 +1434,16 @@ bool OptimizingCompiler::JitCompile(Thread* self,
info.is_optimized = true;
info.is_code_address_text_relative = false;
info.code_address = code_address;
- info.code_size = code_allocator.GetSize();
+ info.code_size = code_allocator.GetMemory().size();
info.frame_size_in_bytes = method_header->GetFrameSizeInBytes();
- info.code_info = stack_map_size == 0 ? nullptr : stack_map_data;
+ info.code_info = stack_map.size() == 0 ? nullptr : stack_map_data;
info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data());
GenerateJitDebugInfo(method, info);
}
Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed());
if (jit_logger != nullptr) {
- jit_logger->WriteLog(code, code_allocator.GetSize(), method);
+ jit_logger->WriteLog(code, code_allocator.GetMemory().size(), method);
}
if (kArenaAllocatorCountAllocations) {
@@ -1397,26 +1462,31 @@ bool OptimizingCompiler::JitCompile(Thread* self,
return true;
}
-void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo info) {
- const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions();
+void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method ATTRIBUTE_UNUSED,
+ const debug::MethodDebugInfo& info) {
+ const CompilerOptions& compiler_options = GetCompilerOptions();
DCHECK(compiler_options.GenerateAnyDebugInfo());
-
- // If both flags are passed, generate full debug info.
- const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
-
- // Create entry for the single method that we just compiled.
- std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
- GetCompilerDriver()->GetInstructionSet(),
- GetCompilerDriver()->GetInstructionSetFeatures(),
- mini_debug_info,
- ArrayRef<const debug::MethodDebugInfo>(&info, 1));
- MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_);
- AddNativeDebugInfoForJit(reinterpret_cast<const void*>(info.code_address), elf_file);
-
- VLOG(jit)
- << "JIT mini-debug-info added for " << ArtMethod::PrettyMethod(method)
- << " size=" << PrettySize(elf_file.size())
- << " total_size=" << PrettySize(GetJitNativeDebugInfoMemUsage());
+ TimingLogger logger("Generate JIT debug info logger", true, VLOG_IS_ON(jit));
+ {
+ TimingLogger::ScopedTiming st("Generate JIT debug info", &logger);
+
+ // If both flags are passed, generate full debug info.
+ const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo();
+
+ // Create entry for the single method that we just compiled.
+ std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT(
+ compiler_options.GetInstructionSet(),
+ compiler_options.GetInstructionSetFeatures(),
+ mini_debug_info,
+ info);
+ AddNativeDebugInfoForJit(Thread::Current(),
+ reinterpret_cast<const void*>(info.code_address),
+ elf_file,
+ debug::PackElfFileForJIT,
+ compiler_options.GetInstructionSet(),
+ compiler_options.GetInstructionSetFeatures());
+ }
+ Runtime::Current()->GetJit()->AddTimingLogger(logger);
}
} // namespace art
diff --git a/compiler/optimizing/optimizing_compiler.h b/compiler/optimizing/optimizing_compiler.h
index d8cea30a6b..cd6d684590 100644
--- a/compiler/optimizing/optimizing_compiler.h
+++ b/compiler/optimizing/optimizing_compiler.h
@@ -17,26 +17,21 @@
#ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_
#define ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_H_
+#include "base/globals.h"
#include "base/mutex.h"
-#include "globals.h"
namespace art {
class ArtMethod;
class Compiler;
-class CompilerDriver;
+class CompiledMethodStorage;
+class CompilerOptions;
class DexFile;
-Compiler* CreateOptimizingCompiler(CompilerDriver* driver);
-
-// Returns whether we are compiling against a "core" image, which
-// is an indicative we are running tests. The compiler will use that
-// information for checking invariants.
-bool IsCompilingWithCoreImage();
+Compiler* CreateOptimizingCompiler(const CompilerOptions& compiler_options,
+ CompiledMethodStorage* storage);
bool EncodeArtMethodInInlineInfo(ArtMethod* method);
-bool CanEncodeInlinedMethodInStackMap(const DexFile& caller_dex_file, ArtMethod* callee)
- REQUIRES_SHARED(Locks::mutator_lock_);
} // namespace art
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 00194ff1fe..ddd57f5f1a 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -22,9 +22,10 @@
#include <string>
#include <type_traits>
+#include <android-base/logging.h>
+
#include "base/atomic.h"
#include "base/globals.h"
-#include "base/logging.h" // For VLOG_IS_ON.
namespace art {
@@ -59,6 +60,7 @@ enum class MethodCompilationStat {
kNotCompiledUnsupportedIsa,
kNotCompiledVerificationError,
kNotCompiledVerifyAtRuntime,
+ kNotCompiledIrreducibleLoopAndStringInit,
kInlinedMonomorphicCall,
kInlinedPolymorphicCall,
kMonomorphicCall,
@@ -99,6 +101,7 @@ enum class MethodCompilationStat {
kConstructorFenceRemovedLSE,
kConstructorFenceRemovedPFRA,
kConstructorFenceRemovedCFRE,
+ kBitstringTypeCheck,
kJitOutOfMemoryForCommit,
kLastStat
};
@@ -124,11 +127,6 @@ class OptimizingCompilerStats {
}
void Log() const {
- if (!kIsDebugBuild && !VLOG_IS_ON(compiler)) {
- // Log only in debug builds or if the compiler is verbose.
- return;
- }
-
uint32_t compiled_intrinsics = GetStat(MethodCompilationStat::kCompiledIntrinsic);
uint32_t compiled_native_stubs = GetStat(MethodCompilationStat::kCompiledNativeStub);
uint32_t bytecode_attempts =
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 6dcbadba6e..e5f694109a 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -20,6 +20,7 @@
#include <memory>
#include <vector>
+#include "base/malloc_arena_pool.h"
#include "base/scoped_arena_allocator.h"
#include "builder.h"
#include "common_compiler_test.h"
@@ -28,6 +29,7 @@
#include "dex/dex_instruction.h"
#include "dex/standard_dex_file.h"
#include "driver/dex_compilation_unit.h"
+#include "graph_checker.h"
#include "handle_scope-inl.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
@@ -97,7 +99,7 @@ class ArenaPoolAndAllocator {
ScopedArenaAllocator* GetScopedAllocator() { return &scoped_allocator_; }
private:
- ArenaPool pool_;
+ MallocArenaPool pool_;
ArenaAllocator allocator_;
ArenaStack arena_stack_;
ScopedArenaAllocator scoped_allocator_;
@@ -153,7 +155,7 @@ class OptimizingUnitTestHelper {
void* aligned_data = GetAllocator()->Alloc(code_item_size);
memcpy(aligned_data, &data[0], code_item_size);
CHECK_ALIGNED(aligned_data, StandardDexFile::CodeItem::kAlignment);
- const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(aligned_data);
+ const dex::CodeItem* code_item = reinterpret_cast<const dex::CodeItem*>(aligned_data);
{
ScopedObjectAccess soa(Thread::Current());
@@ -163,13 +165,13 @@ class OptimizingUnitTestHelper {
const DexCompilationUnit* dex_compilation_unit =
new (graph->GetAllocator()) DexCompilationUnit(
handles_->NewHandle<mirror::ClassLoader>(nullptr),
- /* class_linker */ nullptr,
+ /* class_linker= */ nullptr,
graph->GetDexFile(),
code_item,
- /* class_def_index */ DexFile::kDexNoIndex16,
- /* method_idx */ dex::kDexNoIndex,
- /* access_flags */ 0u,
- /* verified_method */ nullptr,
+ /* class_def_index= */ DexFile::kDexNoIndex16,
+ /* method_idx= */ dex::kDexNoIndex,
+ /* access_flags= */ 0u,
+ /* verified_method= */ nullptr,
handles_->NewHandle<mirror::DexCache>(nullptr));
CodeItemDebugInfoAccessor accessor(graph->GetDexFile(), code_item, /*dex_method_idx*/ 0u);
HGraphBuilder builder(graph, dex_compilation_unit, accessor, handles_.get(), return_type);
@@ -186,6 +188,77 @@ class OptimizingUnitTestHelper {
class OptimizingUnitTest : public CommonCompilerTest, public OptimizingUnitTestHelper {};
+// OptimizingUnitTest with some handy functions to ease the graph creation.
+class ImprovedOptimizingUnitTest : public OptimizingUnitTest {
+ public:
+ ImprovedOptimizingUnitTest() : graph_(CreateGraph()),
+ entry_block_(nullptr),
+ return_block_(nullptr),
+ exit_block_(nullptr),
+ parameter_(nullptr) {}
+
+ virtual ~ImprovedOptimizingUnitTest() {}
+
+ void InitGraph() {
+ entry_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(entry_block_);
+ graph_->SetEntryBlock(entry_block_);
+
+ return_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(return_block_);
+
+ exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(exit_block_);
+ graph_->SetExitBlock(exit_block_);
+
+ entry_block_->AddSuccessor(return_block_);
+ return_block_->AddSuccessor(exit_block_);
+
+ parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 0,
+ DataType::Type::kInt32);
+ entry_block_->AddInstruction(parameter_);
+ return_block_->AddInstruction(new (GetAllocator()) HReturnVoid());
+ exit_block_->AddInstruction(new (GetAllocator()) HExit());
+ }
+
+ bool CheckGraph() {
+ GraphChecker checker(graph_);
+ checker.Run();
+ if (!checker.IsValid()) {
+ for (const std::string& error : checker.GetErrors()) {
+ std::cout << error << std::endl;
+ }
+ return false;
+ }
+ return true;
+ }
+
+ HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction,
+ ArenaVector<HInstruction*>* current_locals) {
+ HEnvironment* environment = new (GetAllocator()) HEnvironment(
+ (GetAllocator()),
+ current_locals->size(),
+ graph_->GetArtMethod(),
+ instruction->GetDexPc(),
+ instruction);
+
+ environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals));
+ instruction->SetRawEnvironment(environment);
+ return environment;
+ }
+
+ protected:
+ HGraph* graph_;
+
+ HBasicBlock* entry_block_;
+ HBasicBlock* return_block_;
+ HBasicBlock* exit_block_;
+
+ HInstruction* parameter_;
+};
+
// Naive string diff data type.
typedef std::list<std::pair<std::string, std::string>> diff_t;
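
For context, a hypothetical test built on the new ImprovedOptimizingUnitTest helper might look as follows; the fixture and test names are invented, and only InitGraph(), CheckGraph(), and the protected members come from the class added above:

```cpp
#include <gtest/gtest.h>

// Hypothetical usage sketch; assumes the headers above are available.
class ExamplePassTest : public ImprovedOptimizingUnitTest {};

TEST_F(ExamplePassTest, GraphIsWellFormed) {
  InitGraph();  // Builds entry_block_ -> return_block_ -> exit_block_
                // with one int32 parameter_ in the entry block.
  // A real test would insert the instructions under test into
  // return_block_ here, before the HReturnVoid.
  EXPECT_TRUE(CheckGraph());  // Runs GraphChecker, printing any errors.
}
```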
diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h
index e6e069f96e..5fadcab402 100644
--- a/compiler/optimizing/parallel_move_resolver.h
+++ b/compiler/optimizing/parallel_move_resolver.h
@@ -58,7 +58,7 @@ class ParallelMoveResolverWithSwap : public ParallelMoveResolver {
virtual ~ParallelMoveResolverWithSwap() {}
// Resolve a set of parallel moves, emitting assembler instructions.
- void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE;
+ void EmitNativeCode(HParallelMove* parallel_move) override;
protected:
class ScratchRegisterScope : public ValueObject {
@@ -133,7 +133,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver {
virtual ~ParallelMoveResolverNoSwap() {}
// Resolve a set of parallel moves, emitting assembler instructions.
- void EmitNativeCode(HParallelMove* parallel_move) OVERRIDE;
+ void EmitNativeCode(HParallelMove* parallel_move) override;
protected:
// Called at the beginning of EmitNativeCode(). A subclass may put some architecture dependent
diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc
index cb87cabe1c..a8ab6cdd0c 100644
--- a/compiler/optimizing/parallel_move_test.cc
+++ b/compiler/optimizing/parallel_move_test.cc
@@ -15,6 +15,7 @@
*/
#include "base/arena_allocator.h"
+#include "base/malloc_arena_pool.h"
#include "nodes.h"
#include "parallel_move_resolver.h"
@@ -55,7 +56,7 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap {
explicit TestParallelMoveResolverWithSwap(ArenaAllocator* allocator)
: ParallelMoveResolverWithSwap(allocator) {}
- void EmitMove(size_t index) OVERRIDE {
+ void EmitMove(size_t index) override {
MoveOperands* move = moves_[index];
if (!message_.str().empty()) {
message_ << " ";
@@ -67,7 +68,7 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap {
message_ << ")";
}
- void EmitSwap(size_t index) OVERRIDE {
+ void EmitSwap(size_t index) override {
MoveOperands* move = moves_[index];
if (!message_.str().empty()) {
message_ << " ";
@@ -79,8 +80,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap {
message_ << ")";
}
- void SpillScratch(int reg ATTRIBUTE_UNUSED) OVERRIDE {}
- void RestoreScratch(int reg ATTRIBUTE_UNUSED) OVERRIDE {}
+ void SpillScratch(int reg ATTRIBUTE_UNUSED) override {}
+ void RestoreScratch(int reg ATTRIBUTE_UNUSED) override {}
std::string GetMessage() const {
return message_.str();
@@ -98,13 +99,13 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap {
explicit TestParallelMoveResolverNoSwap(ArenaAllocator* allocator)
: ParallelMoveResolverNoSwap(allocator), scratch_index_(kScratchRegisterStartIndexForTest) {}
- void PrepareForEmitNativeCode() OVERRIDE {
+ void PrepareForEmitNativeCode() override {
scratch_index_ = kScratchRegisterStartIndexForTest;
}
- void FinishEmitNativeCode() OVERRIDE {}
+ void FinishEmitNativeCode() override {}
- Location AllocateScratchLocationFor(Location::Kind kind) OVERRIDE {
+ Location AllocateScratchLocationFor(Location::Kind kind) override {
if (kind == Location::kStackSlot || kind == Location::kFpuRegister ||
kind == Location::kRegister) {
kind = Location::kRegister;
@@ -124,9 +125,9 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap {
return scratch;
}
- void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {}
+ void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) override {}
- void EmitMove(size_t index) OVERRIDE {
+ void EmitMove(size_t index) override {
MoveOperands* move = moves_[index];
if (!message_.str().empty()) {
message_ << " ";
@@ -173,14 +174,14 @@ class ParallelMoveTest : public ::testing::Test {
template<> const bool ParallelMoveTest<TestParallelMoveResolverWithSwap>::has_swap = true;
template<> const bool ParallelMoveTest<TestParallelMoveResolverNoSwap>::has_swap = false;
-typedef ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap>
- ParallelMoveResolverTestTypes;
+using ParallelMoveResolverTestTypes =
+ ::testing::Types<TestParallelMoveResolverWithSwap, TestParallelMoveResolverNoSwap>;
TYPED_TEST_CASE(ParallelMoveTest, ParallelMoveResolverTestTypes);
TYPED_TEST(ParallelMoveTest, Dependency) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -207,7 +208,7 @@ TYPED_TEST(ParallelMoveTest, Dependency) {
}
TYPED_TEST(ParallelMoveTest, Cycle) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -257,7 +258,7 @@ TYPED_TEST(ParallelMoveTest, Cycle) {
}
TYPED_TEST(ParallelMoveTest, ConstantLast) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
TypeParam resolver(&allocator);
HParallelMove* moves = new (&allocator) HParallelMove(&allocator);
@@ -276,7 +277,7 @@ TYPED_TEST(ParallelMoveTest, ConstantLast) {
}
TYPED_TEST(ParallelMoveTest, Pairs) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -453,7 +454,7 @@ TYPED_TEST(ParallelMoveTest, Pairs) {
}
TYPED_TEST(ParallelMoveTest, MultiCycles) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -551,7 +552,7 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) {
// Test that we do 64bits moves before 32bits moves.
TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
@@ -610,7 +611,7 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) {
}
TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaAllocator allocator(&pool);
{
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index 9d5358514e..05208ff65c 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -58,7 +58,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
DCHECK(base_ != nullptr);
}
- void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override {
// If this is an invoke with PC-relative load kind,
// we need to add the base as the special input.
if (invoke->HasPcRelativeMethodLoadKind() &&
@@ -70,13 +70,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
}
- void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
+ void VisitLoadClass(HLoadClass* load_class) override {
HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
switch (load_kind) {
case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadClass::LoadKind::kBootImageAddress:
- case HLoadClass::LoadKind::kBootImageClassTable:
+ case HLoadClass::LoadKind::kBootImageRelRo:
case HLoadClass::LoadKind::kBssEntry:
+ case HLoadClass::LoadKind::kJitBootImageAddress:
// Add a base register for PC-relative literals on R2.
InitializePCRelativeBasePointer();
load_class->AddSpecialInput(base_);
@@ -86,13 +86,13 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
}
- void VisitLoadString(HLoadString* load_string) OVERRIDE {
+ void VisitLoadString(HLoadString* load_string) override {
HLoadString::LoadKind load_kind = load_string->GetLoadKind();
switch (load_kind) {
case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
- case HLoadString::LoadKind::kBootImageAddress:
- case HLoadString::LoadKind::kBootImageInternTable:
+ case HLoadString::LoadKind::kBootImageRelRo:
case HLoadString::LoadKind::kBssEntry:
+ case HLoadString::LoadKind::kJitBootImageAddress:
// Add a base register for PC-relative literals on R2.
InitializePCRelativeBasePointer();
load_string->AddSpecialInput(base_);
@@ -102,7 +102,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
}
- void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+ void VisitPackedSwitch(HPackedSwitch* switch_insn) override {
if (switch_insn->GetNumEntries() <=
InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) {
return;
@@ -128,20 +128,21 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
HMipsComputeBaseMethodAddress* base_;
};
-void PcRelativeFixups::Run() {
+bool PcRelativeFixups::Run() {
CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
// Do nothing for R6 because it has PC-relative addressing.
- return;
+ return false;
}
if (graph_->HasIrreducibleLoops()) {
// Do not run this optimization, as irreducible loops do not work with an instruction
// that can be live-in at the irreducible loop header.
- return;
+ return false;
}
PCRelativeHandlerVisitor visitor(graph_, codegen_);
visitor.VisitInsertionOrder();
visitor.MoveBaseIfNeeded();
+ return true;
}
} // namespace mips
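
The fixups passes in this change all adopt the same new contract for HOptimization::Run(): return whether the pass actually ran and may have changed the graph. A minimal sketch of that contract, with a hypothetical pass name:

```cpp
// Sketch of the bool Run() contract adopted here; MyFixups is invented.
bool MyFixups::Run() {
  if (!IsApplicable()) {  // e.g. R6 already has PC-relative addressing,
    return false;         // or irreducible loops rule the pass out.
  }
  ApplyFixups();
  return true;            // The graph may have been modified.
}
```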
diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h
index ec2c711f8d..872370bcb7 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.h
+++ b/compiler/optimizing/pc_relative_fixups_mips.h
@@ -34,7 +34,7 @@ class PcRelativeFixups : public HOptimization {
static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips";
- void Run() OVERRIDE;
+ bool Run() override;
private:
CodeGenerator* codegen_;
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index f92f4b274a..1d8d1a6e90 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -41,60 +41,54 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
private:
- void VisitAdd(HAdd* add) OVERRIDE {
+ void VisitAdd(HAdd* add) override {
BinaryFP(add);
}
- void VisitSub(HSub* sub) OVERRIDE {
+ void VisitSub(HSub* sub) override {
BinaryFP(sub);
}
- void VisitMul(HMul* mul) OVERRIDE {
+ void VisitMul(HMul* mul) override {
BinaryFP(mul);
}
- void VisitDiv(HDiv* div) OVERRIDE {
+ void VisitDiv(HDiv* div) override {
BinaryFP(div);
}
- void VisitCompare(HCompare* compare) OVERRIDE {
+ void VisitCompare(HCompare* compare) override {
BinaryFP(compare);
}
- void VisitReturn(HReturn* ret) OVERRIDE {
+ void VisitReturn(HReturn* ret) override {
HConstant* value = ret->InputAt(0)->AsConstant();
if ((value != nullptr && DataType::IsFloatingPointType(value->GetType()))) {
ReplaceInput(ret, value, 0, true);
}
}
- void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override {
HandleInvoke(invoke);
}
- void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE {
+ void VisitInvokeVirtual(HInvokeVirtual* invoke) override {
HandleInvoke(invoke);
}
- void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE {
+ void VisitInvokeInterface(HInvokeInterface* invoke) override {
HandleInvoke(invoke);
}
- void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
- HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
- if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadClass::LoadKind::kBootImageClassTable ||
- load_kind == HLoadClass::LoadKind::kBssEntry) {
+ void VisitLoadClass(HLoadClass* load_class) override {
+ if (load_class->HasPcRelativeLoadKind()) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class);
load_class->AddSpecialInput(method_address);
}
}
- void VisitLoadString(HLoadString* load_string) OVERRIDE {
- HLoadString::LoadKind load_kind = load_string->GetLoadKind();
- if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
- load_kind == HLoadString::LoadKind::kBootImageInternTable ||
- load_kind == HLoadString::LoadKind::kBssEntry) {
+ void VisitLoadString(HLoadString* load_string) override {
+ if (load_string->HasPcRelativeLoadKind()) {
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string);
load_string->AddSpecialInput(method_address);
}
@@ -107,31 +101,31 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
}
- void VisitEqual(HEqual* cond) OVERRIDE {
+ void VisitEqual(HEqual* cond) override {
BinaryFP(cond);
}
- void VisitNotEqual(HNotEqual* cond) OVERRIDE {
+ void VisitNotEqual(HNotEqual* cond) override {
BinaryFP(cond);
}
- void VisitLessThan(HLessThan* cond) OVERRIDE {
+ void VisitLessThan(HLessThan* cond) override {
BinaryFP(cond);
}
- void VisitLessThanOrEqual(HLessThanOrEqual* cond) OVERRIDE {
+ void VisitLessThanOrEqual(HLessThanOrEqual* cond) override {
BinaryFP(cond);
}
- void VisitGreaterThan(HGreaterThan* cond) OVERRIDE {
+ void VisitGreaterThan(HGreaterThan* cond) override {
BinaryFP(cond);
}
- void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) OVERRIDE {
+ void VisitGreaterThanOrEqual(HGreaterThanOrEqual* cond) override {
BinaryFP(cond);
}
- void VisitNeg(HNeg* neg) OVERRIDE {
+ void VisitNeg(HNeg* neg) override {
if (DataType::IsFloatingPointType(neg->GetType())) {
// We need to replace the HNeg with a HX86FPNeg in order to address the constant area.
HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(neg);
@@ -146,7 +140,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
}
- void VisitPackedSwitch(HPackedSwitch* switch_insn) OVERRIDE {
+ void VisitPackedSwitch(HPackedSwitch* switch_insn) override {
if (switch_insn->GetNumEntries() <=
InstructionCodeGeneratorX86::kPackedSwitchJumpTableThreshold) {
return;
@@ -199,18 +193,19 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
void HandleInvoke(HInvoke* invoke) {
- // If this is an invoke-static/-direct with PC-relative dex cache array
- // addressing, we need the PC-relative address base.
HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
- // We can't add a pointer to the constant area if we already have a current
- // method pointer. This may arise when sharpening doesn't remove the current
- // method pointer from the invoke.
- if (invoke_static_or_direct != nullptr &&
- invoke_static_or_direct->HasCurrentMethodInput()) {
+
+ // We can't add the method address if we already have a current method pointer.
+ // This may arise when sharpening doesn't remove the current method pointer from the invoke.
+ if (invoke_static_or_direct != nullptr && invoke_static_or_direct->HasCurrentMethodInput()) {
+ // Note: This happens only for recursive calls (including compiling an intrinsic
+ // by faking a call to itself; we use kRuntimeCall for this case).
DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind());
return;
}
+ // If this is an invoke-static/-direct with PC-relative addressing (within boot image
+ // or using .bss or .data.bimg.rel.ro), we need the PC-relative address base.
bool base_added = false;
if (invoke_static_or_direct != nullptr &&
invoke_static_or_direct->HasPcRelativeMethodLoadKind() &&
@@ -230,15 +225,16 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
}
}
- // These intrinsics need the constant area.
switch (invoke->GetIntrinsic()) {
- case Intrinsics::kMathAbsDouble:
- case Intrinsics::kMathAbsFloat:
- case Intrinsics::kMathMaxDoubleDouble:
- case Intrinsics::kMathMaxFloatFloat:
- case Intrinsics::kMathMinDoubleDouble:
- case Intrinsics::kMathMinFloatFloat:
+ case Intrinsics::kIntegerValueOf:
+ // This intrinsic can be call-free if it loads the address of the boot image object.
+ // If we're compiling PIC, we need the address base for loading from .data.bimg.rel.ro.
+ if (!codegen_->GetCompilerOptions().GetCompilePic()) {
+ break;
+ }
+ FALLTHROUGH_INTENDED;
case Intrinsics::kMathRoundFloat:
+ // This intrinsic needs the constant area.
if (!base_added) {
DCHECK(invoke_static_or_direct != nullptr);
DCHECK(!invoke_static_or_direct->HasCurrentMethodInput());
@@ -259,10 +255,11 @@ class PCRelativeHandlerVisitor : public HGraphVisitor {
HX86ComputeBaseMethodAddress* base_;
};
-void PcRelativeFixups::Run() {
+bool PcRelativeFixups::Run() {
PCRelativeHandlerVisitor visitor(graph_, codegen_);
visitor.VisitInsertionOrder();
visitor.MoveBaseIfNeeded();
+ return true;
}
} // namespace x86
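
The intrinsic handling above amounts to a small decision table; the helper below is an illustrative restatement, not ART code:

```cpp
// Illustrative restatement of the switch above (names taken from the diff):
// kIntegerValueOf needs the method-address base only under PIC, where the
// boot image object is reached via .data.bimg.rel.ro; kMathRoundFloat
// always needs it to address the constant area.
bool IntrinsicNeedsMethodAddressBase(Intrinsics intrinsic, bool compile_pic) {
  switch (intrinsic) {
    case Intrinsics::kIntegerValueOf: return compile_pic;
    case Intrinsics::kMathRoundFloat: return true;
    default: return false;
  }
}
```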
diff --git a/compiler/optimizing/pc_relative_fixups_x86.h b/compiler/optimizing/pc_relative_fixups_x86.h
index 72fa71ea94..3b470a6502 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.h
+++ b/compiler/optimizing/pc_relative_fixups_x86.h
@@ -34,7 +34,7 @@ class PcRelativeFixups : public HOptimization {
static constexpr const char* kPcRelativeFixupsX86PassName = "pc_relative_fixups_x86";
- void Run() OVERRIDE;
+ bool Run() override;
private:
CodeGenerator* codegen_;
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index f843c008d8..fbdbf9d086 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -17,7 +17,8 @@
#include "prepare_for_register_allocation.h"
#include "dex/dex_file_types.h"
-#include "jni_internal.h"
+#include "driver/compiler_options.h"
+#include "jni/jni_internal.h"
#include "optimizing_compiler_stats.h"
#include "well_known_classes.h"
@@ -27,15 +28,42 @@ void PrepareForRegisterAllocation::Run() {
// Order does not matter.
for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
// No need to visit the phis.
- for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
+ for (HInstructionIteratorHandleChanges inst_it(block->GetInstructions()); !inst_it.Done();
inst_it.Advance()) {
inst_it.Current()->Accept(this);
}
}
}
+void PrepareForRegisterAllocation::VisitCheckCast(HCheckCast* check_cast) {
+ // Record only those bitstring type checks that make it to the codegen stage.
+ if (check_cast->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+ }
+}
+
+void PrepareForRegisterAllocation::VisitInstanceOf(HInstanceOf* instance_of) {
+ // Record only those bitstring type checks that make it to the codegen stage.
+ if (instance_of->GetTypeCheckKind() == TypeCheckKind::kBitstringCheck) {
+ MaybeRecordStat(stats_, MethodCompilationStat::kBitstringTypeCheck);
+ }
+}
+
void PrepareForRegisterAllocation::VisitNullCheck(HNullCheck* check) {
check->ReplaceWith(check->InputAt(0));
+ if (compiler_options_.GetImplicitNullChecks()) {
+ HInstruction* next = check->GetNext();
+
+ // The `PrepareForRegisterAllocation` pass removes `HBoundType` from the graph,
+ // so do it ourselves now to not prevent optimizations.
+ while (next->IsBoundType()) {
+ next = next->GetNext();
+ VisitBoundType(next->GetPrevious()->AsBoundType());
+ }
+ if (next->CanDoImplicitNullCheckOn(check->InputAt(0))) {
+ check->MarkEmittedAtUseSite();
+ }
+ }
}
void PrepareForRegisterAllocation::VisitDivZeroCheck(HDivZeroCheck* check) {
@@ -59,9 +87,9 @@ void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
if (GetGraph()->GetArtMethod() != char_at_method) {
ArenaAllocator* allocator = GetGraph()->GetAllocator();
HEnvironment* environment = new (allocator) HEnvironment(allocator,
- /* number_of_vregs */ 0u,
+ /* number_of_vregs= */ 0u,
char_at_method,
- /* dex_pc */ dex::kDexNoIndex,
+ /* dex_pc= */ dex::kDexNoIndex,
check);
check->InsertRawEnvironment(environment);
}
@@ -136,7 +164,9 @@ void PrepareForRegisterAllocation::VisitClinitCheck(HClinitCheck* check) {
if (can_merge_with_load_class && !load_class->HasUses()) {
load_class->GetBlock()->RemoveInstruction(load_class);
}
- } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) {
+ } else if (can_merge_with_load_class &&
+ load_class->GetLoadKind() != HLoadClass::LoadKind::kRuntimeCall) {
+ DCHECK(!load_class->NeedsAccessCheck());
// Pass the initialization duty to the `HLoadClass` instruction,
// and remove the instruction from the graph.
DCHECK(load_class->HasEnvironment());
@@ -274,4 +304,13 @@ bool PrepareForRegisterAllocation::CanMoveClinitCheck(HInstruction* input,
return true;
}
+void PrepareForRegisterAllocation::VisitTypeConversion(HTypeConversion* instruction) {
+ // For simplicity, our code generators don't handle implicit type conversion, so ensure
+ // there are none before hitting codegen.
+ if (instruction->IsImplicitConversion()) {
+ instruction->ReplaceWith(instruction->GetInput());
+ instruction->GetBlock()->RemoveInstruction(instruction);
+ }
+}
+
} // namespace art
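
A worked example of the VisitNullCheck change, on a hypothetical graph fragment:

```cpp
// Hypothetical IR fragment illustrating the BoundType skip above:
//   i10 NullCheck(i5)            <- `check`
//   i11 BoundType(i10)           <- removed eagerly via VisitBoundType
//   i12 InstanceFieldGet(i11)    <- the real `next`, dereferences the object
// With implicit null checks enabled, i12's memory access can stand in for
// i10, so the null check is marked emitted-at-use-site and no explicit
// compare-and-branch is generated.
```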
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index 2c64f016c1..e0bb76eb22 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -21,6 +21,7 @@
namespace art {
+class CompilerOptions;
class OptimizingCompilerStats;
/**
@@ -30,9 +31,11 @@ class OptimizingCompilerStats;
*/
class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
public:
- explicit PrepareForRegisterAllocation(HGraph* graph,
- OptimizingCompilerStats* stats = nullptr)
- : HGraphDelegateVisitor(graph, stats) {}
+ PrepareForRegisterAllocation(HGraph* graph,
+ const CompilerOptions& compiler_options,
+ OptimizingCompilerStats* stats = nullptr)
+ : HGraphDelegateVisitor(graph, stats),
+ compiler_options_(compiler_options) {}
void Run();
@@ -40,20 +43,25 @@ class PrepareForRegisterAllocation : public HGraphDelegateVisitor {
"prepare_for_register_allocation";
private:
- void VisitNullCheck(HNullCheck* check) OVERRIDE;
- void VisitDivZeroCheck(HDivZeroCheck* check) OVERRIDE;
- void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE;
- void VisitBoundType(HBoundType* bound_type) OVERRIDE;
- void VisitArraySet(HArraySet* instruction) OVERRIDE;
- void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
- void VisitCondition(HCondition* condition) OVERRIDE;
- void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE;
- void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
- void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
+ void VisitCheckCast(HCheckCast* check_cast) override;
+ void VisitInstanceOf(HInstanceOf* instance_of) override;
+ void VisitNullCheck(HNullCheck* check) override;
+ void VisitDivZeroCheck(HDivZeroCheck* check) override;
+ void VisitBoundsCheck(HBoundsCheck* check) override;
+ void VisitBoundType(HBoundType* bound_type) override;
+ void VisitArraySet(HArraySet* instruction) override;
+ void VisitClinitCheck(HClinitCheck* check) override;
+ void VisitCondition(HCondition* condition) override;
+ void VisitConstructorFence(HConstructorFence* constructor_fence) override;
+ void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) override;
+ void VisitDeoptimize(HDeoptimize* deoptimize) override;
+ void VisitTypeConversion(HTypeConversion* instruction) override;
bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const;
bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const;
+ const CompilerOptions& compiler_options_;
+
DISALLOW_COPY_AND_ASSIGN(PrepareForRegisterAllocation);
};
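
With the constructor now taking CompilerOptions, call sites have to thread the options through. A hypothetical instantiation, assuming `graph`, `codegen`, and `stats` are in scope:

```cpp
// Sketch of a call site under the new constructor (stats is optional).
PrepareForRegisterAllocation prepare(graph,
                                     codegen->GetCompilerOptions(),
                                     stats);
prepare.Run();
```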
diff --git a/compiler/optimizing/pretty_printer.h b/compiler/optimizing/pretty_printer.h
index c6579dc5e0..8ef9ce4e8b 100644
--- a/compiler/optimizing/pretty_printer.h
+++ b/compiler/optimizing/pretty_printer.h
@@ -33,7 +33,7 @@ class HPrettyPrinter : public HGraphVisitor {
PrintString(": ");
}
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
PrintPreInstruction(instruction);
PrintString(instruction->DebugName());
PrintPostInstruction(instruction);
@@ -70,7 +70,7 @@ class HPrettyPrinter : public HGraphVisitor {
PrintNewLine();
}
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ void VisitBasicBlock(HBasicBlock* block) override {
PrintString("BasicBlock ");
PrintInt(block->GetBlockId());
const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors();
@@ -108,15 +108,15 @@ class StringPrettyPrinter : public HPrettyPrinter {
explicit StringPrettyPrinter(HGraph* graph)
: HPrettyPrinter(graph), str_(""), current_block_(nullptr) { }
- void PrintInt(int value) OVERRIDE {
+ void PrintInt(int value) override {
str_ += android::base::StringPrintf("%d", value);
}
- void PrintString(const char* value) OVERRIDE {
+ void PrintString(const char* value) override {
str_ += value;
}
- void PrintNewLine() OVERRIDE {
+ void PrintNewLine() override {
str_ += '\n';
}
@@ -124,12 +124,12 @@ class StringPrettyPrinter : public HPrettyPrinter {
std::string str() const { return str_; }
- void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
+ void VisitBasicBlock(HBasicBlock* block) override {
current_block_ = block;
HPrettyPrinter::VisitBasicBlock(block);
}
- void VisitGoto(HGoto* gota) OVERRIDE {
+ void VisitGoto(HGoto* gota) override {
PrintString(" ");
PrintInt(gota->GetId());
PrintString(": Goto ");
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 67a61fc01d..4929e0a3a1 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -22,6 +22,7 @@
#include "base/scoped_arena_containers.h"
#include "base/enums.h"
#include "class_linker-inl.h"
+#include "class_root.h"
#include "handle_scope-inl.h"
#include "mirror/class-inl.h"
#include "mirror/dex_cache.h"
@@ -40,31 +41,40 @@ static inline ObjPtr<mirror::DexCache> FindDexCacheWithHint(
}
static inline ReferenceTypeInfo::TypeHandle GetRootHandle(VariableSizedHandleScope* handles,
- ClassLinker::ClassRoot class_root,
+ ClassRoot class_root,
ReferenceTypeInfo::TypeHandle* cache) {
if (!ReferenceTypeInfo::IsValidHandle(*cache)) {
// Mutator lock is required for NewHandle.
- ClassLinker* linker = Runtime::Current()->GetClassLinker();
ScopedObjectAccess soa(Thread::Current());
- *cache = handles->NewHandle(linker->GetClassRoot(class_root));
+ *cache = handles->NewHandle(GetClassRoot(class_root));
}
return *cache;
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetObjectClassHandle() {
- return GetRootHandle(handles_, ClassLinker::kJavaLangObject, &object_class_handle_);
+ return GetRootHandle(handles_, ClassRoot::kJavaLangObject, &object_class_handle_);
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetClassClassHandle() {
- return GetRootHandle(handles_, ClassLinker::kJavaLangClass, &class_class_handle_);
+ return GetRootHandle(handles_, ClassRoot::kJavaLangClass, &class_class_handle_);
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodHandleClassHandle() {
+ return GetRootHandle(handles_,
+ ClassRoot::kJavaLangInvokeMethodHandleImpl,
+ &method_handle_class_handle_);
+}
+
+ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetMethodTypeClassHandle() {
+ return GetRootHandle(handles_, ClassRoot::kJavaLangInvokeMethodType, &method_type_class_handle_);
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetStringClassHandle() {
- return GetRootHandle(handles_, ClassLinker::kJavaLangString, &string_class_handle_);
+ return GetRootHandle(handles_, ClassRoot::kJavaLangString, &string_class_handle_);
}
ReferenceTypeInfo::TypeHandle ReferenceTypePropagation::HandleCache::GetThrowableClassHandle() {
- return GetRootHandle(handles_, ClassLinker::kJavaLangThrowable, &throwable_class_handle_);
+ return GetRootHandle(handles_, ClassRoot::kJavaLangThrowable, &throwable_class_handle_);
}
class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
@@ -84,26 +94,29 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor {
worklist_.reserve(kDefaultWorklistSize);
}
- void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE;
- void VisitNewInstance(HNewInstance* new_instance) OVERRIDE;
- void VisitLoadClass(HLoadClass* load_class) OVERRIDE;
- void VisitClinitCheck(HClinitCheck* clinit_check) OVERRIDE;
- void VisitLoadString(HLoadString* instr) OVERRIDE;
- void VisitLoadException(HLoadException* instr) OVERRIDE;
- void VisitNewArray(HNewArray* instr) OVERRIDE;
- void VisitParameterValue(HParameterValue* instr) OVERRIDE;
- void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE;
- void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE;
- void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE;
- void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) OVERRIDE;
- void VisitInvoke(HInvoke* instr) OVERRIDE;
- void VisitArrayGet(HArrayGet* instr) OVERRIDE;
- void VisitCheckCast(HCheckCast* instr) OVERRIDE;
- void VisitBoundType(HBoundType* instr) OVERRIDE;
- void VisitNullCheck(HNullCheck* instr) OVERRIDE;
- void VisitPhi(HPhi* phi);
-
- void VisitBasicBlock(HBasicBlock* block);
+ void VisitDeoptimize(HDeoptimize* deopt) override;
+ void VisitNewInstance(HNewInstance* new_instance) override;
+ void VisitLoadClass(HLoadClass* load_class) override;
+ void VisitInstanceOf(HInstanceOf* load_class) override;
+ void VisitClinitCheck(HClinitCheck* clinit_check) override;
+ void VisitLoadMethodHandle(HLoadMethodHandle* instr) override;
+ void VisitLoadMethodType(HLoadMethodType* instr) override;
+ void VisitLoadString(HLoadString* instr) override;
+ void VisitLoadException(HLoadException* instr) override;
+ void VisitNewArray(HNewArray* instr) override;
+ void VisitParameterValue(HParameterValue* instr) override;
+ void VisitInstanceFieldGet(HInstanceFieldGet* instr) override;
+ void VisitStaticFieldGet(HStaticFieldGet* instr) override;
+ void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) override;
+ void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instr) override;
+ void VisitInvoke(HInvoke* instr) override;
+ void VisitArrayGet(HArrayGet* instr) override;
+ void VisitCheckCast(HCheckCast* instr) override;
+ void VisitBoundType(HBoundType* instr) override;
+ void VisitNullCheck(HNullCheck* instr) override;
+ void VisitPhi(HPhi* phi) override;
+
+ void VisitBasicBlock(HBasicBlock* block) override;
void ProcessWorklist();
private:
@@ -171,6 +184,12 @@ void ReferenceTypePropagation::ValidateTypes() {
<< "NullCheck " << instr->GetReferenceTypeInfo()
<< "Input(0) " << instr->InputAt(0)->GetReferenceTypeInfo();
}
+ } else if (instr->IsInstanceOf()) {
+ HInstanceOf* iof = instr->AsInstanceOf();
+ DCHECK(!iof->GetTargetClassRTI().IsValid() || iof->GetTargetClassRTI().IsExact());
+ } else if (instr->IsCheckCast()) {
+ HCheckCast* check = instr->AsCheckCast();
+ DCHECK(!check->GetTargetClassRTI().IsValid() || check->GetTargetClassRTI().IsExact());
}
}
}
@@ -259,7 +278,7 @@ static void BoundTypeIn(HInstruction* receiver,
if (ShouldCreateBoundType(
insert_point, receiver, class_rti, start_instruction, start_block)) {
bound_type = new (receiver->GetBlock()->GetGraph()->GetAllocator()) HBoundType(receiver);
- bound_type->SetUpperBound(class_rti, /* bound_can_be_null */ false);
+ bound_type->SetUpperBound(class_rti, /* can_be_null= */ false);
start_block->InsertInstructionBefore(bound_type, insert_point);
// To comply with the RTP algorithm, don't type the bound type just yet, it will
// be handled in RTPVisitor::VisitBoundType.
@@ -320,8 +339,7 @@ static void BoundTypeForClassCheck(HInstruction* check) {
{
ScopedObjectAccess soa(Thread::Current());
- ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
- ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0);
+ ArtField* field = GetClassRoot<mirror::Object>()->GetInstanceField(0);
DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_");
if (field_get->GetFieldInfo().GetField() != field) {
return;
@@ -332,7 +350,7 @@ static void BoundTypeForClassCheck(HInstruction* check) {
HBasicBlock* trueBlock = compare->IsEqual()
? check->AsIf()->IfTrueSuccessor()
: check->AsIf()->IfFalseSuccessor();
- BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti);
+ BoundTypeIn(receiver, trueBlock, /* start_instruction= */ nullptr, class_rti);
} else {
DCHECK(check->IsDeoptimize());
if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) {
@@ -341,7 +359,7 @@ static void BoundTypeForClassCheck(HInstruction* check) {
}
}
-void ReferenceTypePropagation::Run() {
+bool ReferenceTypePropagation::Run() {
RTPVisitor visitor(graph_, class_loader_, hint_dex_cache_, &handle_cache_, is_first_run_);
// To properly propagate type info we need to visit in the dominator-based order.
@@ -353,6 +371,7 @@ void ReferenceTypePropagation::Run() {
visitor.ProcessWorklist();
ValidateTypes();
+ return true;
}
void ReferenceTypePropagation::RTPVisitor::VisitBasicBlock(HBasicBlock* block) {
@@ -408,9 +427,9 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfNotNull(HBasicBlock* bl
: ifInstruction->IfFalseSuccessor();
ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create(
- handle_cache_->GetObjectClassHandle(), /* is_exact */ false);
+ handle_cache_->GetObjectClassHandle(), /* is_exact= */ false);
- BoundTypeIn(obj, notNullBlock, /* start_instruction */ nullptr, object_rti);
+ BoundTypeIn(obj, notNullBlock, /* start_instruction= */ nullptr, object_rti);
}
// Returns true if one of the patterns below has been recognized. If so, the
@@ -499,8 +518,7 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock*
return;
}
- HLoadClass* load_class = instanceOf->InputAt(1)->AsLoadClass();
- ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
+ ReferenceTypeInfo class_rti = instanceOf->GetTargetClassRTI();
if (!class_rti.IsValid()) {
// We have loaded an unresolved class. Don't bother bounding the type.
return;
@@ -520,10 +538,10 @@ void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock*
{
ScopedObjectAccess soa(Thread::Current());
if (!class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes()) {
- class_rti = ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false);
+ class_rti = ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact= */ false);
}
}
- BoundTypeIn(obj, instanceOfTrueBlock, /* start_instruction */ nullptr, class_rti);
+ BoundTypeIn(obj, instanceOfTrueBlock, /* start_instruction= */ nullptr, class_rti);
}
void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* instr,
@@ -543,9 +561,9 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst
// Use a null loader, the target method is in a boot classpath dex file.
Handle<mirror::ClassLoader> loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
ArtMethod* method = cl->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>(
- dex_method_index, dex_cache, loader, /* referrer */ nullptr, kDirect);
+ dex_method_index, dex_cache, loader, /* referrer= */ nullptr, kDirect);
DCHECK(method != nullptr);
- mirror::Class* declaring_class = method->GetDeclaringClass();
+ ObjPtr<mirror::Class> declaring_class = method->GetDeclaringClass();
DCHECK(declaring_class != nullptr);
DCHECK(declaring_class->IsStringClass())
<< "Expected String class: " << declaring_class->PrettyDescriptor();
@@ -553,8 +571,8 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst
<< "Expected String.<init>: " << method->PrettyMethod();
}
instr->SetReferenceTypeInfo(
- ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
- } else if (IsAdmissible(klass.Ptr())) {
+ ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact= */ true));
+ } else if (IsAdmissible(klass)) {
ReferenceTypeInfo::TypeHandle handle = handle_cache_->NewHandle(klass);
is_exact = is_exact || handle->CannotBeAssignedFromOtherTypes();
instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(handle, is_exact));
@@ -582,12 +600,12 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction*
void ReferenceTypePropagation::RTPVisitor::VisitNewInstance(HNewInstance* instr) {
ScopedObjectAccess soa(Thread::Current());
- SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact */ true);
+ SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact= */ true);
}
void ReferenceTypePropagation::RTPVisitor::VisitNewArray(HNewArray* instr) {
ScopedObjectAccess soa(Thread::Current());
- SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact */ true);
+ SetClassAsTypeInfo(instr, instr->GetLoadClass()->GetClass().Get(), /* is_exact= */ true);
}
void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* instr) {
@@ -596,7 +614,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue*
UpdateReferenceTypeInfo(instr,
instr->GetTypeIndex(),
instr->GetDexFile(),
- /* is_exact */ false);
+ /* is_exact= */ false);
}
}
@@ -614,7 +632,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstructio
klass = info.GetField()->LookupResolvedType();
}
- SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
+ SetClassAsTypeInfo(instr, klass, /* is_exact= */ false);
}
void ReferenceTypePropagation::RTPVisitor::VisitInstanceFieldGet(HInstanceFieldGet* instr) {
@@ -643,36 +661,52 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet(
void ReferenceTypePropagation::RTPVisitor::VisitLoadClass(HLoadClass* instr) {
ScopedObjectAccess soa(Thread::Current());
- Handle<mirror::Class> resolved_class = instr->GetClass();
- if (IsAdmissible(resolved_class.Get())) {
- instr->SetLoadedClassRTI(ReferenceTypeInfo::Create(
- resolved_class, /* is_exact */ true));
+ if (IsAdmissible(instr->GetClass().Get())) {
+ instr->SetValidLoadedClassRTI();
}
instr->SetReferenceTypeInfo(
- ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact */ true));
+ ReferenceTypeInfo::Create(handle_cache_->GetClassClassHandle(), /* is_exact= */ true));
+}
+
+void ReferenceTypePropagation::RTPVisitor::VisitInstanceOf(HInstanceOf* instr) {
+ ScopedObjectAccess soa(Thread::Current());
+ if (IsAdmissible(instr->GetClass().Get())) {
+ instr->SetValidTargetClassRTI();
+ }
}
void ReferenceTypePropagation::RTPVisitor::VisitClinitCheck(HClinitCheck* instr) {
instr->SetReferenceTypeInfo(instr->InputAt(0)->GetReferenceTypeInfo());
}
+void ReferenceTypePropagation::RTPVisitor::VisitLoadMethodHandle(HLoadMethodHandle* instr) {
+ instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(
+ handle_cache_->GetMethodHandleClassHandle(),
+ /* is_exact= */ true));
+}
+
+void ReferenceTypePropagation::RTPVisitor::VisitLoadMethodType(HLoadMethodType* instr) {
+ instr->SetReferenceTypeInfo(
+ ReferenceTypeInfo::Create(handle_cache_->GetMethodTypeClassHandle(), /* is_exact= */ true));
+}
+
void ReferenceTypePropagation::RTPVisitor::VisitLoadString(HLoadString* instr) {
instr->SetReferenceTypeInfo(
- ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
+ ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact= */ true));
}
void ReferenceTypePropagation::RTPVisitor::VisitLoadException(HLoadException* instr) {
DCHECK(instr->GetBlock()->IsCatchBlock());
TryCatchInformation* catch_info = instr->GetBlock()->GetTryCatchInformation();
- if (catch_info->IsCatchAllTypeIndex()) {
- instr->SetReferenceTypeInfo(
- ReferenceTypeInfo::Create(handle_cache_->GetThrowableClassHandle(), /* is_exact */ false));
- } else {
+ if (catch_info->IsValidTypeIndex()) {
UpdateReferenceTypeInfo(instr,
catch_info->GetCatchTypeIndex(),
catch_info->GetCatchDexFile(),
- /* is_exact */ false);
+ /* is_exact= */ false);
+ } else {
+ instr->SetReferenceTypeInfo(
+ ReferenceTypeInfo::Create(handle_cache_->GetThrowableClassHandle(), /* is_exact= */ false));
}
}
@@ -702,7 +736,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
// bound type is dead. To not confuse potential other optimizations, we mark
// the bound as non-exact.
instr->SetReferenceTypeInfo(
- ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact */ false));
+ ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), /* is_exact= */ false));
}
} else {
// Object not typed yet. Leave BoundType untyped for now rather than
@@ -719,8 +753,6 @@ void ReferenceTypePropagation::RTPVisitor::VisitBoundType(HBoundType* instr) {
}
void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast) {
- HLoadClass* load_class = check_cast->InputAt(1)->AsLoadClass();
- ReferenceTypeInfo class_rti = load_class->GetLoadedClassRTI();
HBoundType* bound_type = check_cast->GetNext()->AsBoundType();
if (bound_type == nullptr || bound_type->GetUpperBound().IsValid()) {
// The next instruction is not an uninitialized BoundType. This must be
@@ -729,12 +761,14 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast
}
DCHECK_EQ(bound_type->InputAt(0), check_cast->InputAt(0));
- if (class_rti.IsValid()) {
+ ScopedObjectAccess soa(Thread::Current());
+ Handle<mirror::Class> klass = check_cast->GetClass();
+ if (IsAdmissible(klass.Get())) {
DCHECK(is_first_run_);
- ScopedObjectAccess soa(Thread::Current());
+ check_cast->SetValidTargetClassRTI();
// This is the first run of RTP and class is resolved.
- bool is_exact = class_rti.GetTypeHandle()->CannotBeAssignedFromOtherTypes();
- bound_type->SetUpperBound(ReferenceTypeInfo::Create(class_rti.GetTypeHandle(), is_exact),
+ bool is_exact = klass->CannotBeAssignedFromOtherTypes();
+ bound_type->SetUpperBound(ReferenceTypeInfo::Create(klass, is_exact),
/* CheckCast succeeds for nulls. */ true);
} else {
// This is the first run of RTP and class is unresolved. Remove the binding.
@@ -880,7 +914,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitInvoke(HInvoke* instr) {
ScopedObjectAccess soa(Thread::Current());
ArtMethod* method = instr->GetResolvedMethod();
ObjPtr<mirror::Class> klass = (method == nullptr) ? nullptr : method->LookupResolvedReturnType();
- SetClassAsTypeInfo(instr, klass, /* is_exact */ false);
+ SetClassAsTypeInfo(instr, klass, /* is_exact= */ false);
}
void ReferenceTypePropagation::RTPVisitor::VisitArrayGet(HArrayGet* instr) {
@@ -913,7 +947,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateBoundType(HBoundType* instr) {
// bound type is dead. To not confuse potential other optimizations, we mark
// the bound as non-exact.
instr->SetReferenceTypeInfo(
- ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), /* is_exact */ false));
+ ReferenceTypeInfo::Create(upper_bound_rti.GetTypeHandle(), /* is_exact= */ false));
}
}
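
The class-root migration in this file follows one mechanical pattern, shown side by side for reference (illustrative):

```cpp
// Migration pattern applied throughout this file:
// before: Runtime::Current()->GetClassLinker()->GetClassRoot(
//             ClassLinker::kJavaLangObject)
// after (enum form, from class_root.h):
//         GetClassRoot(ClassRoot::kJavaLangObject)
// after (templated form, when the mirror type is statically known):
//         GetClassRoot<mirror::Object>()
```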
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index fd4dad2b45..7c6a048444 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -40,7 +40,7 @@ class ReferenceTypePropagation : public HOptimization {
// Visit a single instruction.
void Visit(HInstruction* instruction);
- void Run() OVERRIDE;
+ bool Run() override;
// Returns true if klass is admissible to the propagation: non-null and resolved.
// For an array type, we also check if the component type is admissible.
@@ -75,6 +75,8 @@ class ReferenceTypePropagation : public HOptimization {
ReferenceTypeInfo::TypeHandle GetObjectClassHandle();
ReferenceTypeInfo::TypeHandle GetClassClassHandle();
+ ReferenceTypeInfo::TypeHandle GetMethodHandleClassHandle();
+ ReferenceTypeInfo::TypeHandle GetMethodTypeClassHandle();
ReferenceTypeInfo::TypeHandle GetStringClassHandle();
ReferenceTypeInfo::TypeHandle GetThrowableClassHandle();
@@ -83,6 +85,8 @@ class ReferenceTypePropagation : public HOptimization {
ReferenceTypeInfo::TypeHandle object_class_handle_;
ReferenceTypeInfo::TypeHandle class_class_handle_;
+ ReferenceTypeInfo::TypeHandle method_handle_class_handle_;
+ ReferenceTypeInfo::TypeHandle method_type_class_handle_;
ReferenceTypeInfo::TypeHandle string_class_handle_;
ReferenceTypeInfo::TypeHandle throwable_class_handle_;
};
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 27f9ac3990..b1f0a1add9 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -280,16 +280,16 @@ size_t RegisterAllocationResolver::CalculateMaximumSafepointSpillSize(
LocationSummary* locations = instruction->GetLocations();
if (locations->OnlyCallsOnSlowPath()) {
size_t core_spills =
- codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true);
+ codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ true);
size_t fp_spills =
- codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false);
+ codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ false);
size_t spill_size =
core_register_spill_size * core_spills + fp_register_spill_size * fp_spills;
maximum_safepoint_spill_size = std::max(maximum_safepoint_spill_size, spill_size);
} else if (locations->CallsOnMainAndSlowPath()) {
// Nothing to spill on the slow path if the main path already clobbers caller-saves.
- DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ true));
- DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers */ false));
+ DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ true));
+ DCHECK_EQ(0u, codegen_->GetNumberOfSlowPathSpills(locations, /* core_registers= */ false));
}
}
return maximum_safepoint_spill_size;
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index fa7ad82316..42e6498148 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1183,7 +1183,7 @@ static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNo
void ColoringIteration::BuildInterferenceGraph(
const ScopedArenaVector<LiveInterval*>& intervals,
const ScopedArenaVector<InterferenceNode*>& physical_nodes) {
- DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty());
+ DCHECK(interval_node_map_.empty() && prunable_nodes_.empty());
// Build the interference graph efficiently by ordering range endpoints
// by position and doing a linear sweep to find interferences. (That is, we
// jump from endpoint to endpoint, maintaining a set of intervals live at each
@@ -1208,7 +1208,7 @@ void ColoringIteration::BuildInterferenceGraph(
if (range != nullptr) {
InterferenceNode* node =
new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_);
- interval_node_map_.Insert(std::make_pair(sibling, node));
+ interval_node_map_.insert(std::make_pair(sibling, node));
if (sibling->HasRegister()) {
// Fixed nodes should alias the canonical node for the corresponding register.
@@ -1303,7 +1303,7 @@ void ColoringIteration::FindCoalesceOpportunities() {
// Coalesce siblings.
LiveInterval* next_sibling = interval->GetNextSibling();
if (next_sibling != nullptr && interval->GetEnd() == next_sibling->GetStart()) {
- auto it = interval_node_map_.Find(next_sibling);
+ auto it = interval_node_map_.find(next_sibling);
if (it != interval_node_map_.end()) {
InterferenceNode* sibling_node = it->second;
CreateCoalesceOpportunity(node,
@@ -1318,7 +1318,7 @@ void ColoringIteration::FindCoalesceOpportunities() {
if (parent->HasRegister()
&& parent->GetNextSibling() == interval
&& parent->GetEnd() == interval->GetStart()) {
- auto it = interval_node_map_.Find(parent);
+ auto it = interval_node_map_.find(parent);
if (it != interval_node_map_.end()) {
InterferenceNode* parent_node = it->second;
CreateCoalesceOpportunity(node,
@@ -1341,7 +1341,7 @@ void ColoringIteration::FindCoalesceOpportunities() {
size_t position = predecessor->GetLifetimeEnd() - 1;
LiveInterval* existing = interval->GetParent()->GetSiblingAt(position);
if (existing != nullptr) {
- auto it = interval_node_map_.Find(existing);
+ auto it = interval_node_map_.find(existing);
if (it != interval_node_map_.end()) {
InterferenceNode* existing_node = it->second;
CreateCoalesceOpportunity(node,
@@ -1364,7 +1364,7 @@ void ColoringIteration::FindCoalesceOpportunities() {
size_t position = predecessors[i]->GetLifetimeEnd() - 1;
LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(position);
- auto it = interval_node_map_.Find(input_interval);
+ auto it = interval_node_map_.find(input_interval);
if (it != interval_node_map_.end()) {
InterferenceNode* input_node = it->second;
CreateCoalesceOpportunity(node, input_node, CoalesceKind::kPhi, position);
@@ -1380,7 +1380,7 @@ void ColoringIteration::FindCoalesceOpportunities() {
= defined_by->InputAt(0)->GetLiveInterval()->GetSiblingAt(interval->GetStart() - 1);
// TODO: Could we consider lifetime holes here?
if (input_interval->GetEnd() == interval->GetStart()) {
- auto it = interval_node_map_.Find(input_interval);
+ auto it = interval_node_map_.find(input_interval);
if (it != interval_node_map_.end()) {
InterferenceNode* input_node = it->second;
CreateCoalesceOpportunity(node,
@@ -1407,7 +1407,7 @@ void ColoringIteration::FindCoalesceOpportunities() {
LiveInterval* input_interval = inputs[i]->GetLiveInterval()->GetSiblingAt(def_point);
if (input_interval != nullptr &&
input_interval->HasHighInterval() == interval->HasHighInterval()) {
- auto it = interval_node_map_.Find(input_interval);
+ auto it = interval_node_map_.find(input_interval);
if (it != interval_node_map_.end()) {
InterferenceNode* input_node = it->second;
CreateCoalesceOpportunity(node,
diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h
index 3072c92e0f..f0e7e55863 100644
--- a/compiler/optimizing/register_allocator_graph_color.h
+++ b/compiler/optimizing/register_allocator_graph_color.h
@@ -90,11 +90,11 @@ class RegisterAllocatorGraphColor : public RegisterAllocator {
CodeGenerator* codegen,
const SsaLivenessAnalysis& analysis,
bool iterative_move_coalescing = true);
- ~RegisterAllocatorGraphColor() OVERRIDE;
+ ~RegisterAllocatorGraphColor() override;
- void AllocateRegisters() OVERRIDE;
+ void AllocateRegisters() override;
- bool Validate(bool log_fatal_on_failure);
+ bool Validate(bool log_fatal_on_failure) override;
private:
// Collect all intervals and prepare for register allocation.
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 216fb57a96..0d6c5a3eff 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -252,7 +252,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction)
temp_intervals_.push_back(interval);
interval->AddTempUse(instruction, i);
if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) {
- interval->AddHighInterval(/* is_temp */ true);
+ interval->AddHighInterval(/* is_temp= */ true);
LiveInterval* high = interval->GetHighInterval();
temp_intervals_.push_back(high);
unhandled_fp_intervals_.push_back(high);
@@ -284,7 +284,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction)
}
if (locations->WillCall()) {
- BlockRegisters(position, position + 1, /* caller_save_only */ true);
+ BlockRegisters(position, position + 1, /* caller_save_only= */ true);
}
for (size_t i = 0; i < locations->GetInputCount(); ++i) {
@@ -312,7 +312,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction)
for (size_t safepoint_index = safepoints_.size(); safepoint_index > 0; --safepoint_index) {
HInstruction* safepoint = safepoints_[safepoint_index - 1u];
- size_t safepoint_position = safepoint->GetLifetimePosition();
+ size_t safepoint_position = SafepointPosition::ComputePosition(safepoint);
// Test that safepoints are ordered in the optimal way.
DCHECK(safepoint_index == safepoints_.size() ||
diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h
index 36788b7c3c..4d445c7ff7 100644
--- a/compiler/optimizing/register_allocator_linear_scan.h
+++ b/compiler/optimizing/register_allocator_linear_scan.h
@@ -42,11 +42,11 @@ class RegisterAllocatorLinearScan : public RegisterAllocator {
RegisterAllocatorLinearScan(ScopedArenaAllocator* allocator,
CodeGenerator* codegen,
const SsaLivenessAnalysis& analysis);
- ~RegisterAllocatorLinearScan() OVERRIDE;
- void AllocateRegisters() OVERRIDE;
- bool Validate(bool log_fatal_on_failure) OVERRIDE {
+ ~RegisterAllocatorLinearScan() override;
+ void AllocateRegisters() override;
+ bool Validate(bool log_fatal_on_failure) override {
processing_core_registers_ = true;
if (!ValidateInternal(log_fatal_on_failure)) {
return false;
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index a70b0664dc..79eb082cd7 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -40,6 +40,12 @@ using Strategy = RegisterAllocator::Strategy;
class RegisterAllocatorTest : public OptimizingUnitTest {
protected:
+ void SetUp() override {
+ // This test is using the x86 ISA.
+ OverrideInstructionSetFeatures(InstructionSet::kX86, "default");
+ OptimizingUnitTest::SetUp();
+ }
+
// These functions need to access private variables of LocationSummary, so we declare it
// as a member of RegisterAllocatorTest, which we make a friend class.
void SameAsFirstInputHint(Strategy strategy);
@@ -62,11 +68,11 @@ class RegisterAllocatorTest : public OptimizingUnitTest {
bool ValidateIntervals(const ScopedArenaVector<LiveInterval*>& intervals,
const CodeGenerator& codegen) {
return RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const>(intervals),
- /* number_of_spill_slots */ 0u,
- /* number_of_out_slots */ 0u,
+ /* number_of_spill_slots= */ 0u,
+ /* number_of_out_slots= */ 0u,
codegen,
- /* processing_core_registers */ true,
- /* log_fatal_on_failure */ false);
+ /* processing_core_registers= */ true,
+ /* log_fatal_on_failure= */ false);
}
};
@@ -81,9 +87,7 @@ TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\
bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy strategy) {
HGraph* graph = CreateCFG(data);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
std::unique_ptr<RegisterAllocator> register_allocator =
@@ -98,9 +102,7 @@ bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy st
*/
TEST_F(RegisterAllocatorTest, ValidateIntervals) {
HGraph* graph = CreateGraph();
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
ScopedArenaVector<LiveInterval*> intervals(GetScopedAllocator()->Adapter());
// Test with two intervals of the same range.
@@ -324,9 +326,7 @@ void RegisterAllocatorTest::Loop3(Strategy strategy) {
Instruction::GOTO | 0xF900);
HGraph* graph = CreateCFG(data);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
std::unique_ptr<RegisterAllocator> register_allocator =
@@ -359,9 +359,7 @@ TEST_F(RegisterAllocatorTest, FirstRegisterUse) {
Instruction::RETURN_VOID);
HGraph* graph = CreateCFG(data);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -412,9 +410,7 @@ void RegisterAllocatorTest::DeadPhi(Strategy strategy) {
HGraph* graph = CreateCFG(data);
SsaDeadPhiElimination(graph).Run();
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
std::unique_ptr<RegisterAllocator> register_allocator =
@@ -438,9 +434,7 @@ TEST_F(RegisterAllocatorTest, FreeUntil) {
HGraph* graph = CreateCFG(data);
SsaDeadPhiElimination(graph).Run();
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
RegisterAllocatorLinearScan register_allocator(GetScopedAllocator(), &codegen, liveness);
@@ -566,9 +560,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) {
{
HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -584,9 +576,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) {
{
HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -604,9 +594,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) {
{
HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -624,9 +612,7 @@ void RegisterAllocatorTest::PhiHint(Strategy strategy) {
{
HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -689,9 +675,7 @@ void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) {
{
HGraph* graph = BuildFieldReturn(&field, &ret);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -705,9 +689,7 @@ void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) {
{
HGraph* graph = BuildFieldReturn(&field, &ret);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -761,9 +743,7 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) {
{
HGraph* graph = BuildTwoSubs(&first_sub, &second_sub);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -778,9 +758,7 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) {
{
HGraph* graph = BuildTwoSubs(&first_sub, &second_sub);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -834,9 +812,7 @@ HGraph* RegisterAllocatorTest::BuildDiv(HInstruction** div) {
void RegisterAllocatorTest::ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) {
HInstruction *div;
HGraph* graph = BuildDiv(&div);
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
liveness.Analyze();
@@ -896,9 +872,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) {
// Create an interval with lifetime holes.
static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}};
LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), GetScopedAllocator(), -1, one);
- first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 8));
- first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 7));
- first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 6));
+ first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 8));
+ first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 7));
+ first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 6));
locations = new (GetAllocator()) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall);
locations->SetOut(Location::RequiresRegister());
@@ -919,9 +895,9 @@ TEST_F(RegisterAllocatorTest, SpillInactive) {
// before lifetime position 6 yet.
static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}};
LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), GetScopedAllocator(), -1, three);
- third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 8));
- third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 4));
- third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 3));
+ third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 8));
+ third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 4));
+ third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, 0u, 3));
locations = new (GetAllocator()) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall);
locations->SetOut(Location::RequiresRegister());
third = third->SplitAt(3);
@@ -934,9 +910,7 @@ TEST_F(RegisterAllocatorTest, SpillInactive) {
new (GetAllocator()) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall);
locations->SetOut(Location::RequiresRegister());
- std::unique_ptr<const X86InstructionSetFeatures> features_x86(
- X86InstructionSetFeatures::FromCppDefines());
- x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions());
+ x86::CodeGeneratorX86 codegen(graph, *compiler_options_);
SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator());
// Populate the instructions in the liveness object, to please the register allocator.
for (size_t i = 0; i < 32; ++i) {
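
The repeated hunks in this test file all apply one pattern: each test used to build its own X86InstructionSetFeatures and CompilerOptions inline, and now consumes `compiler_options_` prepared once in SetUp(). A minimal gtest sketch of that pattern, with illustrative types rather than ART's real classes:

    #include <memory>
    #include <string>
    #include <gtest/gtest.h>

    // Illustrative stand-in for ART's CompilerOptions.
    struct CompilerOptions {
      std::string isa = "none";
      std::string features = "none";
    };

    class AllocatorFixture : public ::testing::Test {
     protected:
      void SetUp() override {
        // Done once per test, instead of each test building its own options.
        compiler_options_ = std::make_unique<CompilerOptions>();
        compiler_options_->isa = "x86";
        compiler_options_->features = "default";
      }
      std::unique_ptr<CompilerOptions> compiler_options_;
    };

    TEST_F(AllocatorFixture, UsesSharedOptions) {
      // Mirrors `x86::CodeGeneratorX86 codegen(graph, *compiler_options_)`.
      EXPECT_EQ(compiler_options_->isa, "x86");
    }
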
diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc
index bb28d50b56..fdef45ec8b 100644
--- a/compiler/optimizing/scheduler.cc
+++ b/compiler/optimizing/scheduler.cc
@@ -70,19 +70,19 @@ static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) {
return false;
}
-size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const {
+size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* instruction) const {
DCHECK(heap_location_collector_ != nullptr);
- size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(array, index);
+ size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(instruction);
// This array access should already have been analyzed and added to the HeapLocationCollector.
DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound);
return heap_loc;
}
-bool SchedulingGraph::ArrayAccessMayAlias(const HInstruction* node,
- const HInstruction* other) const {
+bool SchedulingGraph::ArrayAccessMayAlias(HInstruction* node,
+ HInstruction* other) const {
DCHECK(heap_location_collector_ != nullptr);
- size_t node_heap_loc = ArrayAccessHeapLocation(node->InputAt(0), node->InputAt(1));
- size_t other_heap_loc = ArrayAccessHeapLocation(other->InputAt(0), other->InputAt(1));
+ size_t node_heap_loc = ArrayAccessHeapLocation(node);
+ size_t other_heap_loc = ArrayAccessHeapLocation(other);
// For example: arr[0] and arr[0]
if (node_heap_loc == other_heap_loc) {
@@ -194,8 +194,8 @@ bool SchedulingGraph::FieldAccessMayAlias(const HInstruction* node,
return true;
}
-bool SchedulingGraph::HasMemoryDependency(const HInstruction* node,
- const HInstruction* other) const {
+bool SchedulingGraph::HasMemoryDependency(HInstruction* node,
+ HInstruction* other) const {
if (!MayHaveReorderingDependency(node->GetSideEffects(), other->GetSideEffects())) {
return false;
}
@@ -264,8 +264,8 @@ bool SchedulingGraph::HasExceptionDependency(const HInstruction* node,
// Check whether `node` depends on `other`, taking into account `SideEffect`
// information and `CanThrow` information.
-bool SchedulingGraph::HasSideEffectDependency(const HInstruction* node,
- const HInstruction* other) const {
+bool SchedulingGraph::HasSideEffectDependency(HInstruction* node,
+ HInstruction* other) const {
if (HasMemoryDependency(node, other)) {
return true;
}
@@ -280,6 +280,23 @@ bool SchedulingGraph::HasSideEffectDependency(const HInstruction* node,
return false;
}
+// Check whether the specified instruction is a better candidate, i.e. one that
+// other instructions are more likely to depend on.
+static bool IsBetterCandidateWithMoreLikelyDependencies(HInstruction* new_candidate,
+ HInstruction* old_candidate) {
+ if (!new_candidate->GetSideEffects().Includes(old_candidate->GetSideEffects())) {
+ // Weaker side effects.
+ return false;
+ }
+ if (old_candidate->GetSideEffects().Includes(new_candidate->GetSideEffects())) {
+ // Same side effects, check if `new_candidate` has stronger `CanThrow()`.
+ return new_candidate->CanThrow() && !old_candidate->CanThrow();
+ } else {
+ // Stronger side effects, check if `new_candidate` has at least as strong `CanThrow()`.
+ return new_candidate->CanThrow() || !old_candidate->CanThrow();
+ }
+}
+
void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_scheduling_barrier) {
SchedulingNode* instruction_node = GetNode(instruction);
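
The helper added above defines a partial order on candidates: stronger side effects win, and on a tie the throwing instruction wins. A standalone model of that ordering, approximating SideEffects as a bitmask where Includes() means "superset of"; the types and values are illustrative, not ART's:

    #include <cassert>
    #include <cstdint>

    struct Inst {
      uint32_t side_effects;  // bitmask; a superset means stronger effects
      bool can_throw;
      bool Includes(const Inst& o) const {
        return (side_effects & o.side_effects) == o.side_effects;
      }
    };

    // Simplified mirror of IsBetterCandidateWithMoreLikelyDependencies.
    static bool IsBetterCandidate(const Inst& new_c, const Inst& old_c) {
      if (!new_c.Includes(old_c)) {
        return false;  // Weaker side effects can never be a better chain head.
      }
      if (old_c.Includes(new_c)) {
        // Same side effects: new candidate wins only with strictly stronger CanThrow().
        return new_c.can_throw && !old_c.can_throw;
      }
      // Strictly stronger side effects: CanThrow() must be at least as strong.
      return new_c.can_throw || !old_c.can_throw;
    }

    int main() {
      Inst write_array{0b11, /* can_throw= */ false};
      Inst write_field{0b01, /* can_throw= */ false};
      Inst throwing_write{0b11, /* can_throw= */ true};
      assert(IsBetterCandidate(write_array, write_field));     // superset of effects
      assert(IsBetterCandidate(throwing_write, write_array));  // same effects, throws
      assert(!IsBetterCandidate(write_field, write_array));    // weaker effects
      return 0;
    }
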
@@ -331,6 +348,7 @@ void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_schedul
// Side effect dependencies.
if (!instruction->GetSideEffects().DoesNothing() || instruction->CanThrow()) {
+ HInstruction* dep_chain_candidate = nullptr;
for (HInstruction* other = instruction->GetNext(); other != nullptr; other = other->GetNext()) {
SchedulingNode* other_node = GetNode(other);
if (other_node->IsSchedulingBarrier()) {
@@ -340,7 +358,18 @@ void SchedulingGraph::AddDependencies(HInstruction* instruction, bool is_schedul
break;
}
if (HasSideEffectDependency(other, instruction)) {
- AddOtherDependency(other_node, instruction_node);
+ if (dep_chain_candidate != nullptr &&
+ HasSideEffectDependency(other, dep_chain_candidate)) {
+ // Skip the explicit dependency to reduce memory usage; rely on the transitive dependency.
+ } else {
+ AddOtherDependency(other_node, instruction_node);
+ }
+ // Check whether `other` is a better candidate, i.e. one that later
+ // instructions are more likely to depend on.
+ if (dep_chain_candidate == nullptr ||
+ IsBetterCandidateWithMoreLikelyDependencies(other, dep_chain_candidate)) {
+ dep_chain_candidate = other;
+ }
}
}
}
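
The net effect of this candidate tracking is that a chain of mutually dependent instructions gets a single explicit edge to `instruction` rather than one per member, with the rest of the ordering preserved transitively. A simplified standalone model of the pruning, assuming a conservative may-alias oracle under which every later store depends on every earlier one:

    #include <cstdio>
    #include <vector>

    struct Node { int id; };

    // Assumed conservative oracle: every pair of stores may alias.
    static bool HasSideEffectDependency(const Node&, const Node&) { return true; }

    int main() {
      std::vector<Node> stores = {{0}, {1}, {2}, {3}};  // stores after `instruction`
      const Node* chain_candidate = nullptr;
      int explicit_edges = 0;
      for (const Node& other : stores) {
        if (chain_candidate != nullptr &&
            HasSideEffectDependency(other, *chain_candidate)) {
          // Rely on other -> candidate -> instruction instead of a direct edge.
        } else {
          ++explicit_edges;
        }
        chain_candidate = &other;  // here every store is an equally strong candidate
      }
      // Without pruning: 4 edges to `instruction`; with pruning: only the first.
      std::printf("explicit edges: %d\n", explicit_edges);  // prints 1
      return 0;
    }
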
@@ -545,60 +574,67 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::GetHigherPrioritySchedulingN
void HScheduler::Schedule(HGraph* graph) {
// We run lsa here instead of in a separate pass to better control whether we
// should run the analysis or not.
+ const HeapLocationCollector* heap_location_collector = nullptr;
LoadStoreAnalysis lsa(graph);
if (!only_optimize_loop_blocks_ || graph->HasLoops()) {
lsa.Run();
- scheduling_graph_.SetHeapLocationCollector(lsa.GetHeapLocationCollector());
+ heap_location_collector = &lsa.GetHeapLocationCollector();
}
for (HBasicBlock* block : graph->GetReversePostOrder()) {
if (IsSchedulable(block)) {
- Schedule(block);
+ Schedule(block, heap_location_collector);
}
}
}
-void HScheduler::Schedule(HBasicBlock* block) {
- ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator_->Adapter(kArenaAllocScheduler));
+void HScheduler::Schedule(HBasicBlock* block,
+ const HeapLocationCollector* heap_location_collector) {
+ ScopedArenaAllocator allocator(block->GetGraph()->GetArenaStack());
+ ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator.Adapter(kArenaAllocScheduler));
// Build the scheduling graph.
- scheduling_graph_.Clear();
+ SchedulingGraph scheduling_graph(this, &allocator, heap_location_collector);
for (HBackwardInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
CHECK_EQ(instruction->GetBlock(), block)
<< instruction->DebugName()
<< " is in block " << instruction->GetBlock()->GetBlockId()
<< ", and expected in block " << block->GetBlockId();
- SchedulingNode* node = scheduling_graph_.AddNode(instruction, IsSchedulingBarrier(instruction));
+ SchedulingNode* node = scheduling_graph.AddNode(instruction, IsSchedulingBarrier(instruction));
CalculateLatency(node);
scheduling_nodes.push_back(node);
}
- if (scheduling_graph_.Size() <= 1) {
- scheduling_graph_.Clear();
+ if (scheduling_graph.Size() <= 1) {
return;
}
cursor_ = block->GetLastInstruction();
+ // The list of candidates for scheduling. A node becomes a candidate when all
+ // its predecessors have been scheduled.
+ ScopedArenaVector<SchedulingNode*> candidates(allocator.Adapter(kArenaAllocScheduler));
+
// Find the initial candidates for scheduling.
- candidates_.clear();
for (SchedulingNode* node : scheduling_nodes) {
if (!node->HasUnscheduledSuccessors()) {
node->MaybeUpdateCriticalPath(node->GetLatency());
- candidates_.push_back(node);
+ candidates.push_back(node);
}
}
- ScopedArenaVector<SchedulingNode*> initial_candidates(allocator_->Adapter(kArenaAllocScheduler));
+ ScopedArenaVector<SchedulingNode*> initial_candidates(allocator.Adapter(kArenaAllocScheduler));
if (kDumpDotSchedulingGraphs) {
// Remember the list of initial candidates for debug output purposes.
- initial_candidates.assign(candidates_.begin(), candidates_.end());
+ initial_candidates.assign(candidates.begin(), candidates.end());
}
// Schedule all nodes.
- while (!candidates_.empty()) {
- Schedule(selector_->PopHighestPriorityNode(&candidates_, scheduling_graph_));
+ selector_->Reset();
+ while (!candidates.empty()) {
+ SchedulingNode* node = selector_->PopHighestPriorityNode(&candidates, scheduling_graph);
+ Schedule(node, &candidates);
}
if (kDumpDotSchedulingGraphs) {
@@ -607,11 +643,12 @@ void HScheduler::Schedule(HBasicBlock* block) {
std::stringstream description;
description << graph->GetDexFile().PrettyMethod(graph->GetMethodIdx())
<< " B" << block->GetBlockId();
- scheduling_graph_.DumpAsDotGraph(description.str(), initial_candidates);
+ scheduling_graph.DumpAsDotGraph(description.str(), initial_candidates);
}
}
-void HScheduler::Schedule(SchedulingNode* scheduling_node) {
+void HScheduler::Schedule(SchedulingNode* scheduling_node,
+ /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates) {
// Check whether any of the node's predecessors will be valid candidates after
// this node is scheduled.
uint32_t path_to_node = scheduling_node->GetCriticalPath();
@@ -620,7 +657,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) {
path_to_node + predecessor->GetInternalLatency() + predecessor->GetLatency());
predecessor->DecrementNumberOfUnscheduledSuccessors();
if (!predecessor->HasUnscheduledSuccessors()) {
- candidates_.push_back(predecessor);
+ candidates->push_back(predecessor);
}
}
for (SchedulingNode* predecessor : scheduling_node->GetOtherPredecessors()) {
@@ -630,7 +667,7 @@ void HScheduler::Schedule(SchedulingNode* scheduling_node) {
// correctness. So we do not use them to compute the critical path.
predecessor->DecrementNumberOfUnscheduledSuccessors();
if (!predecessor->HasUnscheduledSuccessors()) {
- candidates_.push_back(predecessor);
+ candidates->push_back(predecessor);
}
}
@@ -643,7 +680,7 @@ static void MoveAfterInBlock(HInstruction* instruction, HInstruction* cursor) {
DCHECK_NE(cursor, cursor->GetBlock()->GetLastInstruction());
DCHECK(!instruction->IsControlFlow());
DCHECK(!cursor->IsControlFlow());
- instruction->MoveBefore(cursor->GetNext(), /* do_checks */ false);
+ instruction->MoveBefore(cursor->GetNext(), /* do_checks= */ false);
}
void HScheduler::Schedule(HInstruction* instruction) {
@@ -667,7 +704,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
// HUnaryOperation (or HBinaryOperation), check in debug mode that we have
// the exhaustive lists here.
if (instruction->IsUnaryOperation()) {
- DCHECK(instruction->IsBooleanNot() ||
+ DCHECK(instruction->IsAbs() ||
+ instruction->IsBooleanNot() ||
instruction->IsNot() ||
instruction->IsNeg()) << "unexpected instruction " << instruction->DebugName();
return true;
@@ -678,6 +716,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const {
instruction->IsCompare() ||
instruction->IsCondition() ||
instruction->IsDiv() ||
+ instruction->IsMin() ||
+ instruction->IsMax() ||
instruction->IsMul() ||
instruction->IsOr() ||
instruction->IsRem() ||
@@ -771,12 +811,11 @@ bool HScheduler::IsSchedulingBarrier(const HInstruction* instr) const {
instr->IsSuspendCheck();
}
-void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
+bool HInstructionScheduling::Run(bool only_optimize_loop_blocks,
bool schedule_randomly) {
#if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm)
// Phase-local allocator that allocates scheduler internal data structures like
// scheduling nodes, internal nodes map, dependencies, etc.
- ScopedArenaAllocator allocator(graph_->GetArenaStack());
CriticalPathSchedulingNodeSelector critical_path_selector;
RandomSchedulingNodeSelector random_selector;
SchedulingNodeSelector* selector = schedule_randomly
@@ -792,7 +831,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
switch (instruction_set_) {
#ifdef ART_ENABLE_CODEGEN_arm64
case InstructionSet::kArm64: {
- arm64::HSchedulerARM64 scheduler(&allocator, selector);
+ arm64::HSchedulerARM64 scheduler(selector);
scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
scheduler.Schedule(graph_);
break;
@@ -802,7 +841,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
case InstructionSet::kThumb2:
case InstructionSet::kArm: {
arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_);
- arm::HSchedulerARM scheduler(&allocator, selector, &arm_latency_visitor);
+ arm::HSchedulerARM scheduler(selector, &arm_latency_visitor);
scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks);
scheduler.Schedule(graph_);
break;
@@ -811,6 +850,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks,
default:
break;
}
+ return true;
}
} // namespace art
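
After this change the scheduling graph, the candidate list, and their arena all live in a scope that ends with each block, and the selector is Reset() so cached state such as CriticalPathSchedulingNodeSelector's `prev_select_` cannot leak between blocks. A simplified standalone sketch of that lifecycle, with std::vector standing in for the arena containers:

    #include <vector>

    struct Node { int priority; };

    class Selector {
     public:
      void Reset() { prev_select_ = nullptr; }
      Node* Pop(std::vector<Node*>* candidates) {
        Node* best = candidates->back();  // priority selection elided for brevity
        candidates->pop_back();
        prev_select_ = best;
        return best;
      }
     private:
      Node* prev_select_ = nullptr;  // cached across Pop() calls within one block
    };

    void ScheduleBlock(Selector* selector, std::vector<Node>& block_nodes) {
      // Phase-local storage: destroyed when this function returns.
      std::vector<Node*> candidates;
      for (Node& n : block_nodes) candidates.push_back(&n);
      selector->Reset();  // drop state remembered from the previous block
      while (!candidates.empty()) {
        selector->Pop(&candidates);  // schedule the popped node
      }
    }

    int main() {
      Selector selector;
      std::vector<Node> b1 = {{1}, {2}};
      std::vector<Node> b2 = {{3}};
      ScheduleBlock(&selector, b1);
      ScheduleBlock(&selector, b2);
      return 0;
    }
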
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index dfa077f7de..d2dbeca924 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -23,7 +23,6 @@
#include "base/scoped_arena_containers.h"
#include "base/time_utils.h"
#include "code_generator.h"
-#include "driver/compiler_driver.h"
#include "load_store_analysis.h"
#include "nodes.h"
#include "optimization.h"
@@ -251,34 +250,27 @@ class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> {
*/
class SchedulingGraph : public ValueObject {
public:
- SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator)
+ SchedulingGraph(const HScheduler* scheduler,
+ ScopedArenaAllocator* allocator,
+ const HeapLocationCollector* heap_location_collector)
: scheduler_(scheduler),
allocator_(allocator),
contains_scheduling_barrier_(false),
nodes_map_(allocator_->Adapter(kArenaAllocScheduler)),
- heap_location_collector_(nullptr) {}
+ heap_location_collector_(heap_location_collector) {}
SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) {
std::unique_ptr<SchedulingNode> node(
new (allocator_) SchedulingNode(instr, allocator_, is_scheduling_barrier));
SchedulingNode* result = node.get();
- nodes_map_.Insert(std::make_pair(instr, std::move(node)));
+ nodes_map_.insert(std::make_pair(instr, std::move(node)));
contains_scheduling_barrier_ |= is_scheduling_barrier;
AddDependencies(instr, is_scheduling_barrier);
return result;
}
- void Clear() {
- nodes_map_.Clear();
- contains_scheduling_barrier_ = false;
- }
-
- void SetHeapLocationCollector(const HeapLocationCollector& heap_location_collector) {
- heap_location_collector_ = &heap_location_collector;
- }
-
SchedulingNode* GetNode(const HInstruction* instr) const {
- auto it = nodes_map_.Find(instr);
+ auto it = nodes_map_.find(instr);
if (it == nodes_map_.end()) {
return nullptr;
} else {
@@ -294,7 +286,7 @@ class SchedulingGraph : public ValueObject {
bool HasImmediateOtherDependency(const HInstruction* node, const HInstruction* other) const;
size_t Size() const {
- return nodes_map_.Size();
+ return nodes_map_.size();
}
// Dump the scheduling graph, in dot file format, appending it to the file
@@ -310,12 +302,12 @@ class SchedulingGraph : public ValueObject {
void AddOtherDependency(SchedulingNode* node, SchedulingNode* dependency) {
AddDependency(node, dependency, /*is_data_dependency*/false);
}
- bool HasMemoryDependency(const HInstruction* node, const HInstruction* other) const;
+ bool HasMemoryDependency(HInstruction* node, HInstruction* other) const;
bool HasExceptionDependency(const HInstruction* node, const HInstruction* other) const;
- bool HasSideEffectDependency(const HInstruction* node, const HInstruction* other) const;
- bool ArrayAccessMayAlias(const HInstruction* node, const HInstruction* other) const;
+ bool HasSideEffectDependency(HInstruction* node, HInstruction* other) const;
+ bool ArrayAccessMayAlias(HInstruction* node, HInstruction* other) const;
bool FieldAccessMayAlias(const HInstruction* node, const HInstruction* other) const;
- size_t ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const;
+ size_t ArrayAccessHeapLocation(HInstruction* instruction) const;
size_t FieldAccessHeapLocation(HInstruction* obj, const FieldInfo* field) const;
// Add dependencies nodes for the given `HInstruction`: inputs, environments, and side-effects.
@@ -329,7 +321,7 @@ class SchedulingGraph : public ValueObject {
ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_;
- const HeapLocationCollector* heap_location_collector_;
+ const HeapLocationCollector* const heap_location_collector_;
};
/*
@@ -346,7 +338,7 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
last_visited_latency_(0),
last_visited_internal_latency_(0) {}
- void VisitInstruction(HInstruction* instruction) OVERRIDE {
+ void VisitInstruction(HInstruction* instruction) override {
LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
"Architecture-specific scheduling latency visitors must handle all instructions"
" (potentially by overriding the generic `VisitInstruction()`).";
@@ -377,6 +369,7 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor {
class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> {
public:
+ virtual void Reset() {}
virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
const SchedulingGraph& graph) = 0;
virtual ~SchedulingNodeSelector() {}
@@ -398,7 +391,7 @@ class RandomSchedulingNodeSelector : public SchedulingNodeSelector {
}
SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
- const SchedulingGraph& graph) OVERRIDE {
+ const SchedulingGraph& graph) override {
UNUSED(graph);
DCHECK(!nodes->empty());
size_t select = rand_r(&seed_) % nodes->size();
@@ -418,8 +411,9 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
public:
CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {}
+ void Reset() override { prev_select_ = nullptr; }
SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes,
- const SchedulingGraph& graph) OVERRIDE;
+ const SchedulingGraph& graph) override;
protected:
SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate,
@@ -434,16 +428,11 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector {
class HScheduler {
public:
- HScheduler(ScopedArenaAllocator* allocator,
- SchedulingLatencyVisitor* latency_visitor,
- SchedulingNodeSelector* selector)
- : allocator_(allocator),
- latency_visitor_(latency_visitor),
+ HScheduler(SchedulingLatencyVisitor* latency_visitor, SchedulingNodeSelector* selector)
+ : latency_visitor_(latency_visitor),
selector_(selector),
only_optimize_loop_blocks_(true),
- scheduling_graph_(this, allocator),
- cursor_(nullptr),
- candidates_(allocator_->Adapter(kArenaAllocScheduler)) {}
+ cursor_(nullptr) {}
virtual ~HScheduler() {}
void Schedule(HGraph* graph);
@@ -454,8 +443,9 @@ class HScheduler {
virtual bool IsSchedulingBarrier(const HInstruction* instruction) const;
protected:
- void Schedule(HBasicBlock* block);
- void Schedule(SchedulingNode* scheduling_node);
+ void Schedule(HBasicBlock* block, const HeapLocationCollector* heap_location_collector);
+ void Schedule(SchedulingNode* scheduling_node,
+ /*inout*/ ScopedArenaVector<SchedulingNode*>* candidates);
void Schedule(HInstruction* instruction);
// Any instruction returning `false` via this method will prevent its
@@ -476,19 +466,12 @@ class HScheduler {
node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency());
}
- ScopedArenaAllocator* const allocator_;
SchedulingLatencyVisitor* const latency_visitor_;
SchedulingNodeSelector* const selector_;
bool only_optimize_loop_blocks_;
- // We instantiate the members below as part of this class to avoid
- // instantiating them locally for every chunk scheduled.
- SchedulingGraph scheduling_graph_;
// A pointer indicating where the next instruction to be scheduled will be inserted.
HInstruction* cursor_;
- // The list of candidates for scheduling. A node becomes a candidate when all
- // its predecessors have been scheduled.
- ScopedArenaVector<SchedulingNode*> candidates_;
private:
DISALLOW_COPY_AND_ASSIGN(HScheduler);
@@ -508,10 +491,11 @@ class HInstructionScheduling : public HOptimization {
codegen_(cg),
instruction_set_(instruction_set) {}
- void Run() {
- Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false);
+ bool Run() override {
+ return Run(/*only_optimize_loop_blocks*/ true, /*schedule_randomly*/ false);
}
- void Run(bool only_optimize_loop_blocks, bool schedule_randomly);
+
+ bool Run(bool only_optimize_loop_blocks, bool schedule_randomly);
static constexpr const char* kInstructionSchedulingPassName = "scheduler";
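
HInstructionScheduling::Run() now overrides a bool-returning HOptimization::Run(), whose contract is to report whether the pass changed the graph. A minimal sketch of that interface shape; the pass-manager loop and names are assumptions for illustration, not ART's code:

    #include <vector>

    struct HOptimization {
      virtual ~HOptimization() {}
      virtual bool Run() = 0;  // returns true if the graph was modified
    };

    struct NopPass : HOptimization {
      bool Run() override { return false; }  // made no changes
    };

    // A caller can use the result, e.g. to re-run dependent analyses only when needed.
    bool RunAll(const std::vector<HOptimization*>& passes) {
      bool changed = false;
      for (HOptimization* pass : passes) changed |= pass->Run();
      return changed;
    }

    int main() {
      NopPass nop;
      std::vector<HOptimization*> passes = {&nop};
      return RunAll(passes) ? 1 : 0;
    }
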
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 8dcadaad2e..858a555e97 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -563,7 +563,7 @@ void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp*
last_visited_internal_latency_ = kArmIntegerOpLatency;
last_visited_latency_ = kArmIntegerOpLatency;
} else {
- HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction(/* internal_latency= */ true);
HandleGenerateDataProcInstruction();
}
}
@@ -585,8 +585,8 @@ void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifte
DCHECK_LT(shift_value, 32U);
if (kind == HInstruction::kOr || kind == HInstruction::kXor) {
- HandleGenerateDataProcInstruction(/* internal_latency */ true);
- HandleGenerateDataProcInstruction(/* internal_latency */ true);
+ HandleGenerateDataProcInstruction(/* internal_latency= */ true);
+ HandleGenerateDataProcInstruction(/* internal_latency= */ true);
HandleGenerateDataProcInstruction();
} else {
last_visited_internal_latency_ += 2 * kArmIntegerOpLatency;
@@ -679,7 +679,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) {
} else {
last_visited_internal_latency_ += kArmIntegerOpLatency;
}
- last_visited_internal_latency_ = kArmMemoryLoadLatency;
+ last_visited_latency_ = kArmMemoryLoadLatency;
}
}
break;
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index 0cb8684376..4c7a3bb4d6 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -55,7 +55,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
: codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}
// Default visitor for instructions not handled specifically below.
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
+ void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
last_visited_latency_ = kArmIntegerOpLatency;
}
@@ -100,7 +100,7 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
M(DataProcWithShifterOp, unused)
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
- void Visit##type(H##type* instruction) OVERRIDE;
+ void Visit##type(H##type* instruction) override;
FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
@@ -137,13 +137,12 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
class HSchedulerARM : public HScheduler {
public:
- HSchedulerARM(ScopedArenaAllocator* allocator,
- SchedulingNodeSelector* selector,
+ HSchedulerARM(SchedulingNodeSelector* selector,
SchedulingLatencyVisitorARM* arm_latency_visitor)
- : HScheduler(allocator, arm_latency_visitor, selector) {}
- ~HSchedulerARM() OVERRIDE {}
+ : HScheduler(arm_latency_visitor, selector) {}
+ ~HSchedulerARM() override {}
- bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
+ bool IsSchedulable(const HInstruction* instruction) const override {
#define CASE_INSTRUCTION_KIND(type, unused) case \
HInstruction::InstructionKind::k##type:
switch (instruction->GetKind()) {
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index f71cb5b784..ba5a743545 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -58,7 +58,7 @@ static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
public:
// Default visitor for instructions not handled specifically below.
- void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
+ void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
last_visited_latency_ = kArm64IntegerOpLatency;
}
@@ -68,12 +68,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
M(ArrayGet , unused) \
M(ArrayLength , unused) \
M(ArraySet , unused) \
- M(BinaryOperation , unused) \
M(BoundsCheck , unused) \
M(Div , unused) \
M(InstanceFieldGet , unused) \
M(InstanceOf , unused) \
- M(Invoke , unused) \
M(LoadString , unused) \
M(Mul , unused) \
M(NewArray , unused) \
@@ -108,6 +106,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
M(VecLoad , unused) \
M(VecStore , unused)
+#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \
+ M(BinaryOperation , unused) \
+ M(Invoke , unused)
+
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
M(BitwiseNegatedRight, unused) \
M(MultiplyAccumulate, unused) \
@@ -116,9 +118,10 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
M(DataProcWithShifterOp, unused)
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
- void Visit##type(H##type* instruction) OVERRIDE;
+ void Visit##type(H##type* instruction) override;
FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
+ FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
@@ -131,11 +134,11 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
class HSchedulerARM64 : public HScheduler {
public:
- HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector)
- : HScheduler(allocator, &arm64_latency_visitor_, selector) {}
- ~HSchedulerARM64() OVERRIDE {}
+ explicit HSchedulerARM64(SchedulingNodeSelector* selector)
+ : HScheduler(&arm64_latency_visitor_, selector) {}
+ ~HSchedulerARM64() override {}
- bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
+ bool IsSchedulable(const HInstruction* instruction) const override {
#define CASE_INSTRUCTION_KIND(type, unused) case \
HInstruction::InstructionKind::k##type:
switch (instruction->GetKind()) {
@@ -157,7 +160,7 @@ class HSchedulerARM64 : public HScheduler {
// SIMD&FP registers are callee saved) so don't reorder such vector instructions.
//
// TODO: remove this when a proper support of SIMD registers is introduced to the compiler.
- bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE {
+ bool IsSchedulingBarrier(const HInstruction* instr) const override {
return HScheduler::IsSchedulingBarrier(instr) ||
instr->IsVecReduce() ||
instr->IsVecExtractScalar() ||
diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc
index fb15fc8975..e0e265a04c 100644
--- a/compiler/optimizing/scheduler_test.cc
+++ b/compiler/optimizing/scheduler_test.cc
@@ -146,7 +146,9 @@ class SchedulerTest : public OptimizingUnitTest {
environment->SetRawEnvAt(1, mul);
mul->AddEnvUseAt(div_check->GetEnvironment(), 1);
- SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator());
+ SchedulingGraph scheduling_graph(scheduler,
+ GetScopedAllocator(),
+ /* heap_location_collector= */ nullptr);
// Instructions must be inserted in reverse order into the scheduling graph.
for (HInstruction* instr : ReverseRange(block_instructions)) {
scheduling_graph.AddNode(instr);
@@ -169,7 +171,9 @@ class SchedulerTest : public OptimizingUnitTest {
ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set1, array_get1));
ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_get2));
ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_get2, array_set1));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
+ // The unnecessary dependency is not stored; we rely on transitive dependencies.
+ // The array_set2 -> array_get2 -> array_set1 dependencies are tested above.
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(array_set2, array_set1));
// Env dependency.
ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(div_check, mul));
@@ -192,7 +196,9 @@ class SchedulerTest : public OptimizingUnitTest {
HInstructionScheduling scheduling(graph, target_config.GetInstructionSet());
scheduling.Run(/*only_optimize_loop_blocks*/ false, /*schedule_randomly*/ true);
+ OverrideInstructionSetFeatures(target_config.GetInstructionSet(), "default");
RunCode(target_config,
+ *compiler_options_,
graph,
[](HGraph* graph_arg) { RemoveSuspendChecks(graph_arg); },
has_result, expected);
@@ -274,11 +280,10 @@ class SchedulerTest : public OptimizingUnitTest {
entry->AddInstruction(instr);
}
- SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator());
HeapLocationCollector heap_location_collector(graph_);
heap_location_collector.VisitBasicBlock(entry);
heap_location_collector.BuildAliasingMatrix();
- scheduling_graph.SetHeapLocationCollector(heap_location_collector);
+ SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator(), &heap_location_collector);
for (HInstruction* instr : ReverseRange(block_instructions)) {
// Build scheduling graph with memory access aliasing information
@@ -296,47 +301,53 @@ class SchedulerTest : public OptimizingUnitTest {
size_t loc2 = HeapLocationCollector::kHeapLocationNotFound;
// Test side effect dependency: array[0] and array[1]
- loc1 = heap_location_collector.GetArrayHeapLocation(arr, c0);
- loc2 = heap_location_collector.GetArrayHeapLocation(arr, c1);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_0);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_1);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_1, arr_set_0));
// Test side effect dependency based on LSA analysis: array[i] and array[j]
- loc1 = heap_location_collector.GetArrayHeapLocation(arr, i);
- loc2 = heap_location_collector.GetArrayHeapLocation(arr, j);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_j);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i));
+ // The unnecessary dependency is not stored; we rely on transitive dependencies.
+ // The arr_set_j -> arr_set_sub0 -> arr_set_add0 -> arr_set_i dependencies are tested below.
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i));
// Test side effect dependency based on LSA analysis: array[i] and array[i+0]
- loc1 = heap_location_collector.GetArrayHeapLocation(arr, i);
- loc2 = heap_location_collector.GetArrayHeapLocation(arr, add0);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_add0);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_add0, arr_set_i));
// Test side effect dependency based on LSA analysis: array[i] and array[i-0]
- loc1 = heap_location_collector.GetArrayHeapLocation(arr, i);
- loc2 = heap_location_collector.GetArrayHeapLocation(arr, sub0);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_sub0);
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_i));
+ // The unnecessary dependency is not stored; we rely on transitive dependencies.
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_i));
+ // Instead, we rely on arr_set_sub0 -> arr_set_add0 -> arr_set_i, the latter is tested above.
+ ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_add0));
// Test side effect dependency based on LSA analysis: array[i] and array[i+1]
- loc1 = heap_location_collector.GetArrayHeapLocation(arr, i);
- loc2 = heap_location_collector.GetArrayHeapLocation(arr, add1);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_i);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_add1);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_add1, arr_set_i));
// Test side effect dependency based on LSA analysis: array[i+1] and array[i-1]
- loc1 = heap_location_collector.GetArrayHeapLocation(arr, add1);
- loc2 = heap_location_collector.GetArrayHeapLocation(arr, sub1);
+ loc1 = heap_location_collector.GetArrayHeapLocation(arr_set_add1);
+ loc2 = heap_location_collector.GetArrayHeapLocation(arr_set_sub1);
ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub1, arr_set_add1));
// Test side effect dependency based on LSA analysis: array[j] and all others array accesses
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i));
- ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add0));
ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub0));
ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add1));
ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_sub1));
+ // Unnecessary dependencies are not stored; we rely on transitive dependencies.
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i));
+ ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_add0));
// Test that ArraySet and FieldSet should not have side effect dependency
ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_i, set_field10));
@@ -352,13 +363,13 @@ class SchedulerTest : public OptimizingUnitTest {
#if defined(ART_ENABLE_CODEGEN_arm64)
TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) {
CriticalPathSchedulingNodeSelector critical_path_selector;
- arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector);
+ arm64::HSchedulerARM64 scheduler(&critical_path_selector);
TestBuildDependencyGraphAndSchedule(&scheduler);
}
TEST_F(SchedulerTest, ArrayAccessAliasingARM64) {
CriticalPathSchedulingNodeSelector critical_path_selector;
- arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector);
+ arm64::HSchedulerARM64 scheduler(&critical_path_selector);
TestDependencyGraphOnAliasingArrayAccesses(&scheduler);
}
#endif
@@ -367,14 +378,14 @@ TEST_F(SchedulerTest, ArrayAccessAliasingARM64) {
TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) {
CriticalPathSchedulingNodeSelector critical_path_selector;
arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
- arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor);
+ arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor);
TestBuildDependencyGraphAndSchedule(&scheduler);
}
TEST_F(SchedulerTest, ArrayAccessAliasingARM) {
CriticalPathSchedulingNodeSelector critical_path_selector;
arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr);
- arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor);
+ arm::HSchedulerARM scheduler(&critical_path_selector, &arm_latency_visitor);
TestDependencyGraphOnAliasingArrayAccesses(&scheduler);
}
#endif
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index 66e51421ca..dcc7f77fc2 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -16,6 +16,7 @@
#include "select_generator.h"
+#include "base/scoped_arena_containers.h"
#include "reference_type_propagation.h"
namespace art {
@@ -43,12 +44,18 @@ static bool IsSimpleBlock(HBasicBlock* block) {
for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
HInstruction* instruction = it.Current();
if (instruction->IsControlFlow()) {
- if (num_instructions > kMaxInstructionsInBranch) {
- return false;
- }
return instruction->IsGoto() || instruction->IsReturn();
- } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) {
- num_instructions++;
+ } else if (instruction->CanBeMoved() &&
+ !instruction->HasSideEffects() &&
+ !instruction->CanThrow()) {
+ if (instruction->IsSelect() &&
+ instruction->AsSelect()->GetCondition()->GetBlock() == block) {
+ // Count an HSelect and its HCondition in the same block as a single instruction.
+ // This enables finding nested selects.
+ continue;
+ } else if (++num_instructions > kMaxInstructionsInBranch) {
+ return false; // Bail out as soon as we exceed the number of allowed instructions.
+ }
} else {
return false;
}
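
Counting an HSelect together with its same-block HCondition as one instruction matters for nested conditionals: after the inner diamond is converted, the generated pair would otherwise push the branch block over kMaxInstructionsInBranch. An illustrative source-level example of the shape this enables (not a test from the patch):

    // After the inner ternaries are converted, each becomes an HCondition +
    // HSelect pair inside an outer branch block; counted as one instruction,
    // the outer diamond still qualifies as "simple" when its block is visited
    // later in the same post-order pass.
    int Min3(int a, int b, int c) {
      return a < b ? (a < c ? a : c) : (b < c ? b : c);
    }

    int main() { return Min3(3, 1, 2) == 1 ? 0 : 1; }
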
@@ -85,10 +92,15 @@ static HPhi* GetSingleChangedPhi(HBasicBlock* block, size_t index1, size_t index
return select_phi;
}
-void HSelectGenerator::Run() {
+bool HSelectGenerator::Run() {
+ bool didSelect = false;
+ // Select cache with local allocator.
+ ScopedArenaAllocator allocator(graph_->GetArenaStack());
+ ScopedArenaSafeMap<HInstruction*, HSelect*> cache(
+ std::less<HInstruction*>(), allocator.Adapter(kArenaAllocSelectGenerator));
+
// Iterate in post order in the unlikely case that removing one occurrence of
// the selection pattern empties a branch block of another occurrence.
- // Otherwise the order does not matter.
for (HBasicBlock* block : graph_->GetPostOrder()) {
if (!block->EndsWithIf()) continue;
@@ -97,6 +109,7 @@ void HSelectGenerator::Run() {
HBasicBlock* true_block = if_instruction->IfTrueSuccessor();
HBasicBlock* false_block = if_instruction->IfFalseSuccessor();
DCHECK_NE(true_block, false_block);
+
if (!IsSimpleBlock(true_block) ||
!IsSimpleBlock(false_block) ||
!BlocksMergeTogether(true_block, false_block)) {
@@ -107,11 +120,15 @@ void HSelectGenerator::Run() {
// If the branches are not empty, move instructions in front of the If.
// TODO(dbrazdil): This puts an instruction between If and its condition.
// Implement moving of conditions to first users if possible.
- if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
- true_block->GetFirstInstruction()->MoveBefore(if_instruction);
+ while (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) {
+ HInstruction* instr = true_block->GetFirstInstruction();
+ DCHECK(!instr->CanThrow());
+ instr->MoveBefore(if_instruction);
}
- if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
- false_block->GetFirstInstruction()->MoveBefore(if_instruction);
+ while (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) {
+ HInstruction* instr = false_block->GetFirstInstruction();
+ DCHECK(!instr->CanThrow());
+ instr->MoveBefore(if_instruction);
}
DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn());
DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn());
@@ -138,7 +155,8 @@ void HSelectGenerator::Run() {
DCHECK(both_successors_return || phi != nullptr);
// Create the Select instruction and insert it in front of the If.
- HSelect* select = new (graph_->GetAllocator()) HSelect(if_instruction->InputAt(0),
+ HInstruction* condition = if_instruction->InputAt(0);
+ HSelect* select = new (graph_->GetAllocator()) HSelect(condition,
true_value,
false_value,
if_instruction->GetDexPc());
@@ -175,12 +193,34 @@ void HSelectGenerator::Run() {
MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated);
+ // Very simple way of finding common subexpressions in the generated HSelect statements
+ // (since this runs after GVN). Look up by condition, and reuse the latest one if possible
+ // (due to post-order iteration, the latest select is the most likely replacement). If
+ // needed, we could improve this by e.g. using the operands in the map as well.
+ auto it = cache.find(condition);
+ if (it == cache.end()) {
+ cache.Put(condition, select);
+ } else {
+ // Found cached value. See if latest can replace cached in the HIR.
+ HSelect* cached = it->second;
+ DCHECK_EQ(cached->GetCondition(), select->GetCondition());
+ if (cached->GetTrueValue() == select->GetTrueValue() &&
+ cached->GetFalseValue() == select->GetFalseValue() &&
+ select->StrictlyDominates(cached)) {
+ cached->ReplaceWith(select);
+ cached->GetBlock()->RemoveInstruction(cached);
+ }
+ it->second = select; // always cache latest
+ }
+
// No need to update dominance information, as we are simplifying
// a simple diamond shape, where the join block is merged with the
// entry block. Any following blocks would have had the join block
// as a dominator, and `MergeWith` handles changing that to the
// entry block.
+ didSelect = true;
}
+ return didSelect;
}
} // namespace art
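
The cache added above keys generated selects by their condition and keeps the most recent one, removing an older select when the newer one has identical operands and strictly dominates it. A standalone model of that bookkeeping using plain std::map and value stand-ins; the dominance check is elided and only noted in a comment:

    #include <cassert>
    #include <map>

    struct Select {
      int condition;    // stand-in for the HCondition pointer
      int true_value;
      int false_value;
      bool removed = false;
    };

    void CacheSelect(std::map<int, Select*>& cache, Select* select) {
      auto it = cache.find(select->condition);
      if (it == cache.end()) {
        cache.emplace(select->condition, select);
      } else {
        Select* cached = it->second;
        // The pass additionally requires select->StrictlyDominates(cached).
        if (cached->true_value == select->true_value &&
            cached->false_value == select->false_value) {
          cached->removed = true;  // models ReplaceWith(select) + RemoveInstruction
        }
        it->second = select;  // always remember the latest select
      }
    }

    int main() {
      std::map<int, Select*> cache;
      Select s1{/* condition= */ 7, 1, 2};
      Select s2{/* condition= */ 7, 1, 2};
      CacheSelect(cache, &s1);
      CacheSelect(cache, &s2);
      assert(s1.removed && cache[7] == &s2);
      return 0;
    }
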
diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h
index bda57fd5c8..2889166f60 100644
--- a/compiler/optimizing/select_generator.h
+++ b/compiler/optimizing/select_generator.h
@@ -68,7 +68,7 @@ class HSelectGenerator : public HOptimization {
OptimizingCompilerStats* stats,
const char* name = kSelectGeneratorPassName);
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kSelectGeneratorPassName = "select_generator";
diff --git a/compiler/optimizing/select_generator_test.cc b/compiler/optimizing/select_generator_test.cc
new file mode 100644
index 0000000000..6e6549737c
--- /dev/null
+++ b/compiler/optimizing/select_generator_test.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "select_generator.h"
+
+#include "base/arena_allocator.h"
+#include "builder.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "side_effects_analysis.h"
+
+namespace art {
+
+class SelectGeneratorTest : public ImprovedOptimizingUnitTest {
+ public:
+ void ConstructBasicGraphForSelect(HInstruction* instr) {
+ HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_);
+ HBasicBlock* then_block = new (GetAllocator()) HBasicBlock(graph_);
+ HBasicBlock* else_block = new (GetAllocator()) HBasicBlock(graph_);
+
+ graph_->AddBlock(if_block);
+ graph_->AddBlock(then_block);
+ graph_->AddBlock(else_block);
+
+ entry_block_->ReplaceSuccessor(return_block_, if_block);
+
+ if_block->AddSuccessor(then_block);
+ if_block->AddSuccessor(else_block);
+ then_block->AddSuccessor(return_block_);
+ else_block->AddSuccessor(return_block_);
+
+ HParameterValue* bool_param = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
+ dex::TypeIndex(0),
+ 1,
+ DataType::Type::kBool);
+ entry_block_->AddInstruction(bool_param);
+ HIntConstant* const1 = graph_->GetIntConstant(1);
+
+ if_block->AddInstruction(new (GetAllocator()) HIf(bool_param));
+
+ then_block->AddInstruction(instr);
+ then_block->AddInstruction(new (GetAllocator()) HGoto());
+
+ else_block->AddInstruction(new (GetAllocator()) HGoto());
+
+ HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
+ return_block_->AddPhi(phi);
+ phi->AddInput(instr);
+ phi->AddInput(const1);
+ }
+
+ bool CheckGraphAndTrySelectGenerator() {
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ SideEffectsAnalysis side_effects(graph_);
+ side_effects.Run();
+ return HSelectGenerator(graph_, /*handles*/ nullptr, /*stats*/ nullptr).Run();
+ }
+};
+
+// HDivZeroCheck might throw and must not be hoisted out of the conditional block
+// into an unconditional one.
+TEST_F(SelectGeneratorTest, testZeroCheck) {
+ InitGraph();
+ HDivZeroCheck* instr = new (GetAllocator()) HDivZeroCheck(parameter_, 0);
+ ConstructBasicGraphForSelect(instr);
+
+ ArenaVector<HInstruction*> current_locals({parameter_, graph_->GetIntConstant(1)},
+ GetAllocator()->Adapter(kArenaAllocInstruction));
+ ManuallyBuildEnvFor(instr, &current_locals);
+
+ EXPECT_FALSE(CheckGraphAndTrySelectGenerator());
+}
+
+// Test that SelectGenerator succeeds with HAdd.
+TEST_F(SelectGeneratorTest, testAdd) {
+ InitGraph();
+ HAdd* instr = new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter_, parameter_, 0);
+ ConstructBasicGraphForSelect(instr);
+ EXPECT_TRUE(CheckGraphAndTrySelectGenerator());
+}
+
+} // namespace art
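
The two tests above pin down the pass's key safety rule: an if/then/else diamond feeding a phi can be flattened into a select only if the instruction hoisted out of the then-block cannot throw. A minimal standalone C++ sketch of that contract (illustrative only — Diamond and TrySelect are invented names, not ART's IR):

#include <cassert>
#include <optional>

struct Diamond {
  bool then_may_throw;  // models HDivZeroCheck sitting in the then-block
  int then_value;
  int else_value;
};

// Returns the selected value when the diamond can be flattened, or nullopt
// when the hoisted instruction might throw and the branch must be kept.
std::optional<int> TrySelect(const Diamond& d, bool condition) {
  if (d.then_may_throw) {
    return std::nullopt;  // hoisting would make the throw unconditional
  }
  return condition ? d.then_value : d.else_value;  // behaves like HSelect
}

int main() {
  assert(TrySelect(Diamond{false, 7, 1}, true) == 7);  // the testAdd-like case
  assert(!TrySelect(Diamond{true, 7, 1}, true));       // the testZeroCheck-like case
  return 0;
}
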
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 1e49411c72..8637db13ad 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -21,7 +21,6 @@
#include "base/enums.h"
#include "class_linker.h"
#include "code_generator.h"
-#include "driver/compiler_driver.h"
#include "driver/compiler_options.h"
#include "driver/dex_compilation_unit.h"
#include "gc/heap.h"
@@ -36,23 +35,6 @@
namespace art {
-void HSharpening::Run() {
- // We don't care about the order of the blocks here.
- for (HBasicBlock* block : graph_->GetReversePostOrder()) {
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* instruction = it.Current();
- if (instruction->IsInvokeStaticOrDirect()) {
- SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(),
- codegen_,
- compiler_driver_);
- }
- // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder
- // here. Rewrite it to avoid the CompilerDriver's reliance on verifier data
- // because we know the type better when inlining.
- }
- }
-}
-
static bool IsInBootImage(ArtMethod* method) {
const std::vector<gc::space::ImageSpace*>& image_spaces =
Runtime::Current()->GetHeap()->GetBootImageSpaces();
@@ -65,34 +47,23 @@ static bool IsInBootImage(ArtMethod* method) {
return false;
}
-static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) {
- return IsInBootImage(method) && !options.GetCompilePic();
-}
-
-static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) {
- DCHECK(compiler_driver->GetCompilerOptions().IsBootImage());
- if (!compiler_driver->GetSupportBootImageFixup()) {
- return false;
- }
+static bool BootImageAOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& compiler_options) {
+ DCHECK(compiler_options.IsBootImage());
ScopedObjectAccess soa(Thread::Current());
ObjPtr<mirror::Class> klass = method->GetDeclaringClass();
DCHECK(klass != nullptr);
const DexFile& dex_file = klass->GetDexFile();
- return compiler_driver->IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex()));
+ return compiler_options.IsImageClass(dex_file.StringByTypeIdx(klass->GetDexTypeIndex()));
}
-void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver) {
- if (invoke->IsStringInit()) {
- // Not using the dex cache arrays. But we could still try to use a better dispatch...
- // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
- return;
+HInvokeStaticOrDirect::DispatchInfo HSharpening::SharpenInvokeStaticOrDirect(
+ ArtMethod* callee, CodeGenerator* codegen) {
+ if (kIsDebugBuild) {
+ ScopedObjectAccess soa(Thread::Current()); // Required for GetDeclaringClass below.
+ DCHECK(callee != nullptr);
+ DCHECK(!(callee->IsConstructor() && callee->GetDeclaringClass()->IsStringClass()));
}
- ArtMethod* callee = invoke->GetResolvedMethod();
- DCHECK(callee != nullptr);
-
HInvokeStaticOrDirect::MethodLoadKind method_load_kind;
HInvokeStaticOrDirect::CodePtrLocation code_ptr_location;
uint64_t method_load_data = 0u;
@@ -110,23 +81,34 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
// We don't optimize for debuggable as it would prevent us from obsoleting the method in some
// situations.
+ const CompilerOptions& compiler_options = codegen->GetCompilerOptions();
if (callee == codegen->GetGraph()->GetArtMethod() && !codegen->GetGraph()->IsDebuggable()) {
// Recursive call.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
- } else if (Runtime::Current()->UseJitCompilation() ||
- AOTCanEmbedMethod(callee, codegen->GetCompilerOptions())) {
+ } else if (compiler_options.IsBootImage()) {
+ if (!compiler_options.GetCompilePic()) {
+ // Test configuration, do not sharpen.
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall;
+ } else if (BootImageAOTCanEmbedMethod(callee, compiler_options)) {
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative;
+ } else {
+ // Use PC-relative access to the .bss methods array.
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry;
+ }
+ code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+ } else if (Runtime::Current()->UseJitCompilation()) {
// JIT compilation.
// Use the method address directly.
- method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress;
method_load_data = reinterpret_cast<uintptr_t>(callee);
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
- } else if (codegen->GetCompilerOptions().IsBootImage() &&
- BootImageAOTCanEmbedMethod(callee, compiler_driver)) {
- method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative;
+ } else if (IsInBootImage(callee)) {
+ // Use PC-relative access to the .data.bimg.rel.ro methods array.
+ method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
} else {
- // Use PC-relative access to the .bss methods arrays.
+ // Use PC-relative access to the .bss methods array.
method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry;
code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
}
@@ -140,15 +122,12 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
HInvokeStaticOrDirect::DispatchInfo desired_dispatch_info = {
method_load_kind, code_ptr_location, method_load_data
};
- HInvokeStaticOrDirect::DispatchInfo dispatch_info =
- codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
- invoke->SetDispatchInfo(dispatch_info);
+ return codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, callee);
}
HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
HLoadClass* load_class,
CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
const DexCompilationUnit& dex_compilation_unit) {
Handle<mirror::Class> klass = load_class->GetClass();
DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall ||
@@ -172,29 +151,29 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
bool is_in_boot_image = false;
HLoadClass::LoadKind desired_load_kind = HLoadClass::LoadKind::kInvalid;
Runtime* runtime = Runtime::Current();
- if (codegen->GetCompilerOptions().IsBootImage()) {
+ const CompilerOptions& compiler_options = codegen->GetCompilerOptions();
+ if (compiler_options.IsBootImage()) {
// Compiling boot image. Check if the class is a boot image class.
DCHECK(!runtime->UseJitCompilation());
- if (!compiler_driver->GetSupportBootImageFixup()) {
- // compiler_driver_test. Do not sharpen.
+ if (!compiler_options.GetCompilePic()) {
+ // Test configuration, do not sharpen.
desired_load_kind = HLoadClass::LoadKind::kRuntimeCall;
} else if ((klass != nullptr) &&
- compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) {
+ compiler_options.IsImageClass(dex_file.StringByTypeIdx(type_index))) {
is_in_boot_image = true;
desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
} else {
// Not a boot image class.
- DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
+ DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file));
desired_load_kind = HLoadClass::LoadKind::kBssEntry;
}
} else {
is_in_boot_image = (klass != nullptr) &&
runtime->GetHeap()->ObjectIsInBootImageSpace(klass.Get());
if (runtime->UseJitCompilation()) {
- DCHECK(!codegen->GetCompilerOptions().GetCompilePic());
+ DCHECK(!compiler_options.GetCompilePic());
if (is_in_boot_image) {
- // TODO: Use direct pointers for all non-moving spaces, not just boot image. Bug: 29530787
- desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
+ desired_load_kind = HLoadClass::LoadKind::kJitBootImageAddress;
} else if (klass != nullptr) {
desired_load_kind = HLoadClass::LoadKind::kJitTableAddress;
} else {
@@ -206,11 +185,7 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
}
} else if (is_in_boot_image) {
// AOT app compilation, boot image class.
- if (codegen->GetCompilerOptions().GetCompilePic()) {
- desired_load_kind = HLoadClass::LoadKind::kBootImageClassTable;
- } else {
- desired_load_kind = HLoadClass::LoadKind::kBootImageAddress;
- }
+ desired_load_kind = HLoadClass::LoadKind::kBootImageRelRo;
} else {
// Not JIT and the klass is not in boot image.
desired_load_kind = HLoadClass::LoadKind::kBssEntry;
@@ -236,10 +211,75 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(
return load_kind;
}
+static inline bool CanUseTypeCheckBitstring(ObjPtr<mirror::Class> klass, CodeGenerator* codegen)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ DCHECK(!klass->IsProxyClass());
+ DCHECK(!klass->IsArrayClass());
+
+ if (Runtime::Current()->UseJitCompilation()) {
+ // If we're JITting, try to assign a type check bitstring (fall through).
+ } else if (codegen->GetCompilerOptions().IsBootImage()) {
+ const char* descriptor = klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex());
+ if (!codegen->GetCompilerOptions().IsImageClass(descriptor)) {
+ return false;
+ }
+ // If the target is a boot image class, try to assign a type check bitstring (fall through).
+ // (If --force-determinism, this was already done; repeating is OK and yields the same result.)
+ } else {
+ // TODO: Use the bitstring also for AOT app compilation if the target class has a bitstring
+ // already assigned in the boot image.
+ return false;
+ }
+
+ // Try to assign a type check bitstring.
+ MutexLock subtype_check_lock(Thread::Current(), *Locks::subtype_check_lock_);
+ if ((false) && // FIXME: Inliner does not respect CompilerDriver::ShouldCompileMethod()
+ // and we're hitting an unassigned bitstring in dex2oat_image_test. b/26687569
+ kIsDebugBuild &&
+ codegen->GetCompilerOptions().IsBootImage() &&
+ codegen->GetCompilerOptions().IsForceDeterminism()) {
+ SubtypeCheckInfo::State old_state = SubtypeCheck<ObjPtr<mirror::Class>>::GetState(klass);
+ CHECK(old_state == SubtypeCheckInfo::kAssigned || old_state == SubtypeCheckInfo::kOverflowed)
+ << klass->PrettyDescriptor() << "/" << old_state
+ << " in " << codegen->GetGraph()->PrettyMethod();
+ }
+ SubtypeCheckInfo::State state = SubtypeCheck<ObjPtr<mirror::Class>>::EnsureAssigned(klass);
+ return state == SubtypeCheckInfo::kAssigned;
+}
+
+TypeCheckKind HSharpening::ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+ CodeGenerator* codegen,
+ bool needs_access_check) {
+ if (klass == nullptr) {
+ return TypeCheckKind::kUnresolvedCheck;
+ } else if (klass->IsInterface()) {
+ return TypeCheckKind::kInterfaceCheck;
+ } else if (klass->IsArrayClass()) {
+ if (klass->GetComponentType()->IsObjectClass()) {
+ return TypeCheckKind::kArrayObjectCheck;
+ } else if (klass->CannotBeAssignedFromOtherTypes()) {
+ return TypeCheckKind::kExactCheck;
+ } else {
+ return TypeCheckKind::kArrayCheck;
+ }
+ } else if (klass->IsFinal()) { // TODO: Consider using bitstring for final classes.
+ return TypeCheckKind::kExactCheck;
+ } else if (kBitstringSubtypeCheckEnabled &&
+ !needs_access_check &&
+ CanUseTypeCheckBitstring(klass, codegen)) {
+ // TODO: We should not need the `!needs_access_check` check but getting rid of that
+ // requires rewriting some optimizations in instruction simplifier.
+ return TypeCheckKind::kBitstringCheck;
+ } else if (klass->IsAbstract()) {
+ return TypeCheckKind::kAbstractClassCheck;
+ } else {
+ return TypeCheckKind::kClassHierarchyCheck;
+ }
+}
+
void HSharpening::ProcessLoadString(
HLoadString* load_string,
CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
const DexCompilationUnit& dex_compilation_unit,
VariableSizedHandleScope* handles) {
DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
@@ -258,17 +298,33 @@ void HSharpening::ProcessLoadString(
: hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
ObjPtr<mirror::String> string = nullptr;
- if (codegen->GetCompilerOptions().IsBootImage()) {
+ const CompilerOptions& compiler_options = codegen->GetCompilerOptions();
+ if (compiler_options.IsBootImage()) {
// Compiling boot image. Resolve the string and allocate it if needed, to ensure
// the string will be added to the boot image.
DCHECK(!runtime->UseJitCompilation());
- string = class_linker->ResolveString(string_index, dex_cache);
- CHECK(string != nullptr);
- if (compiler_driver->GetSupportBootImageFixup()) {
- DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file));
- desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative;
+ if (compiler_options.GetCompilePic()) {
+ DCHECK(ContainsElement(compiler_options.GetDexFilesForOatFile(), &dex_file));
+ if (compiler_options.IsForceDeterminism()) {
+ // Strings for the methods we're compiling should be pre-resolved, but Strings in
+ // inlined methods may not be if those methods are not in the boot image profile.
+ // Multiple threads allocating new Strings can make the boot image non-deterministic,
+ // because the image layout depends on the order of the GC roots we walk. (We could
+ // fix that by ordering the roots we walk in ImageWriter.) Therefore we avoid
+ // allocating these strings even if that means omitting them from the boot image and
+ // using the sub-optimal load kind kBssEntry.
+ string = class_linker->LookupString(string_index, dex_cache.Get());
+ } else {
+ string = class_linker->ResolveString(string_index, dex_cache);
+ CHECK(string != nullptr);
+ }
+ if (string != nullptr) {
+ desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative;
+ } else {
+ desired_load_kind = HLoadString::LoadKind::kBssEntry;
+ }
} else {
- // compiler_driver_test. Do not sharpen.
+ // Test configuration, do not sharpen.
desired_load_kind = HLoadString::LoadKind::kRuntimeCall;
}
} else if (runtime->UseJitCompilation()) {
@@ -276,7 +332,7 @@ void HSharpening::ProcessLoadString(
string = class_linker->LookupString(string_index, dex_cache.Get());
if (string != nullptr) {
if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
- desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
+ desired_load_kind = HLoadString::LoadKind::kJitBootImageAddress;
} else {
desired_load_kind = HLoadString::LoadKind::kJitTableAddress;
}
@@ -287,11 +343,7 @@ void HSharpening::ProcessLoadString(
// AOT app compilation. Try to lookup the string without allocating if not found.
string = class_linker->LookupString(string_index, dex_cache.Get());
if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
- if (codegen->GetCompilerOptions().GetCompilePic()) {
- desired_load_kind = HLoadString::LoadKind::kBootImageInternTable;
- } else {
- desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
- }
+ desired_load_kind = HLoadString::LoadKind::kBootImageRelRo;
} else {
desired_load_kind = HLoadString::LoadKind::kBssEntry;
}
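
Taken together, the rewritten ladder in SharpenInvokeStaticOrDirect now orders its cases as: recursive call, boot-image compilation (with a non-PIC test-configuration escape hatch), JIT, boot-image callee seen from an app AOT compile, and finally the .bss fallback. A standalone sketch of just that decision order, with plain booleans standing in for the CompilerOptions and Runtime queries (DispatchQuery and ChooseMethodLoadKind are invented names):

#include <cassert>

enum class MethodLoadKind {
  kRecursive,
  kRuntimeCall,
  kBootImageLinkTimePcRelative,
  kBssEntry,
  kJitDirectAddress,
  kBootImageRelRo,
};

struct DispatchQuery {
  bool is_recursive_call;
  bool compiling_boot_image;
  bool compile_pic;
  bool callee_is_image_class;
  bool use_jit;
  bool callee_in_boot_image;
};

MethodLoadKind ChooseMethodLoadKind(const DispatchQuery& q) {
  if (q.is_recursive_call) {
    return MethodLoadKind::kRecursive;
  }
  if (q.compiling_boot_image) {
    if (!q.compile_pic) {
      return MethodLoadKind::kRuntimeCall;  // test configuration, do not sharpen
    }
    if (q.callee_is_image_class) {
      return MethodLoadKind::kBootImageLinkTimePcRelative;
    }
    return MethodLoadKind::kBssEntry;  // PC-relative .bss methods array
  }
  if (q.use_jit) {
    return MethodLoadKind::kJitDirectAddress;  // embed the method address directly
  }
  if (q.callee_in_boot_image) {
    return MethodLoadKind::kBootImageRelRo;  // PC-relative .data.bimg.rel.ro entry
  }
  return MethodLoadKind::kBssEntry;
}

int main() {
  DispatchQuery jit{false, false, false, false, true, false};
  assert(ChooseMethodLoadKind(jit) == MethodLoadKind::kJitDirectAddress);
  return 0;
}
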
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 6df7d6d91e..b81867201f 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -23,49 +23,33 @@
namespace art {
class CodeGenerator;
-class CompilerDriver;
class DexCompilationUnit;
-// Optimization that tries to improve the way we dispatch methods and access types,
-// fields, etc. Besides actual method sharpening based on receiver type (for example
-// virtual->direct), this includes selecting the best available dispatch for
-// invoke-static/-direct based on code generator support.
-class HSharpening : public HOptimization {
+// Utility methods that try to improve the way we dispatch methods, and access
+// types and strings.
+class HSharpening {
public:
- HSharpening(HGraph* graph,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const char* name = kSharpeningPassName)
- : HOptimization(graph, name),
- codegen_(codegen),
- compiler_driver_(compiler_driver) { }
-
- void Run() OVERRIDE;
-
- static constexpr const char* kSharpeningPassName = "sharpening";
-
- // Used by the builder.
- static void ProcessLoadString(HLoadString* load_string,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
- const DexCompilationUnit& dex_compilation_unit,
- VariableSizedHandleScope* handles);
+ // Used by the builder and InstructionSimplifier.
+ static HInvokeStaticOrDirect::DispatchInfo SharpenInvokeStaticOrDirect(
+ ArtMethod* callee, CodeGenerator* codegen);
// Used by the builder and the inliner.
static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
CodeGenerator* codegen,
- CompilerDriver* compiler_driver,
const DexCompilationUnit& dex_compilation_unit)
REQUIRES_SHARED(Locks::mutator_lock_);
- // Used by Sharpening and InstructionSimplifier.
- static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
- CodeGenerator* codegen,
- CompilerDriver* compiler_driver);
+ // Used by the builder.
+ static TypeCheckKind ComputeTypeCheckKind(ObjPtr<mirror::Class> klass,
+ CodeGenerator* codegen,
+ bool needs_access_check)
+ REQUIRES_SHARED(Locks::mutator_lock_);
- private:
- CodeGenerator* codegen_;
- CompilerDriver* compiler_driver_;
+ // Used by the builder.
+ static void ProcessLoadString(HLoadString* load_string,
+ CodeGenerator* codegen,
+ const DexCompilationUnit& dex_compilation_unit,
+ VariableSizedHandleScope* handles);
};
} // namespace art
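
For the new ComputeTypeCheckKind helper declared above, the selection order implemented in sharpening.cc can be modeled as a standalone predicate chain. The struct below replaces the mirror::Class queries with booleans and is purely illustrative (KlassQuery and ComputeTypeCheckKindModel are invented names):

#include <cassert>

enum class TypeCheckKind {
  kUnresolvedCheck, kInterfaceCheck, kArrayObjectCheck, kArrayCheck,
  kExactCheck, kBitstringCheck, kAbstractClassCheck, kClassHierarchyCheck,
};

struct KlassQuery {
  bool resolved;
  bool is_interface;
  bool is_array;
  bool component_is_object;
  bool cannot_be_assigned_from_other_types;
  bool is_final;
  bool bitstring_assigned;  // kBitstringSubtypeCheckEnabled and EnsureAssigned succeeded
  bool is_abstract;
};

TypeCheckKind ComputeTypeCheckKindModel(const KlassQuery& k, bool needs_access_check) {
  if (!k.resolved) return TypeCheckKind::kUnresolvedCheck;
  if (k.is_interface) return TypeCheckKind::kInterfaceCheck;
  if (k.is_array) {
    if (k.component_is_object) return TypeCheckKind::kArrayObjectCheck;
    if (k.cannot_be_assigned_from_other_types) return TypeCheckKind::kExactCheck;
    return TypeCheckKind::kArrayCheck;
  }
  if (k.is_final) return TypeCheckKind::kExactCheck;
  if (!needs_access_check && k.bitstring_assigned) return TypeCheckKind::kBitstringCheck;
  if (k.is_abstract) return TypeCheckKind::kAbstractClassCheck;
  return TypeCheckKind::kClassHierarchyCheck;
}

int main() {
  KlassQuery final_class{true, false, false, false, false, true, false, false};
  assert(ComputeTypeCheckKindModel(final_class, false) == TypeCheckKind::kExactCheck);
  return 0;
}
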
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index 6d82e8e06d..ba97b43de9 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -18,7 +18,7 @@
namespace art {
-void SideEffectsAnalysis::Run() {
+bool SideEffectsAnalysis::Run() {
// Inlining might have created more blocks, so we need to increase the size
// if needed.
block_effects_.resize(graph_->GetBlocks().size());
@@ -69,6 +69,7 @@ void SideEffectsAnalysis::Run() {
}
}
has_run_ = true;
+ return true;
}
SideEffects SideEffectsAnalysis::GetLoopEffects(HBasicBlock* block) const {
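
The Run() signature change from void to bool, repeated across the passes in this change, lets every pass report an outcome to its caller; an analysis such as SideEffectsAnalysis simply reports success unconditionally. A minimal sketch of a driver consuming the result (illustrative only, not ART's actual pass manager):

#include <cstdio>
#include <vector>

struct PassEntry {
  const char* name;
  bool (*run)();  // true = "made a change" (or, for analyses, "succeeded")
};

bool SideEffectsAnalysisModel() { return true; }  // analyses always report success

void RunAll(const std::vector<PassEntry>& passes) {
  for (const PassEntry& pass : passes) {
    std::printf("%s: %s\n", pass.name, pass.run() ? "changed/ok" : "no change");
  }
}

int main() {
  RunAll({{"side_effects", &SideEffectsAnalysisModel}});
  return 0;
}
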
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
index c0f81a9c54..56a01e63f1 100644
--- a/compiler/optimizing/side_effects_analysis.h
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -37,7 +37,7 @@ class SideEffectsAnalysis : public HOptimization {
SideEffects GetBlockEffects(HBasicBlock* block) const;
// Compute side effects of individual blocks and loops.
- void Run();
+ bool Run();
bool HasRun() const { return has_run_; }
diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc
index 97317124ef..cf26e79c69 100644
--- a/compiler/optimizing/side_effects_test.cc
+++ b/compiler/optimizing/side_effects_test.cc
@@ -141,13 +141,13 @@ TEST(SideEffectsTest, NoDependences) {
TEST(SideEffectsTest, VolatileDependences) {
SideEffects volatile_write =
- SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ true);
+ SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile= */ true);
SideEffects any_write =
- SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ false);
+ SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile= */ false);
SideEffects volatile_read =
- SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile */ true);
+ SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile= */ true);
SideEffects any_read =
- SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile */ false);
+ SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile= */ false);
EXPECT_FALSE(volatile_write.MayDependOn(any_read));
EXPECT_TRUE(any_read.MayDependOn(volatile_write));
@@ -163,15 +163,15 @@ TEST(SideEffectsTest, VolatileDependences) {
TEST(SideEffectsTest, SameWidthTypesNoAlias) {
// Type I/F.
testNoWriteAndReadDependence(
- SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ false),
- SideEffects::FieldReadOfType(DataType::Type::kFloat32, /* is_volatile */ false));
+ SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile= */ false),
+ SideEffects::FieldReadOfType(DataType::Type::kFloat32, /* is_volatile= */ false));
testNoWriteAndReadDependence(
SideEffects::ArrayWriteOfType(DataType::Type::kInt32),
SideEffects::ArrayReadOfType(DataType::Type::kFloat32));
// Type L/D.
testNoWriteAndReadDependence(
- SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile */ false),
- SideEffects::FieldReadOfType(DataType::Type::kFloat64, /* is_volatile */ false));
+ SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile= */ false),
+ SideEffects::FieldReadOfType(DataType::Type::kFloat64, /* is_volatile= */ false));
testNoWriteAndReadDependence(
SideEffects::ArrayWriteOfType(DataType::Type::kInt64),
SideEffects::ArrayReadOfType(DataType::Type::kFloat64));
@@ -181,9 +181,9 @@ TEST(SideEffectsTest, AllWritesAndReads) {
SideEffects s = SideEffects::None();
// Keep taking the union of different writes and reads.
for (DataType::Type type : kTestTypes) {
- s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false));
+ s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile= */ false));
s = s.Union(SideEffects::ArrayWriteOfType(type));
- s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false));
+ s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile= */ false));
s = s.Union(SideEffects::ArrayReadOfType(type));
}
EXPECT_TRUE(s.DoesAllReadWrite());
@@ -202,6 +202,7 @@ TEST(SideEffectsTest, GC) {
EXPECT_TRUE(depends_on_gc.MayDependOn(all_changes));
EXPECT_TRUE(depends_on_gc.Union(can_trigger_gc).MayDependOn(all_changes));
EXPECT_FALSE(can_trigger_gc.MayDependOn(all_changes));
+ EXPECT_FALSE(can_trigger_gc.MayDependOn(can_trigger_gc));
EXPECT_TRUE(all_changes.Includes(can_trigger_gc));
EXPECT_FALSE(all_changes.Includes(depends_on_gc));
@@ -253,10 +254,10 @@ TEST(SideEffectsTest, BitStrings) {
"||I|||||",
SideEffects::ArrayReadOfType(DataType::Type::kInt32).ToString().c_str());
SideEffects s = SideEffects::None();
- s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kUint16, /* is_volatile */ false));
- s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile */ false));
+ s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kUint16, /* is_volatile= */ false));
+ s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile= */ false));
s = s.Union(SideEffects::ArrayWriteOfType(DataType::Type::kInt16));
- s = s.Union(SideEffects::FieldReadOfType(DataType::Type::kInt32, /* is_volatile */ false));
+ s = s.Union(SideEffects::FieldReadOfType(DataType::Type::kInt32, /* is_volatile= */ false));
s = s.Union(SideEffects::ArrayReadOfType(DataType::Type::kFloat32));
s = s.Union(SideEffects::ArrayReadOfType(DataType::Type::kFloat64));
EXPECT_STREQ("||DF|I||S|JC|", s.ToString().c_str());
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index dd54468217..0d0e1ecf1f 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -16,6 +16,9 @@
#include "ssa_builder.h"
+#include "base/arena_bit_vector.h"
+#include "base/bit_vector-inl.h"
+#include "base/logging.h"
#include "data_type-inl.h"
#include "dex/bytecode_utils.h"
#include "mirror/class-inl.h"
@@ -388,7 +391,7 @@ bool SsaBuilder::FixAmbiguousArrayOps() {
// succeed in code validated by the verifier.
HInstruction* equivalent = GetFloatOrDoubleEquivalent(value, array_type);
DCHECK(equivalent != nullptr);
- aset->ReplaceInput(equivalent, /* input_index */ 2);
+ aset->ReplaceInput(equivalent, /* index= */ 2);
if (equivalent->IsPhi()) {
// Returned equivalent is a phi which may not have had its inputs
// replaced yet. We need to run primitive type propagation on it.
@@ -415,29 +418,36 @@ bool SsaBuilder::FixAmbiguousArrayOps() {
return true;
}
-static bool HasAliasInEnvironments(HInstruction* instruction) {
- HEnvironment* last_user = nullptr;
+bool SsaBuilder::HasAliasInEnvironments(HInstruction* instruction) {
+ ScopedArenaHashSet<size_t> seen_users(
+ local_allocator_->Adapter(kArenaAllocGraphBuilder));
for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
DCHECK(use.GetUser() != nullptr);
- // Note: The first comparison (== null) always fails.
- if (use.GetUser() == last_user) {
+ size_t id = use.GetUser()->GetHolder()->GetId();
+ if (seen_users.find(id) != seen_users.end()) {
return true;
}
- last_user = use.GetUser();
+ seen_users.insert(id);
}
+ return false;
+}
- if (kIsDebugBuild) {
- // Do a quadratic search to ensure same environment uses are next
- // to each other.
- const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses();
- for (auto current = env_uses.begin(), end = env_uses.end(); current != end; ++current) {
- auto next = current;
- for (++next; next != end; ++next) {
- DCHECK(next->GetUser() != current->GetUser());
- }
+bool SsaBuilder::ReplaceUninitializedStringPhis() {
+ for (HInvoke* invoke : uninitialized_string_phis_) {
+ HInstruction* str = invoke->InputAt(invoke->InputCount() - 1);
+ if (str->IsPhi()) {
+ // If, after redundant and dead phi elimination, the input that feeds the invoke
+ // is still a phi, we must be compiling a method with irreducible loops. Just bail.
+ DCHECK(graph_->HasIrreducibleLoops());
+ return false;
}
+ DCHECK(str->IsNewInstance());
+ AddUninitializedString(str->AsNewInstance());
+ str->ReplaceUsesDominatedBy(invoke, invoke);
+ str->ReplaceEnvUsesDominatedBy(invoke, invoke);
+ invoke->RemoveInputAt(invoke->InputCount() - 1);
}
- return false;
+ return true;
}
void SsaBuilder::RemoveRedundantUninitializedStrings() {
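
The HasAliasInEnvironments rewrite above drops the assumption that environment uses of the same holder sit next to each other in the use list (previously enforced only by a quadratic debug-build search) and instead detects a repeated holder id with a hash set, in expected linear time. The idea in isolation, as a standalone sketch over plain ids:

#include <cstdint>
#include <unordered_set>
#include <vector>

bool HasDuplicateHolder(const std::vector<uint32_t>& env_holder_ids) {
  std::unordered_set<uint32_t> seen;
  for (uint32_t id : env_holder_ids) {
    if (!seen.insert(id).second) {
      return true;  // the same holder appeared twice: aliasing environment use
    }
  }
  return false;
}

int main() {
  return (!HasDuplicateHolder({1, 2, 3}) && HasDuplicateHolder({1, 2, 1})) ? 0 : 1;
}
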
@@ -452,8 +462,9 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() {
DCHECK(new_instance->IsStringAlloc());
// Replace NewInstance of String with NullConstant if not used prior to
- // calling StringFactory. In case of deoptimization, the interpreter is
- // expected to skip null check on the `this` argument of the StringFactory call.
+ // calling StringFactory. We check for alias environments in case of deoptimization.
+ // The interpreter is expected to skip the null check on the `this` argument of the
+ // StringFactory call.
if (!new_instance->HasNonEnvironmentUses() && !HasAliasInEnvironments(new_instance)) {
new_instance->ReplaceWith(graph_->GetNullConstant());
new_instance->GetBlock()->RemoveInstruction(new_instance);
@@ -488,35 +499,35 @@ void SsaBuilder::RemoveRedundantUninitializedStrings() {
GraphAnalysisResult SsaBuilder::BuildSsa() {
DCHECK(!graph_->IsInSsaForm());
- // 1) Propagate types of phis. At this point, phis are typed void in the general
+ // Propagate types of phis. At this point, phis are typed void in the general
// case, or float/double/reference if we created an equivalent phi. So we need
// to propagate the types across phis to give them a correct type. If a type
// conflict is detected in this stage, the phi is marked dead.
RunPrimitiveTypePropagation();
- // 2) Now that the correct primitive types have been assigned, we can get rid
+ // Now that the correct primitive types have been assigned, we can get rid
// of redundant phis. Note that we cannot do this phase before type propagation,
// otherwise we could get rid of phi equivalents, whose presence is a requirement
// for the type propagation phase. Note that this is to satisfy statement (a)
// of the SsaBuilder (see ssa_builder.h).
SsaRedundantPhiElimination(graph_).Run();
- // 3) Fix the type for null constants which are part of an equality comparison.
+ // Fix the type for null constants which are part of an equality comparison.
// We need to do this after redundant phi elimination, to ensure the only cases
// that we can see are reference comparison against 0. The redundant phi
// elimination ensures we do not see a phi taking two 0 constants in a HEqual
// or HNotEqual.
FixNullConstantType();
- // 4) Compute type of reference type instructions. The pass assumes that
+ // Compute type of reference type instructions. The pass assumes that
// NullConstant has been fixed up.
ReferenceTypePropagation(graph_,
class_loader_,
dex_cache_,
handles_,
- /* is_first_run */ true).Run();
+ /* is_first_run= */ true).Run();
- // 5) HInstructionBuilder duplicated ArrayGet instructions with ambiguous type
+ // HInstructionBuilder duplicated ArrayGet instructions with ambiguous type
// (int/float or long/double) and marked ArraySets with ambiguous input type.
// Now that RTP computed the type of the array input, the ambiguity can be
// resolved and the correct equivalents kept.
@@ -524,13 +535,13 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
return kAnalysisFailAmbiguousArrayOp;
}
- // 6) Mark dead phis. This will mark phis which are not used by instructions
+ // Mark dead phis. This will mark phis which are not used by instructions
// or other live phis. If compiling as debuggable code, phis will also be kept
// live if they have an environment use.
SsaDeadPhiElimination dead_phi_elimination(graph_);
dead_phi_elimination.MarkDeadPhis();
- // 7) Make sure environments use the right phi equivalent: a phi marked dead
+ // Make sure environments use the right phi equivalent: a phi marked dead
// can have a phi equivalent that is not dead. In that case we have to replace
// it with the live equivalent because deoptimization and try/catch rely on
// environments containing values of all live vregs at that point. Note that
@@ -539,14 +550,22 @@ GraphAnalysisResult SsaBuilder::BuildSsa() {
// environments to just reference one.
FixEnvironmentPhis();
- // 8) Now that the right phis are used for the environments, we can eliminate
+ // Now that the right phis are used for the environments, we can eliminate
// phis we do not need. Regardless of the debuggable status, this phase is
// necessary for statement (b) of the SsaBuilder (see ssa_builder.h), as well
// as for the code generation, which does not deal with phis of conflicting
// input types.
dead_phi_elimination.EliminateDeadPhis();
- // 9) HInstructionBuidler replaced uses of NewInstances of String with the
+ // Replace phis that feed a String.<init>, recorded during instruction building. We
+ // run this after redundant and dead phi elimination to make sure the phi has been
+ // replaced by the actual allocation. Only with an irreducible loop can a phi still
+ // be the input, in which case we bail.
+ if (!ReplaceUninitializedStringPhis()) {
+ return kAnalysisFailIrreducibleLoopAndStringInit;
+ }
+
+ // HInstructionBuilder replaced uses of NewInstances of String with the
// results of their corresponding StringFactory calls. Unless the String
// objects are used before they are initialized, they can be replaced with
// NullConstant. Note that this optimization is valid only if unsimplified
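
ReplaceUninitializedStringPhis above follows a deferred-fixup pattern: problem sites are recorded during instruction building, and resolved only after redundant and dead phi elimination has had a chance to collapse the phis. A standalone model of that control flow, with invented names (StringInitSite, AnalysisResult):

#include <vector>

enum class AnalysisResult { kOk, kFailIrreducibleLoopAndStringInit };

struct StringInitSite {
  bool input_is_still_phi;  // phi elimination normally removes the phi
};

AnalysisResult ReplaceUninitializedStringPhisModel(
    const std::vector<StringInitSite>& sites) {
  for (const StringInitSite& site : sites) {
    if (site.input_is_still_phi) {
      // A surviving phi implies an irreducible loop; bail out of the analysis.
      return AnalysisResult::kFailIrreducibleLoopAndStringInit;
    }
    // Otherwise: rewrite uses of the NewInstance to the StringFactory result
    // and drop the extra invoke input (elided here).
  }
  return AnalysisResult::kOk;
}

int main() {
  return ReplaceUninitializedStringPhisModel({{false}}) == AnalysisResult::kOk ? 0 : 1;
}
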
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index 60831a9e6a..bb892c9304 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -61,7 +61,8 @@ class SsaBuilder : public ValueObject {
local_allocator_(local_allocator),
ambiguous_agets_(local_allocator->Adapter(kArenaAllocGraphBuilder)),
ambiguous_asets_(local_allocator->Adapter(kArenaAllocGraphBuilder)),
- uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)) {
+ uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)),
+ uninitialized_string_phis_(local_allocator->Adapter(kArenaAllocGraphBuilder)) {
graph_->InitializeInexactObjectRTI(handles);
}
@@ -96,6 +97,10 @@ class SsaBuilder : public ValueObject {
}
}
+ void AddUninitializedStringPhi(HInvoke* invoke) {
+ uninitialized_string_phis_.push_back(invoke);
+ }
+
private:
void SetLoopHeaderPhiInputs();
void FixEnvironmentPhis();
@@ -118,6 +123,8 @@ class SsaBuilder : public ValueObject {
HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget);
void RemoveRedundantUninitializedStrings();
+ bool ReplaceUninitializedStringPhis();
+ bool HasAliasInEnvironments(HInstruction* instruction);
HGraph* const graph_;
Handle<mirror::ClassLoader> class_loader_;
@@ -131,6 +138,7 @@ class SsaBuilder : public ValueObject {
ScopedArenaVector<HArrayGet*> ambiguous_agets_;
ScopedArenaVector<HArraySet*> ambiguous_asets_;
ScopedArenaVector<HNewInstance*> uninitialized_strings_;
+ ScopedArenaVector<HInvoke*> uninitialized_string_phis_;
DISALLOW_COPY_AND_ASSIGN(SsaBuilder);
};
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index f6bd05269e..7b2c3a939c 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -103,9 +103,9 @@ void SsaLivenessAnalysis::ComputeLiveness() {
ComputeLiveInAndLiveOutSets();
}
-static void RecursivelyProcessInputs(HInstruction* current,
- HInstruction* actual_user,
- BitVector* live_in) {
+void SsaLivenessAnalysis::RecursivelyProcessInputs(HInstruction* current,
+ HInstruction* actual_user,
+ BitVector* live_in) {
HInputsRef inputs = current->GetInputs();
for (size_t i = 0; i < inputs.size(); ++i) {
HInstruction* input = inputs[i];
@@ -120,7 +120,7 @@ static void RecursivelyProcessInputs(HInstruction* current,
DCHECK(input->HasSsaIndex());
// `input` generates a result used by `current`. Add use and update
// the live-in set.
- input->GetLiveInterval()->AddUse(current, /* environment */ nullptr, i, actual_user);
+ input->GetLiveInterval()->AddUse(current, /* environment= */ nullptr, i, actual_user);
live_in->SetBit(input->GetSsaIndex());
} else if (has_out_location) {
// `input` generates a result but it is not used by `current`.
@@ -131,11 +131,40 @@ static void RecursivelyProcessInputs(HInstruction* current,
// Check that the inlined input is not a phi. Recursing on loop phis could
// lead to an infinite loop.
DCHECK(!input->IsPhi());
+ DCHECK(!input->HasEnvironment());
RecursivelyProcessInputs(input, actual_user, live_in);
}
}
}
+void SsaLivenessAnalysis::ProcessEnvironment(HInstruction* current,
+ HInstruction* actual_user,
+ BitVector* live_in) {
+ for (HEnvironment* environment = current->GetEnvironment();
+ environment != nullptr;
+ environment = environment->GetParent()) {
+ // Handle environment uses. See statements (b) and (c) of the
+ // SsaLivenessAnalysis.
+ for (size_t i = 0, e = environment->Size(); i < e; ++i) {
+ HInstruction* instruction = environment->GetInstructionAt(i);
+ if (instruction == nullptr) {
+ continue;
+ }
+ bool should_be_live = ShouldBeLiveForEnvironment(current, instruction);
+ // If this environment use does not keep the instruction live, it does not
+ // affect the live range of that instruction.
+ if (should_be_live) {
+ CHECK(instruction->HasSsaIndex()) << instruction->DebugName();
+ live_in->SetBit(instruction->GetSsaIndex());
+ instruction->GetLiveInterval()->AddUse(current,
+ environment,
+ i,
+ actual_user);
+ }
+ }
+ }
+}
+
void SsaLivenessAnalysis::ComputeLiveRanges() {
// Do a post order visit, adding inputs of instructions live in the block where
// that instruction is defined, and killing instructions that are being visited.
@@ -186,27 +215,6 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
current->GetLiveInterval()->SetFrom(current->GetLifetimePosition());
}
- // Process the environment first, because we know their uses come after
- // or at the same liveness position of inputs.
- for (HEnvironment* environment = current->GetEnvironment();
- environment != nullptr;
- environment = environment->GetParent()) {
- // Handle environment uses. See statements (b) and (c) of the
- // SsaLivenessAnalysis.
- for (size_t i = 0, e = environment->Size(); i < e; ++i) {
- HInstruction* instruction = environment->GetInstructionAt(i);
- bool should_be_live = ShouldBeLiveForEnvironment(current, instruction);
- if (should_be_live) {
- CHECK(instruction->HasSsaIndex()) << instruction->DebugName();
- live_in->SetBit(instruction->GetSsaIndex());
- }
- if (instruction != nullptr) {
- instruction->GetLiveInterval()->AddUse(
- current, environment, i, /* actual_user */ nullptr, should_be_live);
- }
- }
- }
-
// Process inputs of instructions.
if (current->IsEmittedAtUseSite()) {
if (kIsDebugBuild) {
@@ -219,6 +227,16 @@ void SsaLivenessAnalysis::ComputeLiveRanges() {
DCHECK(!current->HasEnvironmentUses());
}
} else {
+ // Process the environment first, because its uses come after or at the same
+ // liveness position as the uses of the inputs.
+ ProcessEnvironment(current, current, live_in);
+
+ // Special case implicit null checks. We want their environment uses to be
+ // emitted at the instruction doing the actual null check.
+ HNullCheck* check = current->GetImplicitNullCheck();
+ if (check != nullptr) {
+ ProcessEnvironment(check, current, live_in);
+ }
RecursivelyProcessInputs(current, current, live_in);
}
}
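
Hoisting the environment walk into ProcessEnvironment serves two purposes visible above: environment handling is skipped entirely for instructions emitted at their use site, and the environment of an implicit null check is processed again with the instruction doing the actual memory access as the user, so its use positions land where the fault can occur. The core live-in update, reduced to a standalone sketch (a vector<bool> stands in for the BitVector; -1 models the nullptr environment slot that the new code skips explicitly):

#include <vector>

void ProcessEnvironmentSlots(const std::vector<int>& env_slots,
                             std::vector<bool>* live_in) {
  for (int ssa_index : env_slots) {
    if (ssa_index < 0) {
      continue;  // empty vreg slot: nothing to keep live
    }
    (*live_in)[static_cast<size_t>(ssa_index)] = true;  // record the environment use
  }
}

int main() {
  std::vector<bool> live_in(4, false);
  ProcessEnvironmentSlots({0, -1, 3}, &live_in);
  return (live_in[0] && !live_in[1] && live_in[3]) ? 0 : 1;
}
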
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index f83bb52b69..c88390775c 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -60,7 +60,7 @@ class BlockInfo : public ArenaObject<kArenaAllocSsaLiveness> {
* A live range contains the start and end of a range where an instruction or a temporary
* is live.
*/
-class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> {
+class LiveRange final : public ArenaObject<kArenaAllocSsaLiveness> {
public:
LiveRange(size_t start, size_t end, LiveRange* next) : start_(start), end_(end), next_(next) {
DCHECK_LT(start, end);
@@ -230,12 +230,25 @@ class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> {
: instruction_(instruction),
next_(nullptr) {}
+ static size_t ComputePosition(HInstruction* instruction) {
+ // We special case instructions emitted at use site, as their
+ // safepoint position needs to be at their use.
+ if (instruction->IsEmittedAtUseSite()) {
+ // Currently only applies to implicit null checks, which are emitted
+ // at the next instruction.
+ DCHECK(instruction->IsNullCheck()) << instruction->DebugName();
+ return instruction->GetLifetimePosition() + 2;
+ } else {
+ return instruction->GetLifetimePosition();
+ }
+ }
+
void SetNext(SafepointPosition* next) {
next_ = next;
}
size_t GetPosition() const {
- return instruction_->GetLifetimePosition();
+ return ComputePosition(instruction_);
}
SafepointPosition* GetNext() const {
@@ -300,8 +313,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
void AddUse(HInstruction* instruction,
HEnvironment* environment,
size_t input_index,
- HInstruction* actual_user = nullptr,
- bool keep_alive = false) {
+ HInstruction* actual_user = nullptr) {
bool is_environment = (environment != nullptr);
LocationSummary* locations = instruction->GetLocations();
if (actual_user == nullptr) {
@@ -359,12 +371,6 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
uses_.push_front(*new_use);
}
- if (is_environment && !keep_alive) {
- // If this environment use does not keep the instruction live, it does not
- // affect the live range of that instruction.
- return;
- }
-
size_t start_block_position = instruction->GetBlock()->GetLifetimeStart();
if (first_range_ == nullptr) {
// First time we see a use of that interval.
@@ -929,7 +935,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
if (first_safepoint_ == nullptr) {
first_safepoint_ = last_safepoint_ = safepoint;
} else {
- DCHECK_LT(last_safepoint_->GetPosition(), safepoint->GetPosition());
+ DCHECK_LE(last_safepoint_->GetPosition(), safepoint->GetPosition());
last_safepoint_->SetNext(safepoint);
last_safepoint_ = safepoint;
}
@@ -1149,16 +1155,20 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> {
*
* (a) Non-environment uses of an instruction always make
* the instruction live.
- * (b) Environment uses of an instruction whose type is
- * object (that is, non-primitive), make the instruction live.
- * This is due to having to keep alive objects that have
- * finalizers deleting native objects.
+ * (b) Environment uses of an instruction whose type is object (that is, non-primitive) make the
+ * instruction live, unless the class has a @DeadReferenceSafe annotation.
+ * This avoids unexpected premature reference enqueuing or finalization, which could
+ * result in premature deletion of native objects. In the presence of @DeadReferenceSafe,
+ * object references are treated like primitive types.
* (c) When the graph has the debuggable property, environment uses
* of an instruction that has a primitive type make the instruction live.
* If the graph does not have the debuggable property, the environment
* use has no effect, and may get a 'none' value after register allocation.
+ * (d) When compiling in OSR mode, all loops in the compiled method may be entered
+ * from the interpreter via SuspendCheck; an environment use in such a SuspendCheck
+ * makes the instruction live.
*
- * (b) and (c) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment.
+ * (b), (c) and (d) are implemented through SsaLivenessAnalysis::ShouldBeLiveForEnvironment.
*/
class SsaLivenessAnalysis : public ValueObject {
public:
@@ -1256,17 +1266,29 @@ class SsaLivenessAnalysis : public ValueObject {
// Update the live_out set of the block and returns whether it has changed.
bool UpdateLiveOut(const HBasicBlock& block);
+ static void ProcessEnvironment(HInstruction* instruction,
+ HInstruction* actual_user,
+ BitVector* live_in);
+ static void RecursivelyProcessInputs(HInstruction* instruction,
+ HInstruction* actual_user,
+ BitVector* live_in);
+
// Returns whether `instruction` in an HEnvironment held by `env_holder`
// should be kept live by the HEnvironment.
static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, HInstruction* instruction) {
- if (instruction == nullptr) return false;
+ DCHECK(instruction != nullptr);
// A value that's not live in compiled code may still be needed in interpreter,
// due to code motion, etc.
if (env_holder->IsDeoptimize()) return true;
// A value live at a throwing instruction in a try block may be copied by
// the exception handler to its location at the top of the catch block.
if (env_holder->CanThrowIntoCatchBlock()) return true;
- if (instruction->GetBlock()->GetGraph()->IsDebuggable()) return true;
+ HGraph* graph = instruction->GetBlock()->GetGraph();
+ if (graph->IsDebuggable()) return true;
+ // When compiling in OSR mode, all loops in the compiled method may be entered
+ // from the interpreter via SuspendCheck; thus we need to preserve the environment.
+ if (env_holder->IsSuspendCheck() && graph->IsCompilingOsr()) return true;
+ if (graph->IsDeadReferenceSafe()) return false;
return instruction->GetType() == DataType::Type::kReference;
}
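
The ShouldBeLiveForEnvironment ladder above now encodes rules (a) through (d) plus the @DeadReferenceSafe escape. Restated as a standalone predicate over stand-in booleans (EnvUseQuery and ShouldBeLiveModel are invented names):

struct EnvUseQuery {
  bool holder_is_deoptimize;
  bool holder_can_throw_into_catch;
  bool graph_is_debuggable;
  bool holder_is_suspend_check;
  bool compiling_osr;
  bool graph_is_dead_reference_safe;
  bool value_is_reference;
};

bool ShouldBeLiveModel(const EnvUseQuery& q) {
  if (q.holder_is_deoptimize) return true;                        // value needed on deopt
  if (q.holder_can_throw_into_catch) return true;                 // catch handler may copy it
  if (q.graph_is_debuggable) return true;                         // rule (c)
  if (q.holder_is_suspend_check && q.compiling_osr) return true;  // rule (d)
  if (q.graph_is_dead_reference_safe) return false;               // @DeadReferenceSafe opt-out
  return q.value_is_reference;                                    // rule (b)
}

int main() {
  EnvUseQuery osr_use{false, false, false, true, true, false, false};
  return ShouldBeLiveModel(osr_use) ? 0 : 1;
}
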
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
index b9bfbaa173..352c44f63a 100644
--- a/compiler/optimizing/ssa_liveness_analysis_test.cc
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -28,18 +28,11 @@
namespace art {
class SsaLivenessAnalysisTest : public OptimizingUnitTest {
- public:
- SsaLivenessAnalysisTest()
- : graph_(CreateGraph()),
- compiler_options_(),
- instruction_set_(kRuntimeISA) {
- std::string error_msg;
- instruction_set_features_ =
- InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg);
- codegen_ = CodeGenerator::Create(graph_,
- instruction_set_,
- *instruction_set_features_,
- compiler_options_);
+ protected:
+ void SetUp() override {
+ OptimizingUnitTest::SetUp();
+ graph_ = CreateGraph();
+ codegen_ = CodeGenerator::Create(graph_, *compiler_options_);
CHECK(codegen_ != nullptr) << instruction_set_ << " is not a supported target architecture.";
// Create entry block.
entry_ = new (GetAllocator()) HBasicBlock(graph_);
@@ -57,9 +50,6 @@ class SsaLivenessAnalysisTest : public OptimizingUnitTest {
}
HGraph* graph_;
- CompilerOptions compiler_options_;
- InstructionSet instruction_set_;
- std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
std::unique_ptr<CodeGenerator> codegen_;
HBasicBlock* entry_;
};
@@ -104,25 +94,25 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) {
HInstruction* null_check = new (GetAllocator()) HNullCheck(array, 0);
block->AddInstruction(null_check);
HEnvironment* null_check_env = new (GetAllocator()) HEnvironment(GetAllocator(),
- /* number_of_vregs */ 5,
- /* method */ nullptr,
- /* dex_pc */ 0u,
+ /* number_of_vregs= */ 5,
+ /* method= */ nullptr,
+ /* dex_pc= */ 0u,
null_check);
null_check_env->CopyFrom(ArrayRef<HInstruction* const>(args));
null_check->SetRawEnvironment(null_check_env);
HInstruction* length = new (GetAllocator()) HArrayLength(array, 0);
block->AddInstruction(length);
- HInstruction* bounds_check = new (GetAllocator()) HBoundsCheck(index, length, /* dex_pc */ 0u);
+ HInstruction* bounds_check = new (GetAllocator()) HBoundsCheck(index, length, /* dex_pc= */ 0u);
block->AddInstruction(bounds_check);
HEnvironment* bounds_check_env = new (GetAllocator()) HEnvironment(GetAllocator(),
- /* number_of_vregs */ 5,
- /* method */ nullptr,
- /* dex_pc */ 0u,
+ /* number_of_vregs= */ 5,
+ /* method= */ nullptr,
+ /* dex_pc= */ 0u,
bounds_check);
bounds_check_env->CopyFrom(ArrayRef<HInstruction* const>(args));
bounds_check->SetRawEnvironment(bounds_check_env);
HInstruction* array_set =
- new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc */ 0);
+ new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc= */ 0);
block->AddInstruction(array_set);
graph_->BuildDominatorTree();
@@ -134,12 +124,12 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) {
static const char* const expected[] = {
"ranges: { [2,21) }, uses: { 15 17 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
"is_high: 0",
- "ranges: { [4,21) }, uses: { 19 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+ "ranges: { [4,21) }, uses: { 19 21 }, { } is_fixed: 0, is_split: 0 is_low: 0 "
"is_high: 0",
- "ranges: { [6,21) }, uses: { 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+ "ranges: { [6,21) }, uses: { 21 }, { } is_fixed: 0, is_split: 0 is_low: 0 "
"is_high: 0",
// Environment uses do not keep the non-reference argument alive.
- "ranges: { [8,10) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+ "ranges: { [8,10) }, uses: { }, { } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
// Environment uses keep the reference argument alive.
"ranges: { [10,19) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
};
@@ -173,9 +163,9 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
HInstruction* null_check = new (GetAllocator()) HNullCheck(array, 0);
block->AddInstruction(null_check);
HEnvironment* null_check_env = new (GetAllocator()) HEnvironment(GetAllocator(),
- /* number_of_vregs */ 5,
- /* method */ nullptr,
- /* dex_pc */ 0u,
+ /* number_of_vregs= */ 5,
+ /* method= */ nullptr,
+ /* dex_pc= */ 0u,
null_check);
null_check_env->CopyFrom(ArrayRef<HInstruction* const>(args));
null_check->SetRawEnvironment(null_check_env);
@@ -185,17 +175,17 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
HInstruction* ae = new (GetAllocator()) HAboveOrEqual(index, length);
block->AddInstruction(ae);
HInstruction* deoptimize = new(GetAllocator()) HDeoptimize(
- GetAllocator(), ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u);
+ GetAllocator(), ae, DeoptimizationKind::kBlockBCE, /* dex_pc= */ 0u);
block->AddInstruction(deoptimize);
HEnvironment* deoptimize_env = new (GetAllocator()) HEnvironment(GetAllocator(),
- /* number_of_vregs */ 5,
- /* method */ nullptr,
- /* dex_pc */ 0u,
+ /* number_of_vregs= */ 5,
+ /* method= */ nullptr,
+ /* dex_pc= */ 0u,
deoptimize);
deoptimize_env->CopyFrom(ArrayRef<HInstruction* const>(args));
deoptimize->SetRawEnvironment(deoptimize_env);
HInstruction* array_set =
- new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc */ 0);
+ new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc= */ 0);
block->AddInstruction(array_set);
graph_->BuildDominatorTree();
@@ -207,11 +197,11 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
static const char* const expected[] = {
"ranges: { [2,23) }, uses: { 15 17 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 "
"is_high: 0",
- "ranges: { [4,23) }, uses: { 19 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 "
+ "ranges: { [4,23) }, uses: { 19 23 }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 "
"is_high: 0",
- "ranges: { [6,23) }, uses: { 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+ "ranges: { [6,23) }, uses: { 23 }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
// Environment use in HDeoptimize keeps even the non-reference argument alive.
- "ranges: { [8,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+ "ranges: { [8,21) }, uses: { }, { 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
// Environment uses keep the reference argument alive.
"ranges: { [10,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
};
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index cb27ded17a..3fcb72e4fb 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -23,9 +23,10 @@
namespace art {
-void SsaDeadPhiElimination::Run() {
+bool SsaDeadPhiElimination::Run() {
MarkDeadPhis();
EliminateDeadPhis();
+ return true;
}
void SsaDeadPhiElimination::MarkDeadPhis() {
@@ -122,7 +123,7 @@ void SsaDeadPhiElimination::EliminateDeadPhis() {
}
}
-void SsaRedundantPhiElimination::Run() {
+bool SsaRedundantPhiElimination::Run() {
// Use local allocator for allocating memory used by this optimization.
ScopedArenaAllocator allocator(graph_->GetArenaStack());
@@ -140,7 +141,7 @@ void SsaRedundantPhiElimination::Run() {
ArenaBitVector visited_phis_in_cycle(&allocator,
graph_->GetCurrentInstructionId(),
- /* expandable */ false,
+ /* expandable= */ false,
kArenaAllocSsaPhiElimination);
visited_phis_in_cycle.ClearAllBits();
ScopedArenaVector<HPhi*> cycle_worklist(allocator.Adapter(kArenaAllocSsaPhiElimination));
@@ -255,6 +256,7 @@ void SsaRedundantPhiElimination::Run() {
current->GetBlock()->RemovePhi(current);
}
}
+ return true;
}
} // namespace art
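
For context on what SsaRedundantPhiElimination removes: a phi whose inputs, ignoring self-references through loop back-edges, are all the same value can be replaced by that value. A toy version over plain SSA value ids (the real pass additionally handles cycles of mutually redundant phis via the worklist shown above):

#include <optional>
#include <vector>

constexpr int kSelf = -1;  // marks an input that is the phi itself

std::optional<int> RedundantPhiReplacement(const std::vector<int>& inputs) {
  std::optional<int> candidate;
  for (int input : inputs) {
    if (input == kSelf) continue;  // ignore self inputs
    if (candidate.has_value() && *candidate != input) {
      return std::nullopt;  // two distinct inputs: the phi is genuinely needed
    }
    candidate = input;
  }
  return candidate;  // the single distinct input, if any
}

int main() {
  return RedundantPhiReplacement({5, kSelf, 5}) == 5 ? 0 : 1;
}
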
diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h
index 11d5837eb5..c5cc752ffc 100644
--- a/compiler/optimizing/ssa_phi_elimination.h
+++ b/compiler/optimizing/ssa_phi_elimination.h
@@ -31,7 +31,7 @@ class SsaDeadPhiElimination : public HOptimization {
explicit SsaDeadPhiElimination(HGraph* graph)
: HOptimization(graph, kSsaDeadPhiEliminationPassName) {}
- void Run() OVERRIDE;
+ bool Run() override;
void MarkDeadPhis();
void EliminateDeadPhis();
@@ -53,7 +53,7 @@ class SsaRedundantPhiElimination : public HOptimization {
explicit SsaRedundantPhiElimination(HGraph* graph)
: HOptimization(graph, kSsaRedundantPhiEliminationPassName) {}
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kSsaRedundantPhiEliminationPassName = "redundant_phi_elimination";
diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc
index 85ed06eb9b..e679893af2 100644
--- a/compiler/optimizing/ssa_test.cc
+++ b/compiler/optimizing/ssa_test.cc
@@ -38,15 +38,15 @@ class SsaPrettyPrinter : public HPrettyPrinter {
public:
explicit SsaPrettyPrinter(HGraph* graph) : HPrettyPrinter(graph), str_("") {}
- void PrintInt(int value) OVERRIDE {
+ void PrintInt(int value) override {
str_ += android::base::StringPrintf("%d", value);
}
- void PrintString(const char* value) OVERRIDE {
+ void PrintString(const char* value) override {
str_ += value;
}
- void PrintNewLine() OVERRIDE {
+ void PrintNewLine() override {
str_ += '\n';
}
@@ -54,7 +54,7 @@ class SsaPrettyPrinter : public HPrettyPrinter {
std::string str() const { return str_; }
- void VisitIntConstant(HIntConstant* constant) OVERRIDE {
+ void VisitIntConstant(HIntConstant* constant) override {
PrintPreInstruction(constant);
str_ += constant->DebugName();
str_ += " ";
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 7010e3f380..60ca61c133 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -16,682 +16,312 @@
#include "stack_map_stream.h"
+#include <memory>
+
#include "art_method-inl.h"
#include "base/stl_util.h"
#include "dex/dex_file_types.h"
#include "optimizing/optimizing_compiler.h"
#include "runtime.h"
#include "scoped_thread_state_change-inl.h"
+#include "stack_map.h"
namespace art {
-void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
- uint32_t native_pc_offset,
- uint32_t register_mask,
- BitVector* sp_mask,
- uint32_t num_dex_registers,
- uint8_t inlining_depth) {
- DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
- current_entry_.dex_pc = dex_pc;
- current_entry_.native_pc_code_offset = CodeOffset::FromOffset(native_pc_offset, instruction_set_);
- current_entry_.register_mask = register_mask;
- current_entry_.sp_mask = sp_mask;
- current_entry_.inlining_depth = inlining_depth;
- current_entry_.inline_infos_start_index = inline_infos_.size();
- current_entry_.stack_mask_index = 0;
- current_entry_.dex_method_index = dex::kDexNoIndex;
- current_entry_.dex_register_entry.num_dex_registers = num_dex_registers;
- current_entry_.dex_register_entry.locations_start_index = dex_register_locations_.size();
- current_entry_.dex_register_entry.live_dex_registers_mask = nullptr;
- if (num_dex_registers != 0u) {
- current_entry_.dex_register_entry.live_dex_registers_mask =
- ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
- current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits();
- }
- if (sp_mask != nullptr) {
- stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet());
- }
- if (inlining_depth > 0) {
- number_of_stack_maps_with_inline_info_++;
- }
+constexpr static bool kVerifyStackMaps = kIsDebugBuild;
- // Note: dex_pc can be kNoDexPc for native method intrinsics.
- if (dex_pc != dex::kDexNoIndex && (dex_pc_max_ == dex::kDexNoIndex || dex_pc_max_ < dex_pc)) {
- dex_pc_max_ = dex_pc;
- }
- register_mask_max_ = std::max(register_mask_max_, register_mask);
- current_dex_register_ = 0;
+uint32_t StackMapStream::GetStackMapNativePcOffset(size_t i) {
+ return StackMap::UnpackNativePc(stack_maps_[i][StackMap::kPackedNativePc], instruction_set_);
}
-void StackMapStream::EndStackMapEntry() {
- current_entry_.dex_register_map_index = AddDexRegisterMapEntry(current_entry_.dex_register_entry);
- stack_maps_.push_back(current_entry_);
- current_entry_ = StackMapEntry();
+void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
+ stack_maps_[i][StackMap::kPackedNativePc] =
+ StackMap::PackNativePc(native_pc_offset, instruction_set_);
}
-void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) {
- if (kind != DexRegisterLocation::Kind::kNone) {
- // Ensure we only use non-compressed location kind at this stage.
- DCHECK(DexRegisterLocation::IsShortLocationKind(kind)) << kind;
- DexRegisterLocation location(kind, value);
-
- // Look for Dex register `location` in the location catalog (using the
- // companion hash map of locations to indices). Use its index if it
- // is already in the location catalog. If not, insert it (in the
- // location catalog and the hash map) and use the newly created index.
- auto it = location_catalog_entries_indices_.Find(location);
- if (it != location_catalog_entries_indices_.end()) {
- // Retrieve the index from the hash map.
- dex_register_locations_.push_back(it->second);
- } else {
- // Create a new entry in the location catalog and the hash map.
- size_t index = location_catalog_entries_.size();
- location_catalog_entries_.push_back(location);
- dex_register_locations_.push_back(index);
- location_catalog_entries_indices_.Insert(std::make_pair(location, index));
- }
- DexRegisterMapEntry* const entry = in_inline_frame_
- ? &current_inline_info_.dex_register_entry
- : &current_entry_.dex_register_entry;
- DCHECK_LT(current_dex_register_, entry->num_dex_registers);
- entry->live_dex_registers_mask->SetBit(current_dex_register_);
- entry->hash += (1 <<
- (current_dex_register_ % (sizeof(DexRegisterMapEntry::hash) * kBitsPerByte)));
- entry->hash += static_cast<uint32_t>(value);
- entry->hash += static_cast<uint32_t>(kind);
- }
- current_dex_register_++;
+void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
+ size_t core_spill_mask,
+ size_t fp_spill_mask,
+ uint32_t num_dex_registers) {
+ DCHECK(!in_method_) << "Mismatched Begin/End calls";
+ in_method_ = true;
+ DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called";
+
+ DCHECK_ALIGNED(frame_size_in_bytes, kStackAlignment);
+ packed_frame_size_ = frame_size_in_bytes / kStackAlignment;
+ core_spill_mask_ = core_spill_mask;
+ fp_spill_mask_ = fp_spill_mask;
+ num_dex_registers_ = num_dex_registers;
}
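// Worked example for the packing above (a sketch, assuming kStackAlignment == 16):
// a 64-byte frame is stored as packed_frame_size_ == 4; the DCHECK_ALIGNED
// guarantees the division is exact. The mask values here are hypothetical.
//
//   stream.BeginMethod(/* frame_size_in_bytes */ 64u,
//                      /* core_spill_mask */ 0x4020u,
//                      /* fp_spill_mask */ 0u,
//                      /* num_dex_registers */ 2u);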
-void StackMapStream::AddInvoke(InvokeType invoke_type, uint32_t dex_method_index) {
- current_entry_.invoke_type = invoke_type;
- current_entry_.dex_method_index = dex_method_index;
-}
+void StackMapStream::EndMethod() {
+ DCHECK(in_method_) << "Mismatched Begin/End calls";
+ in_method_ = false;
-void StackMapStream::BeginInlineInfoEntry(ArtMethod* method,
- uint32_t dex_pc,
- uint32_t num_dex_registers,
- const DexFile* outer_dex_file) {
- DCHECK(!in_inline_frame_);
- in_inline_frame_ = true;
- if (EncodeArtMethodInInlineInfo(method)) {
- current_inline_info_.method = method;
- } else {
- if (dex_pc != static_cast<uint32_t>(-1) && kIsDebugBuild) {
- ScopedObjectAccess soa(Thread::Current());
- DCHECK(IsSameDexFile(*outer_dex_file, *method->GetDexFile()));
+ // Read the stack masks now. The compiler might have updated them.
+ for (size_t i = 0; i < lazy_stack_masks_.size(); i++) {
+ BitVector* stack_mask = lazy_stack_masks_[i];
+ if (stack_mask != nullptr && stack_mask->GetNumberOfBits() != 0) {
+ stack_maps_[i][StackMap::kStackMaskIndex] =
+ stack_masks_.Dedup(stack_mask->GetRawStorage(), stack_mask->GetNumberOfBits());
}
- current_inline_info_.method_index = method->GetDexMethodIndexUnchecked();
- }
- current_inline_info_.dex_pc = dex_pc;
- current_inline_info_.dex_register_entry.num_dex_registers = num_dex_registers;
- current_inline_info_.dex_register_entry.locations_start_index = dex_register_locations_.size();
- current_inline_info_.dex_register_entry.live_dex_registers_mask = nullptr;
- if (num_dex_registers != 0) {
- current_inline_info_.dex_register_entry.live_dex_registers_mask =
- ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream);
- current_inline_info_.dex_register_entry.live_dex_registers_mask->ClearAllBits();
- }
- current_dex_register_ = 0;
-}
-
-void StackMapStream::EndInlineInfoEntry() {
- current_inline_info_.dex_register_map_index =
- AddDexRegisterMapEntry(current_inline_info_.dex_register_entry);
- DCHECK(in_inline_frame_);
- DCHECK_EQ(current_dex_register_, current_inline_info_.dex_register_entry.num_dex_registers)
- << "Inline information contains less registers than expected";
- in_inline_frame_ = false;
- inline_infos_.push_back(current_inline_info_);
- current_inline_info_ = InlineInfoEntry();
-}
-
-CodeOffset StackMapStream::ComputeMaxNativePcCodeOffset() const {
- CodeOffset max_native_pc_offset;
- for (const StackMapEntry& entry : stack_maps_) {
- max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_code_offset);
- }
- return max_native_pc_offset;
-}
-
-size_t StackMapStream::PrepareForFillIn() {
- CodeInfoEncoding encoding;
- encoding.dex_register_map.num_entries = 0; // TODO: Remove this field.
- encoding.dex_register_map.num_bytes = ComputeDexRegisterMapsSize();
- encoding.location_catalog.num_entries = location_catalog_entries_.size();
- encoding.location_catalog.num_bytes = ComputeDexRegisterLocationCatalogSize();
- encoding.inline_info.num_entries = inline_infos_.size();
- // Must be done before calling ComputeInlineInfoEncoding since ComputeInlineInfoEncoding requires
- // dex_method_index_idx to be filled in.
- PrepareMethodIndices();
- ComputeInlineInfoEncoding(&encoding.inline_info.encoding,
- encoding.dex_register_map.num_bytes);
- CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset();
- // Prepare the CodeInfo variable-sized encoding.
- encoding.stack_mask.encoding.num_bits = stack_mask_max_ + 1; // Need room for max element too.
- encoding.stack_mask.num_entries = PrepareStackMasks(encoding.stack_mask.encoding.num_bits);
- encoding.register_mask.encoding.num_bits = MinimumBitsToStore(register_mask_max_);
- encoding.register_mask.num_entries = PrepareRegisterMasks();
- encoding.stack_map.num_entries = stack_maps_.size();
- encoding.stack_map.encoding.SetFromSizes(
- // The stack map contains compressed native PC offsets.
- max_native_pc_offset.CompressedValue(),
- dex_pc_max_,
- encoding.dex_register_map.num_bytes,
- encoding.inline_info.num_entries,
- encoding.register_mask.num_entries,
- encoding.stack_mask.num_entries);
- ComputeInvokeInfoEncoding(&encoding);
- DCHECK_EQ(code_info_encoding_.size(), 0u);
- encoding.Compress(&code_info_encoding_);
- encoding.ComputeTableOffsets();
- // Compute table offsets so we can get the non header size.
- DCHECK_EQ(encoding.HeaderSize(), code_info_encoding_.size());
- needed_size_ = code_info_encoding_.size() + encoding.NonHeaderSize();
- return needed_size_;
-}
-
-size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const {
- size_t size = DexRegisterLocationCatalog::kFixedSize;
- for (const DexRegisterLocation& dex_register_location : location_catalog_entries_) {
- size += DexRegisterLocationCatalog::EntrySize(dex_register_location);
- }
- return size;
-}
-
-size_t StackMapStream::DexRegisterMapEntry::ComputeSize(size_t catalog_size) const {
- // For num_dex_registers == 0u live_dex_registers_mask may be null.
- if (num_dex_registers == 0u) {
- return 0u; // No register map will be emitted.
}
- DCHECK(live_dex_registers_mask != nullptr);
-
- // Size of the map in bytes.
- size_t size = DexRegisterMap::kFixedSize;
- // Add the live bit mask for the Dex register liveness.
- size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers);
- // Compute the size of the set of live Dex register entries.
- size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits();
- size_t map_entries_size_in_bits =
- DexRegisterMap::SingleEntrySizeInBits(catalog_size) * number_of_live_dex_registers;
- size_t map_entries_size_in_bytes =
- RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte;
- size += map_entries_size_in_bytes;
- return size;
}
-size_t StackMapStream::ComputeDexRegisterMapsSize() const {
- size_t size = 0;
- for (const DexRegisterMapEntry& entry : dex_register_entries_) {
- size += entry.ComputeSize(location_catalog_entries_.size());
- }
- return size;
-}
-
-void StackMapStream::ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding) {
- DCHECK(encoding != nullptr);
- uint32_t native_pc_max = 0;
- uint16_t method_index_max = 0;
- size_t invoke_infos_count = 0;
- size_t invoke_type_max = 0;
- for (const StackMapEntry& entry : stack_maps_) {
- if (entry.dex_method_index != dex::kDexNoIndex) {
- native_pc_max = std::max(native_pc_max, entry.native_pc_code_offset.CompressedValue());
- method_index_max = std::max(method_index_max, static_cast<uint16_t>(entry.dex_method_index));
- invoke_type_max = std::max(invoke_type_max, static_cast<size_t>(entry.invoke_type));
- ++invoke_infos_count;
+void StackMapStream::BeginStackMapEntry(uint32_t dex_pc,
+ uint32_t native_pc_offset,
+ uint32_t register_mask,
+ BitVector* stack_mask,
+ StackMap::Kind kind) {
+ DCHECK(in_method_) << "Call BeginMethod first";
+ DCHECK(!in_stack_map_) << "Mismatched Begin/End calls";
+ in_stack_map_ = true;
+
+ current_stack_map_ = BitTableBuilder<StackMap>::Entry();
+ current_stack_map_[StackMap::kKind] = static_cast<uint32_t>(kind);
+ current_stack_map_[StackMap::kPackedNativePc] =
+ StackMap::PackNativePc(native_pc_offset, instruction_set_);
+ current_stack_map_[StackMap::kDexPc] = dex_pc;
+ if (stack_maps_.size() > 0) {
+    // Check that non-catch stack maps are sorted by native pc.
+    // Catch stack maps are at the end and may be unordered.
+ if (stack_maps_.back()[StackMap::kKind] == StackMap::Kind::Catch) {
+ DCHECK(current_stack_map_[StackMap::kKind] == StackMap::Kind::Catch);
+ } else if (current_stack_map_[StackMap::kKind] != StackMap::Kind::Catch) {
+ DCHECK_LE(stack_maps_.back()[StackMap::kPackedNativePc],
+ current_stack_map_[StackMap::kPackedNativePc]);
}
}
- encoding->invoke_info.num_entries = invoke_infos_count;
- encoding->invoke_info.encoding.SetFromSizes(native_pc_max, invoke_type_max, method_index_max);
-}
-
-void StackMapStream::ComputeInlineInfoEncoding(InlineInfoEncoding* encoding,
- size_t dex_register_maps_bytes) {
- uint32_t method_index_max = 0;
- uint32_t dex_pc_max = dex::kDexNoIndex;
- uint32_t extra_data_max = 0;
-
- uint32_t inline_info_index = 0;
- for (const StackMapEntry& entry : stack_maps_) {
- for (size_t j = 0; j < entry.inlining_depth; ++j) {
- InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
- if (inline_entry.method == nullptr) {
- method_index_max = std::max(method_index_max, inline_entry.dex_method_index_idx);
- extra_data_max = std::max(extra_data_max, 1u);
- } else {
- method_index_max = std::max(
- method_index_max, High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
- extra_data_max = std::max(
- extra_data_max, Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
+ if (register_mask != 0) {
+ uint32_t shift = LeastSignificantBit(register_mask);
+ BitTableBuilder<RegisterMask>::Entry entry;
+ entry[RegisterMask::kValue] = register_mask >> shift;
+ entry[RegisterMask::kShift] = shift;
+ current_stack_map_[StackMap::kRegisterMaskIndex] = register_masks_.Dedup(&entry);
+ }
+  // The compiler might still modify the stack mask after this call (see
+  // ClearSpillSlotsFromLoopPhisInStackMap in code_generator.h), so just store
+  // the pointer here; the bits are read later, in EndMethod().
+ lazy_stack_masks_.push_back(stack_mask);
+ current_inline_infos_.clear();
+ current_dex_registers_.clear();
+ expected_num_dex_registers_ = num_dex_registers_;
+
+ if (kVerifyStackMaps) {
+ size_t stack_map_index = stack_maps_.size();
+    // Create a lambda function which will be executed at the very end to verify the data.
+    // Parameters and local variables are captured (copied) by value via "[=]".
+ dchecks_.emplace_back([=](const CodeInfo& code_info) {
+ if (kind == StackMap::Kind::Default || kind == StackMap::Kind::OSR) {
+ StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset,
+ instruction_set_);
+ CHECK_EQ(stack_map.Row(), stack_map_index);
+ } else if (kind == StackMap::Kind::Catch) {
+ StackMap stack_map = code_info.GetCatchStackMapForDexPc(dex_pc);
+ CHECK_EQ(stack_map.Row(), stack_map_index);
}
- if (inline_entry.dex_pc != dex::kDexNoIndex &&
- (dex_pc_max == dex::kDexNoIndex || dex_pc_max < inline_entry.dex_pc)) {
- dex_pc_max = inline_entry.dex_pc;
+ StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
+ CHECK_EQ(stack_map.GetNativePcOffset(instruction_set_), native_pc_offset);
+ CHECK_EQ(stack_map.GetKind(), static_cast<uint32_t>(kind));
+ CHECK_EQ(stack_map.GetDexPc(), dex_pc);
+ CHECK_EQ(code_info.GetRegisterMaskOf(stack_map), register_mask);
+ BitMemoryRegion seen_stack_mask = code_info.GetStackMaskOf(stack_map);
+ CHECK_GE(seen_stack_mask.size_in_bits(), stack_mask ? stack_mask->GetNumberOfBits() : 0);
+ for (size_t b = 0; b < seen_stack_mask.size_in_bits(); b++) {
+ CHECK_EQ(seen_stack_mask.LoadBit(b), stack_mask != nullptr && stack_mask->IsBitSet(b));
}
- }
+ });
}
- DCHECK_EQ(inline_info_index, inline_infos_.size());
-
- encoding->SetFromSizes(method_index_max, dex_pc_max, extra_data_max, dex_register_maps_bytes);
}
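// Sketch of the RegisterMask packing used above: the mask is stored as the
// pair (value >> shift, shift), so masks covering only high registers stay
// small and dedup well. For example, for a mask covering r4 and r5:
//
//   uint32_t register_mask = 0b110000;                    // == 0x30
//   uint32_t shift = LeastSignificantBit(register_mask);  // == 4
//   uint32_t value = register_mask >> shift;              // == 0b11
//   // Stored entry: {kValue = 0b11, kShift = 4}; a reader recovers the
//   // original mask as value << shift.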
-size_t StackMapStream::MaybeCopyDexRegisterMap(DexRegisterMapEntry& entry,
- size_t* current_offset,
- MemoryRegion dex_register_locations_region) {
- DCHECK(current_offset != nullptr);
- if ((entry.num_dex_registers == 0) || (entry.live_dex_registers_mask->NumSetBits() == 0)) {
- // No dex register map needed.
- return StackMap::kNoDexRegisterMap;
- }
- if (entry.offset == DexRegisterMapEntry::kOffsetUnassigned) {
- // Not already copied, need to copy and and assign an offset.
- entry.offset = *current_offset;
- const size_t entry_size = entry.ComputeSize(location_catalog_entries_.size());
- DexRegisterMap dex_register_map(
- dex_register_locations_region.Subregion(entry.offset, entry_size));
- *current_offset += entry_size;
- // Fill in the map since it was just added.
- FillInDexRegisterMap(dex_register_map,
- entry.num_dex_registers,
- *entry.live_dex_registers_mask,
- entry.locations_start_index);
- }
- return entry.offset;
-}
-
-void StackMapStream::FillInMethodInfo(MemoryRegion region) {
- {
- MethodInfo info(region.begin(), method_indices_.size());
- for (size_t i = 0; i < method_indices_.size(); ++i) {
- info.SetMethodIndex(i, method_indices_[i]);
- }
- }
- if (kIsDebugBuild) {
- // Check the data matches.
- MethodInfo info(region.begin());
- const size_t count = info.NumMethodIndices();
- DCHECK_EQ(count, method_indices_.size());
- for (size_t i = 0; i < count; ++i) {
- DCHECK_EQ(info.GetMethodIndex(i), method_indices_[i]);
- }
- }
-}
-
-void StackMapStream::FillInCodeInfo(MemoryRegion region) {
- DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
- DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
-
- DCHECK_EQ(region.size(), needed_size_);
-
- // Note that the memory region does not have to be zeroed when we JIT code
- // because we do not use the arena allocator there.
-
- // Write the CodeInfo header.
- region.CopyFrom(0, MemoryRegion(code_info_encoding_.data(), code_info_encoding_.size()));
-
- CodeInfo code_info(region);
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- DCHECK_EQ(encoding.stack_map.num_entries, stack_maps_.size());
-
- MemoryRegion dex_register_locations_region = region.Subregion(
- encoding.dex_register_map.byte_offset,
- encoding.dex_register_map.num_bytes);
-
- // Set the Dex register location catalog.
- MemoryRegion dex_register_location_catalog_region = region.Subregion(
- encoding.location_catalog.byte_offset,
- encoding.location_catalog.num_bytes);
- DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region);
- // Offset in `dex_register_location_catalog` where to store the next
- // register location.
- size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize;
- for (DexRegisterLocation dex_register_location : location_catalog_entries_) {
- dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location);
- location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location);
- }
- // Ensure we reached the end of the Dex registers location_catalog.
- DCHECK_EQ(location_catalog_offset, dex_register_location_catalog_region.size());
-
- ArenaBitVector empty_bitmask(allocator_, 0, /* expandable */ false, kArenaAllocStackMapStream);
- uintptr_t next_dex_register_map_offset = 0;
- uintptr_t next_inline_info_index = 0;
- size_t invoke_info_idx = 0;
- for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) {
- StackMap stack_map = code_info.GetStackMapAt(i, encoding);
- StackMapEntry entry = stack_maps_[i];
-
- stack_map.SetDexPc(encoding.stack_map.encoding, entry.dex_pc);
- stack_map.SetNativePcCodeOffset(encoding.stack_map.encoding, entry.native_pc_code_offset);
- stack_map.SetRegisterMaskIndex(encoding.stack_map.encoding, entry.register_mask_index);
- stack_map.SetStackMaskIndex(encoding.stack_map.encoding, entry.stack_mask_index);
-
- size_t offset = MaybeCopyDexRegisterMap(dex_register_entries_[entry.dex_register_map_index],
- &next_dex_register_map_offset,
- dex_register_locations_region);
- stack_map.SetDexRegisterMapOffset(encoding.stack_map.encoding, offset);
-
- if (entry.dex_method_index != dex::kDexNoIndex) {
- InvokeInfo invoke_info(code_info.GetInvokeInfo(encoding, invoke_info_idx));
- invoke_info.SetNativePcCodeOffset(encoding.invoke_info.encoding, entry.native_pc_code_offset);
- invoke_info.SetInvokeType(encoding.invoke_info.encoding, entry.invoke_type);
- invoke_info.SetMethodIndexIdx(encoding.invoke_info.encoding, entry.dex_method_index_idx);
- ++invoke_info_idx;
- }
+void StackMapStream::EndStackMapEntry() {
+ DCHECK(in_stack_map_) << "Mismatched Begin/End calls";
+ in_stack_map_ = false;
- // Set the inlining info.
- if (entry.inlining_depth != 0) {
- InlineInfo inline_info = code_info.GetInlineInfo(next_inline_info_index, encoding);
-
- // Fill in the index.
- stack_map.SetInlineInfoIndex(encoding.stack_map.encoding, next_inline_info_index);
- DCHECK_EQ(next_inline_info_index, entry.inline_infos_start_index);
- next_inline_info_index += entry.inlining_depth;
-
- inline_info.SetDepth(encoding.inline_info.encoding, entry.inlining_depth);
- DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size());
-
- for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
- InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
- if (inline_entry.method != nullptr) {
- inline_info.SetMethodIndexIdxAtDepth(
- encoding.inline_info.encoding,
- depth,
- High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
- inline_info.SetExtraDataAtDepth(
- encoding.inline_info.encoding,
- depth,
- Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
- } else {
- inline_info.SetMethodIndexIdxAtDepth(encoding.inline_info.encoding,
- depth,
- inline_entry.dex_method_index_idx);
- inline_info.SetExtraDataAtDepth(encoding.inline_info.encoding, depth, 1);
- }
- inline_info.SetDexPcAtDepth(encoding.inline_info.encoding, depth, inline_entry.dex_pc);
- size_t dex_register_map_offset = MaybeCopyDexRegisterMap(
- dex_register_entries_[inline_entry.dex_register_map_index],
- &next_dex_register_map_offset,
- dex_register_locations_region);
- inline_info.SetDexRegisterMapOffsetAtDepth(encoding.inline_info.encoding,
- depth,
- dex_register_map_offset);
- }
- } else if (encoding.stack_map.encoding.GetInlineInfoEncoding().BitSize() > 0) {
- stack_map.SetInlineInfoIndex(encoding.stack_map.encoding, StackMap::kNoInlineInfo);
- }
+ // Generate index into the InlineInfo table.
+ size_t inlining_depth = current_inline_infos_.size();
+ if (!current_inline_infos_.empty()) {
+ current_inline_infos_.back()[InlineInfo::kIsLast] = InlineInfo::kLast;
+ current_stack_map_[StackMap::kInlineInfoIndex] =
+ inline_infos_.Dedup(current_inline_infos_.data(), current_inline_infos_.size());
}
- // Write stack masks table.
- const size_t stack_mask_bits = encoding.stack_mask.encoding.BitSize();
- if (stack_mask_bits > 0) {
- size_t stack_mask_bytes = RoundUp(stack_mask_bits, kBitsPerByte) / kBitsPerByte;
- for (size_t i = 0; i < encoding.stack_mask.num_entries; ++i) {
- MemoryRegion source(&stack_masks_[i * stack_mask_bytes], stack_mask_bytes);
- BitMemoryRegion stack_mask = code_info.GetStackMask(i, encoding);
- for (size_t bit_index = 0; bit_index < stack_mask_bits; ++bit_index) {
- stack_mask.StoreBit(bit_index, source.LoadBit(bit_index));
- }
- }
+ // Generate delta-compressed dex register map.
+ size_t num_dex_registers = current_dex_registers_.size();
+ if (!current_dex_registers_.empty()) {
+ DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size());
+ CreateDexRegisterMap();
}
- // Write register masks table.
- for (size_t i = 0; i < encoding.register_mask.num_entries; ++i) {
- BitMemoryRegion register_mask = code_info.GetRegisterMask(i, encoding);
- register_mask.StoreBits(0, register_masks_[i], encoding.register_mask.encoding.BitSize());
- }
+ stack_maps_.Add(current_stack_map_);
- // Verify all written data in debug build.
- if (kIsDebugBuild) {
- CheckCodeInfo(region);
+ if (kVerifyStackMaps) {
+ size_t stack_map_index = stack_maps_.size() - 1;
+ dchecks_.emplace_back([=](const CodeInfo& code_info) {
+ StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
+ CHECK_EQ(stack_map.HasDexRegisterMap(), (num_dex_registers != 0));
+ CHECK_EQ(stack_map.HasInlineInfo(), (inlining_depth != 0));
+ CHECK_EQ(code_info.GetInlineInfosOf(stack_map).size(), inlining_depth);
+ });
}
}
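// Typical call sequence for the new API (a minimal sketch; the values are
// hypothetical, see stack_map_test.cc below for complete examples):
//
//   StackMapStream stream(&allocator, kRuntimeISA);
//   stream.BeginMethod(/* frame_size_in_bytes */ 32u, /* core_spill_mask */ 0u,
//                      /* fp_spill_mask */ 0u, /* num_dex_registers */ 2u);
//   // For each safepoint:
//   stream.BeginStackMapEntry(dex_pc, native_pc_offset, register_mask, &sp_mask);
//   stream.AddDexRegisterEntry(Kind::kInStack, 0);   // one entry per dex register
//   stream.AddDexRegisterEntry(Kind::kConstant, -2);
//   stream.EndStackMapEntry();
//   // ...
//   stream.EndMethod();
//   ScopedArenaVector<uint8_t> encoded = stream.Encode();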
-void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map,
+void StackMapStream::BeginInlineInfoEntry(ArtMethod* method,
+ uint32_t dex_pc,
uint32_t num_dex_registers,
- const BitVector& live_dex_registers_mask,
- uint32_t start_index_in_dex_register_locations) const {
- dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask);
- // Set the dex register location mapping data.
- size_t number_of_live_dex_registers = live_dex_registers_mask.NumSetBits();
- DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size());
- DCHECK_LE(start_index_in_dex_register_locations,
- dex_register_locations_.size() - number_of_live_dex_registers);
- for (size_t index_in_dex_register_locations = 0;
- index_in_dex_register_locations != number_of_live_dex_registers;
- ++index_in_dex_register_locations) {
- size_t location_catalog_entry_index = dex_register_locations_[
- start_index_in_dex_register_locations + index_in_dex_register_locations];
- dex_register_map.SetLocationCatalogEntryIndex(
- index_in_dex_register_locations,
- location_catalog_entry_index,
- num_dex_registers,
- location_catalog_entries_.size());
- }
-}
+ const DexFile* outer_dex_file) {
+ DCHECK(in_stack_map_) << "Call BeginStackMapEntry first";
+ DCHECK(!in_inline_info_) << "Mismatched Begin/End calls";
+ in_inline_info_ = true;
+ DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size());
+
+ expected_num_dex_registers_ += num_dex_registers;
-size_t StackMapStream::AddDexRegisterMapEntry(const DexRegisterMapEntry& entry) {
- const size_t current_entry_index = dex_register_entries_.size();
- auto entries_it = dex_map_hash_to_stack_map_indices_.find(entry.hash);
- if (entries_it == dex_map_hash_to_stack_map_indices_.end()) {
- // We don't have a perfect hash functions so we need a list to collect all stack maps
- // which might have the same dex register map.
- ScopedArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream));
- stack_map_indices.push_back(current_entry_index);
- dex_map_hash_to_stack_map_indices_.Put(entry.hash, std::move(stack_map_indices));
+ BitTableBuilder<InlineInfo>::Entry entry;
+ entry[InlineInfo::kIsLast] = InlineInfo::kMore;
+ entry[InlineInfo::kDexPc] = dex_pc;
+ entry[InlineInfo::kNumberOfDexRegisters] = static_cast<uint32_t>(expected_num_dex_registers_);
+ if (EncodeArtMethodInInlineInfo(method)) {
+ entry[InlineInfo::kArtMethodHi] = High32Bits(reinterpret_cast<uintptr_t>(method));
+ entry[InlineInfo::kArtMethodLo] = Low32Bits(reinterpret_cast<uintptr_t>(method));
} else {
- // We might have collisions, so we need to check whether or not we really have a match.
- for (uint32_t test_entry_index : entries_it->second) {
- if (DexRegisterMapEntryEquals(dex_register_entries_[test_entry_index], entry)) {
- return test_entry_index;
- }
+ if (dex_pc != static_cast<uint32_t>(-1) && kIsDebugBuild) {
+ ScopedObjectAccess soa(Thread::Current());
+ DCHECK(IsSameDexFile(*outer_dex_file, *method->GetDexFile()));
}
- entries_it->second.push_back(current_entry_index);
+ uint32_t dex_method_index = method->GetDexMethodIndex();
+ entry[InlineInfo::kMethodInfoIndex] = method_infos_.Dedup({dex_method_index});
+ }
+ current_inline_infos_.push_back(entry);
+
+ if (kVerifyStackMaps) {
+ size_t stack_map_index = stack_maps_.size();
+ size_t depth = current_inline_infos_.size() - 1;
+ dchecks_.emplace_back([=](const CodeInfo& code_info) {
+ StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
+ InlineInfo inline_info = code_info.GetInlineInfosOf(stack_map)[depth];
+ CHECK_EQ(inline_info.GetDexPc(), dex_pc);
+ bool encode_art_method = EncodeArtMethodInInlineInfo(method);
+ CHECK_EQ(inline_info.EncodesArtMethod(), encode_art_method);
+ if (encode_art_method) {
+ CHECK_EQ(inline_info.GetArtMethod(), method);
+ } else {
+ CHECK_EQ(code_info.GetMethodIndexOf(inline_info), method->GetDexMethodIndex());
+ }
+ });
}
- dex_register_entries_.push_back(entry);
- return current_entry_index;
}
-bool StackMapStream::DexRegisterMapEntryEquals(const DexRegisterMapEntry& a,
- const DexRegisterMapEntry& b) const {
- if ((a.live_dex_registers_mask == nullptr) != (b.live_dex_registers_mask == nullptr)) {
- return false;
- }
- if (a.num_dex_registers != b.num_dex_registers) {
- return false;
- }
- if (a.num_dex_registers != 0u) {
- DCHECK(a.live_dex_registers_mask != nullptr);
- DCHECK(b.live_dex_registers_mask != nullptr);
- if (!a.live_dex_registers_mask->Equal(b.live_dex_registers_mask)) {
- return false;
- }
- size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits();
- DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size());
- DCHECK_LE(a.locations_start_index,
- dex_register_locations_.size() - number_of_live_dex_registers);
- DCHECK_LE(b.locations_start_index,
- dex_register_locations_.size() - number_of_live_dex_registers);
- auto a_begin = dex_register_locations_.begin() + a.locations_start_index;
- auto b_begin = dex_register_locations_.begin() + b.locations_start_index;
- if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) {
- return false;
- }
- }
- return true;
+void StackMapStream::EndInlineInfoEntry() {
+ DCHECK(in_inline_info_) << "Mismatched Begin/End calls";
+ in_inline_info_ = false;
+ DCHECK_EQ(expected_num_dex_registers_, current_dex_registers_.size());
}
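// Sketch of the inline-frame accounting checked above: dex registers of
// inlined frames are appended after the caller's registers, and
// expected_num_dex_registers_ grows by each frame's register count
// ("callee" is a hypothetical ArtMethod*):
//
//   stream.BeginStackMapEntry(dex_pc, native_pc);      // outer method has 2 registers
//   stream.AddDexRegisterEntry(Kind::kInStack, 0);
//   stream.AddDexRegisterEntry(Kind::kConstant, -2);
//   stream.BeginInlineInfoEntry(callee, /* dex_pc */ 3, /* num_dex_registers */ 1);
//   stream.AddDexRegisterEntry(Kind::kInRegister, 4);  // the inlined frame's register
//   stream.EndInlineInfoEntry();                       // checks 2 + 1 == 3 entries
//   stream.EndStackMapEntry();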
-// Helper for CheckCodeInfo - check that register map has the expected content.
-void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info,
- const DexRegisterMap& dex_register_map,
- size_t num_dex_registers,
- BitVector* live_dex_registers_mask,
- size_t dex_register_locations_index) const {
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- for (size_t reg = 0; reg < num_dex_registers; reg++) {
- // Find the location we tried to encode.
- DexRegisterLocation expected = DexRegisterLocation::None();
- if (live_dex_registers_mask->IsBitSet(reg)) {
- size_t catalog_index = dex_register_locations_[dex_register_locations_index++];
- expected = location_catalog_entries_[catalog_index];
+// Create delta-compressed dex register map based on the current list of DexRegisterLocations.
+// All dex registers for a stack map are concatenated - inlined registers are just appended.
+void StackMapStream::CreateDexRegisterMap() {
+ // These are fields rather than local variables so that we can reuse the reserved memory.
+ temp_dex_register_mask_.ClearAllBits();
+ temp_dex_register_map_.clear();
+
+ // Ensure that the arrays that hold previous state are big enough to be safely indexed below.
+ if (previous_dex_registers_.size() < current_dex_registers_.size()) {
+ previous_dex_registers_.resize(current_dex_registers_.size(), DexRegisterLocation::None());
+ dex_register_timestamp_.resize(current_dex_registers_.size(), 0u);
+ }
+
+ // Set bit in the mask for each register that has been changed since the previous stack map.
+  // Modified registers are stored in the catalog and the catalog index is added to the list.
+ for (size_t i = 0; i < current_dex_registers_.size(); i++) {
+ DexRegisterLocation reg = current_dex_registers_[i];
+    // Distance is the difference between this index and the index of the last modification.
+ uint32_t distance = stack_maps_.size() - dex_register_timestamp_[i];
+ if (previous_dex_registers_[i] != reg || distance > kMaxDexRegisterMapSearchDistance) {
+ BitTableBuilder<DexRegisterInfo>::Entry entry;
+ entry[DexRegisterInfo::kKind] = static_cast<uint32_t>(reg.GetKind());
+ entry[DexRegisterInfo::kPackedValue] =
+ DexRegisterInfo::PackValue(reg.GetKind(), reg.GetValue());
+ uint32_t index = reg.IsLive() ? dex_register_catalog_.Dedup(&entry) : kNoValue;
+ temp_dex_register_mask_.SetBit(i);
+ temp_dex_register_map_.push_back({index});
+ previous_dex_registers_[i] = reg;
+ dex_register_timestamp_[i] = stack_maps_.size();
}
- // Compare to the seen location.
- if (expected.GetKind() == DexRegisterLocation::Kind::kNone) {
- DCHECK(!dex_register_map.IsValid() || !dex_register_map.IsDexRegisterLive(reg))
- << dex_register_map.IsValid() << " " << dex_register_map.IsDexRegisterLive(reg);
- } else {
- DCHECK(dex_register_map.IsDexRegisterLive(reg));
- DexRegisterLocation seen = dex_register_map.GetDexRegisterLocation(
- reg, num_dex_registers, code_info, encoding);
- DCHECK_EQ(expected.GetKind(), seen.GetKind());
- DCHECK_EQ(expected.GetValue(), seen.GetValue());
- }
- }
- if (num_dex_registers == 0) {
- DCHECK(!dex_register_map.IsValid());
}
-}
-size_t StackMapStream::PrepareRegisterMasks() {
- register_masks_.resize(stack_maps_.size(), 0u);
- ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
- for (StackMapEntry& stack_map : stack_maps_) {
- const size_t index = dedupe.size();
- stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second;
- register_masks_[index] = stack_map.register_mask;
- }
- return dedupe.size();
-}
-
-void StackMapStream::PrepareMethodIndices() {
- CHECK(method_indices_.empty());
- method_indices_.resize(stack_maps_.size() + inline_infos_.size());
- ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
- for (StackMapEntry& stack_map : stack_maps_) {
- const size_t index = dedupe.size();
- const uint32_t method_index = stack_map.dex_method_index;
- if (method_index != dex::kDexNoIndex) {
- stack_map.dex_method_index_idx = dedupe.emplace(method_index, index).first->second;
- method_indices_[index] = method_index;
- }
- }
- for (InlineInfoEntry& inline_info : inline_infos_) {
- const size_t index = dedupe.size();
- const uint32_t method_index = inline_info.method_index;
- CHECK_NE(method_index, dex::kDexNoIndex);
- inline_info.dex_method_index_idx = dedupe.emplace(method_index, index).first->second;
- method_indices_[index] = method_index;
+ // Set the mask and map for the current StackMap (which includes inlined registers).
+ if (temp_dex_register_mask_.GetNumberOfBits() != 0) {
+ current_stack_map_[StackMap::kDexRegisterMaskIndex] =
+ dex_register_masks_.Dedup(temp_dex_register_mask_.GetRawStorage(),
+ temp_dex_register_mask_.GetNumberOfBits());
+ }
+ if (!current_dex_registers_.empty()) {
+ current_stack_map_[StackMap::kDexRegisterMapIndex] =
+ dex_register_maps_.Dedup(temp_dex_register_map_.data(),
+ temp_dex_register_map_.size());
+ }
+
+ if (kVerifyStackMaps) {
+ size_t stack_map_index = stack_maps_.size();
+    // We need to make a copy of the current registers for later (when the check is run).
+ auto expected_dex_registers = std::make_shared<dchecked_vector<DexRegisterLocation>>(
+ current_dex_registers_.begin(), current_dex_registers_.end());
+ dchecks_.emplace_back([=](const CodeInfo& code_info) {
+ StackMap stack_map = code_info.GetStackMapAt(stack_map_index);
+ uint32_t expected_reg = 0;
+ for (DexRegisterLocation reg : code_info.GetDexRegisterMapOf(stack_map)) {
+ CHECK_EQ((*expected_dex_registers)[expected_reg++], reg);
+ }
+ for (InlineInfo inline_info : code_info.GetInlineInfosOf(stack_map)) {
+ DexRegisterMap map = code_info.GetInlineDexRegisterMapOf(stack_map, inline_info);
+ for (DexRegisterLocation reg : map) {
+ CHECK_EQ((*expected_dex_registers)[expected_reg++], reg);
+ }
+ }
+ CHECK_EQ(expected_reg, expected_dex_registers->size());
+ });
}
- method_indices_.resize(dedupe.size());
}
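// Worked example of the delta compression above (a sketch). For three
// consecutive stack maps over dex registers {v0, v1}:
//
//   map 0: v0 = InStack(0), v1 = Constant(-2)   // both differ from "none":
//                                               //   mask 0b11, two catalog indices
//   map 1: v0 = InStack(0), v1 = Constant(-2)   // nothing changed:
//                                               //   empty mask, no map entries
//   map 2: v0 = InStack(0), v1 = InRegister(4)  // only v1 changed:
//                                               //   mask 0b10, one catalog index
//
// An unchanged register is still re-emitted once the distance to its last
// modification exceeds kMaxDexRegisterMapSearchDistance, which bounds how far
// back a reader has to scan to reconstruct a full map.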
-
-size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
- // Preallocate memory since we do not want it to move (the dedup map will point into it).
- const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
- stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u);
- // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later
- // when copying out from stack_masks_.
- ScopedArenaUnorderedMap<MemoryRegion,
- size_t,
- FNVHash<MemoryRegion>,
- MemoryRegion::ContentEquals> dedup(
- stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream));
- for (StackMapEntry& stack_map : stack_maps_) {
- size_t index = dedup.size();
- MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size);
- for (size_t i = 0; i < entry_size_in_bits; i++) {
- stack_mask.StoreBit(i, stack_map.sp_mask != nullptr && stack_map.sp_mask->IsBitSet(i));
- }
- stack_map.stack_mask_index = dedup.emplace(stack_mask, index).first->second;
- }
- return dedup.size();
+template<typename Writer, typename Builder>
+ALWAYS_INLINE static void EncodeTable(Writer& out, const Builder& bit_table) {
+ out.WriteBit(false); // Is not deduped.
+ bit_table.Encode(out);
}
-// Check that all StackMapStream inputs are correctly encoded by trying to read them back.
-void StackMapStream::CheckCodeInfo(MemoryRegion region) const {
- CodeInfo code_info(region);
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- DCHECK_EQ(code_info.GetNumberOfStackMaps(encoding), stack_maps_.size());
- size_t invoke_info_index = 0;
- for (size_t s = 0; s < stack_maps_.size(); ++s) {
- const StackMap stack_map = code_info.GetStackMapAt(s, encoding);
- const StackMapEncoding& stack_map_encoding = encoding.stack_map.encoding;
- StackMapEntry entry = stack_maps_[s];
-
- // Check main stack map fields.
- DCHECK_EQ(stack_map.GetNativePcOffset(stack_map_encoding, instruction_set_),
- entry.native_pc_code_offset.Uint32Value(instruction_set_));
- DCHECK_EQ(stack_map.GetDexPc(stack_map_encoding), entry.dex_pc);
- DCHECK_EQ(stack_map.GetRegisterMaskIndex(stack_map_encoding), entry.register_mask_index);
- DCHECK_EQ(code_info.GetRegisterMaskOf(encoding, stack_map), entry.register_mask);
- const size_t num_stack_mask_bits = code_info.GetNumberOfStackMaskBits(encoding);
- DCHECK_EQ(stack_map.GetStackMaskIndex(stack_map_encoding), entry.stack_mask_index);
- BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
- if (entry.sp_mask != nullptr) {
- DCHECK_GE(stack_mask.size_in_bits(), entry.sp_mask->GetNumberOfBits());
- for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_mask.LoadBit(b), entry.sp_mask->IsBitSet(b));
- }
- } else {
- for (size_t b = 0; b < num_stack_mask_bits; b++) {
- DCHECK_EQ(stack_mask.LoadBit(b), 0u);
- }
- }
- if (entry.dex_method_index != dex::kDexNoIndex) {
- InvokeInfo invoke_info = code_info.GetInvokeInfo(encoding, invoke_info_index);
- DCHECK_EQ(invoke_info.GetNativePcOffset(encoding.invoke_info.encoding, instruction_set_),
- entry.native_pc_code_offset.Uint32Value(instruction_set_));
- DCHECK_EQ(invoke_info.GetInvokeType(encoding.invoke_info.encoding), entry.invoke_type);
- DCHECK_EQ(invoke_info.GetMethodIndexIdx(encoding.invoke_info.encoding),
- entry.dex_method_index_idx);
- invoke_info_index++;
- }
- CheckDexRegisterMap(code_info,
- code_info.GetDexRegisterMapOf(
- stack_map, encoding, entry.dex_register_entry.num_dex_registers),
- entry.dex_register_entry.num_dex_registers,
- entry.dex_register_entry.live_dex_registers_mask,
- entry.dex_register_entry.locations_start_index);
-
- // Check inline info.
- DCHECK_EQ(stack_map.HasInlineInfo(stack_map_encoding), (entry.inlining_depth != 0));
- if (entry.inlining_depth != 0) {
- InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
- DCHECK_EQ(inline_info.GetDepth(encoding.inline_info.encoding), entry.inlining_depth);
- for (size_t d = 0; d < entry.inlining_depth; ++d) {
- size_t inline_info_index = entry.inline_infos_start_index + d;
- DCHECK_LT(inline_info_index, inline_infos_.size());
- InlineInfoEntry inline_entry = inline_infos_[inline_info_index];
- DCHECK_EQ(inline_info.GetDexPcAtDepth(encoding.inline_info.encoding, d),
- inline_entry.dex_pc);
- if (inline_info.EncodesArtMethodAtDepth(encoding.inline_info.encoding, d)) {
- DCHECK_EQ(inline_info.GetArtMethodAtDepth(encoding.inline_info.encoding, d),
- inline_entry.method);
- } else {
- const size_t method_index_idx =
- inline_info.GetMethodIndexIdxAtDepth(encoding.inline_info.encoding, d);
- DCHECK_EQ(method_index_idx, inline_entry.dex_method_index_idx);
- DCHECK_EQ(method_indices_[method_index_idx], inline_entry.method_index);
- }
-
- CheckDexRegisterMap(code_info,
- code_info.GetDexRegisterMapAtDepth(
- d,
- inline_info,
- encoding,
- inline_entry.dex_register_entry.num_dex_registers),
- inline_entry.dex_register_entry.num_dex_registers,
- inline_entry.dex_register_entry.live_dex_registers_mask,
- inline_entry.dex_register_entry.locations_start_index);
- }
+ScopedArenaVector<uint8_t> StackMapStream::Encode() {
+ DCHECK(in_stack_map_ == false) << "Mismatched Begin/End calls";
+ DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls";
+
+ ScopedArenaVector<uint8_t> buffer(allocator_->Adapter(kArenaAllocStackMapStream));
+ BitMemoryWriter<ScopedArenaVector<uint8_t>> out(&buffer);
+ out.WriteVarint(packed_frame_size_);
+ out.WriteVarint(core_spill_mask_);
+ out.WriteVarint(fp_spill_mask_);
+ out.WriteVarint(num_dex_registers_);
+ EncodeTable(out, stack_maps_);
+ EncodeTable(out, register_masks_);
+ EncodeTable(out, stack_masks_);
+ EncodeTable(out, inline_infos_);
+ EncodeTable(out, method_infos_);
+ EncodeTable(out, dex_register_masks_);
+ EncodeTable(out, dex_register_maps_);
+ EncodeTable(out, dex_register_catalog_);
+
+ // Verify that we can load the CodeInfo and check some essentials.
+ CodeInfo code_info(buffer.data());
+ CHECK_EQ(code_info.Size(), buffer.size());
+ CHECK_EQ(code_info.GetNumberOfStackMaps(), stack_maps_.size());
+
+ // Verify all written data (usually only in debug builds).
+ if (kVerifyStackMaps) {
+ for (const auto& dcheck : dchecks_) {
+ dcheck(code_info);
}
}
-}
-size_t StackMapStream::ComputeMethodInfoSize() const {
- DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before " << __FUNCTION__;
- return MethodInfo::ComputeSize(method_indices_.size());
+ return buffer;
}
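// Resulting layout of the encoded CodeInfo as written by Encode() above
// (a sketch; each table is preceded by the one "is deduped" bit emitted by
// EncodeTable, always cleared in this patch):
//
//   CodeInfo := varint(packed_frame_size)
//               varint(core_spill_mask)
//               varint(fp_spill_mask)
//               varint(num_dex_registers)
//               [bit][StackMap table]
//               [bit][RegisterMask table]
//               [bit][StackMask table]
//               [bit][InlineInfo table]
//               [bit][MethodInfo table]
//               [bit][DexRegisterMask table]
//               [bit][DexRegisterMap table]
//               [bit][DexRegisterCatalog table]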
} // namespace art
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 579aabdb5f..01c6bf9e0e 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -17,133 +17,63 @@
#ifndef ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_
#define ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_
+#include "base/allocator.h"
+#include "base/arena_bit_vector.h"
+#include "base/bit_table.h"
#include "base/bit_vector-inl.h"
-#include "base/hash_map.h"
+#include "base/memory_region.h"
#include "base/scoped_arena_containers.h"
#include "base/value_object.h"
-#include "memory_region.h"
-#include "method_info.h"
+#include "dex_register_location.h"
#include "nodes.h"
#include "stack_map.h"
namespace art {
-// Helper to build art::StackMapStream::LocationCatalogEntriesIndices.
-class LocationCatalogEntriesIndicesEmptyFn {
- public:
- void MakeEmpty(std::pair<DexRegisterLocation, size_t>& item) const {
- item.first = DexRegisterLocation::None();
- }
- bool IsEmpty(const std::pair<DexRegisterLocation, size_t>& item) const {
- return item.first == DexRegisterLocation::None();
- }
-};
-
-// Hash function for art::StackMapStream::LocationCatalogEntriesIndices.
-// This hash function does not create collisions.
-class DexRegisterLocationHashFn {
- public:
- size_t operator()(DexRegisterLocation key) const {
- // Concatenate `key`s fields to create a 64-bit value to be hashed.
- int64_t kind_and_value =
- (static_cast<int64_t>(key.kind_) << 32) | static_cast<int64_t>(key.value_);
- return inner_hash_fn_(kind_and_value);
- }
- private:
- std::hash<int64_t> inner_hash_fn_;
-};
-
-
/**
* Collects and builds stack maps for a method. All the stack maps
* for a method are placed in a CodeInfo object.
*/
-class StackMapStream : public ValueObject {
+class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
public:
explicit StackMapStream(ScopedArenaAllocator* allocator, InstructionSet instruction_set)
: allocator_(allocator),
instruction_set_(instruction_set),
- stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)),
- location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
- location_catalog_entries_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
- dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)),
- inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
- stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
- register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
- method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
- dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
- stack_mask_max_(-1),
- dex_pc_max_(kNoDexPc),
- register_mask_max_(0),
- number_of_stack_maps_with_inline_info_(0),
- dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(),
- allocator->Adapter(kArenaAllocStackMapStream)),
- current_entry_(),
- current_inline_info_(),
- code_info_encoding_(allocator->Adapter(kArenaAllocStackMapStream)),
- needed_size_(0),
- current_dex_register_(0),
- in_inline_frame_(false) {
- stack_maps_.reserve(10);
- location_catalog_entries_.reserve(4);
- dex_register_locations_.reserve(10 * 4);
- inline_infos_.reserve(2);
- code_info_encoding_.reserve(16);
+ stack_maps_(allocator),
+ inline_infos_(allocator),
+ method_infos_(allocator),
+ register_masks_(allocator),
+ stack_masks_(allocator),
+ dex_register_masks_(allocator),
+ dex_register_maps_(allocator),
+ dex_register_catalog_(allocator),
+ lazy_stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+ current_stack_map_(),
+ current_inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
+ current_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)),
+ previous_dex_registers_(allocator->Adapter(kArenaAllocStackMapStream)),
+ dex_register_timestamp_(allocator->Adapter(kArenaAllocStackMapStream)),
+ expected_num_dex_registers_(0u),
+ temp_dex_register_mask_(allocator, 32, true, kArenaAllocStackMapStream),
+ temp_dex_register_map_(allocator->Adapter(kArenaAllocStackMapStream)) {
}
- // A dex register map entry for a single stack map entry, contains what registers are live as
- // well as indices into the location catalog.
- class DexRegisterMapEntry {
- public:
- static const size_t kOffsetUnassigned = -1;
-
- BitVector* live_dex_registers_mask;
- uint32_t num_dex_registers;
- size_t locations_start_index;
- // Computed fields
- size_t hash = 0;
- size_t offset = kOffsetUnassigned;
-
- size_t ComputeSize(size_t catalog_size) const;
- };
-
- // See runtime/stack_map.h to know what these fields contain.
- struct StackMapEntry {
- uint32_t dex_pc;
- CodeOffset native_pc_code_offset;
- uint32_t register_mask;
- BitVector* sp_mask;
- uint8_t inlining_depth;
- size_t inline_infos_start_index;
- uint32_t stack_mask_index;
- uint32_t register_mask_index;
- DexRegisterMapEntry dex_register_entry;
- size_t dex_register_map_index;
- InvokeType invoke_type;
- uint32_t dex_method_index;
- uint32_t dex_method_index_idx; // Index into dex method index table.
- };
-
- struct InlineInfoEntry {
- uint32_t dex_pc; // dex::kDexNoIndex for intrinsified native methods.
- ArtMethod* method;
- uint32_t method_index;
- DexRegisterMapEntry dex_register_entry;
- size_t dex_register_map_index;
- uint32_t dex_method_index_idx; // Index into the dex method index table.
- };
+ void BeginMethod(size_t frame_size_in_bytes,
+ size_t core_spill_mask,
+ size_t fp_spill_mask,
+ uint32_t num_dex_registers);
+ void EndMethod();
void BeginStackMapEntry(uint32_t dex_pc,
uint32_t native_pc_offset,
- uint32_t register_mask,
- BitVector* sp_mask,
- uint32_t num_dex_registers,
- uint8_t inlining_depth);
+ uint32_t register_mask = 0,
+ BitVector* sp_mask = nullptr,
+ StackMap::Kind kind = StackMap::Kind::Default);
void EndStackMapEntry();
- void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value);
-
- void AddInvoke(InvokeType type, uint32_t dex_method_index);
+ void AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t value) {
+ current_dex_registers_.push_back(DexRegisterLocation(kind, value));
+ }
void BeginInlineInfoEntry(ArtMethod* method,
uint32_t dex_pc,
@@ -155,109 +85,54 @@ class StackMapStream : public ValueObject {
return stack_maps_.size();
}
- const StackMapEntry& GetStackMap(size_t i) const {
- return stack_maps_[i];
- }
-
- void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) {
- stack_maps_[i].native_pc_code_offset =
- CodeOffset::FromOffset(native_pc_offset, instruction_set_);
- }
-
- // Prepares the stream to fill in a memory region. Must be called before FillIn.
- // Returns the size (in bytes) needed to store this stream.
- size_t PrepareForFillIn();
- void FillInCodeInfo(MemoryRegion region);
- void FillInMethodInfo(MemoryRegion region);
+ uint32_t GetStackMapNativePcOffset(size_t i);
+ void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset);
- size_t ComputeMethodInfoSize() const;
+ // Encode all stack map data.
+ // The returned vector is allocated using the allocator passed to the StackMapStream.
+ ScopedArenaVector<uint8_t> Encode();
private:
- size_t ComputeDexRegisterLocationCatalogSize() const;
- size_t ComputeDexRegisterMapsSize() const;
- void ComputeInlineInfoEncoding(InlineInfoEncoding* encoding,
- size_t dex_register_maps_bytes);
-
- CodeOffset ComputeMaxNativePcCodeOffset() const;
-
- // Returns the number of unique stack masks.
- size_t PrepareStackMasks(size_t entry_size_in_bits);
-
- // Returns the number of unique register masks.
- size_t PrepareRegisterMasks();
-
- // Prepare and deduplicate method indices.
- void PrepareMethodIndices();
-
- // Deduplicate entry if possible and return the corresponding index into dex_register_entries_
- // array. If entry is not a duplicate, a new entry is added to dex_register_entries_.
- size_t AddDexRegisterMapEntry(const DexRegisterMapEntry& entry);
-
- // Return true if the two dex register map entries are equal.
- bool DexRegisterMapEntryEquals(const DexRegisterMapEntry& a, const DexRegisterMapEntry& b) const;
+ static constexpr uint32_t kNoValue = -1;
- // Fill in the corresponding entries of a register map.
- void ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding);
+ void CreateDexRegisterMap();
- // Returns the index of an entry with the same dex register map as the current_entry,
- // or kNoSameDexMapFound if no such entry exists.
- size_t FindEntryWithTheSameDexMap();
- bool HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEntry& b) const;
-
- // Fill in the corresponding entries of a register map.
- void FillInDexRegisterMap(DexRegisterMap dex_register_map,
- uint32_t num_dex_registers,
- const BitVector& live_dex_registers_mask,
- uint32_t start_index_in_dex_register_locations) const;
-
- // Returns the offset for the dex register inside of the dex register location region. See FillIn.
- // Only copies the dex register map if the offset for the entry is not already assigned.
- size_t MaybeCopyDexRegisterMap(DexRegisterMapEntry& entry,
- size_t* current_offset,
- MemoryRegion dex_register_locations_region);
- void CheckDexRegisterMap(const CodeInfo& code_info,
- const DexRegisterMap& dex_register_map,
- size_t num_dex_registers,
- BitVector* live_dex_registers_mask,
- size_t dex_register_locations_index) const;
- void CheckCodeInfo(MemoryRegion region) const;
-
- ScopedArenaAllocator* const allocator_;
+ ScopedArenaAllocator* allocator_;
const InstructionSet instruction_set_;
- ScopedArenaVector<StackMapEntry> stack_maps_;
-
- // A catalog of unique [location_kind, register_value] pairs (per method).
- ScopedArenaVector<DexRegisterLocation> location_catalog_entries_;
- // Map from Dex register location catalog entries to their indices in the
- // location catalog.
- using LocationCatalogEntriesIndices = ScopedArenaHashMap<DexRegisterLocation,
- size_t,
- LocationCatalogEntriesIndicesEmptyFn,
- DexRegisterLocationHashFn>;
- LocationCatalogEntriesIndices location_catalog_entries_indices_;
-
- // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`.
- ScopedArenaVector<size_t> dex_register_locations_;
- ScopedArenaVector<InlineInfoEntry> inline_infos_;
- ScopedArenaVector<uint8_t> stack_masks_;
- ScopedArenaVector<uint32_t> register_masks_;
- ScopedArenaVector<uint32_t> method_indices_;
- ScopedArenaVector<DexRegisterMapEntry> dex_register_entries_;
- int stack_mask_max_;
- uint32_t dex_pc_max_;
- uint32_t register_mask_max_;
- size_t number_of_stack_maps_with_inline_info_;
-
- ScopedArenaSafeMap<uint32_t, ScopedArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_;
-
- StackMapEntry current_entry_;
- InlineInfoEntry current_inline_info_;
- ScopedArenaVector<uint8_t> code_info_encoding_;
- size_t needed_size_;
- uint32_t current_dex_register_;
- bool in_inline_frame_;
-
- static constexpr uint32_t kNoSameDexMapFound = -1;
+ uint32_t packed_frame_size_ = 0;
+ uint32_t core_spill_mask_ = 0;
+ uint32_t fp_spill_mask_ = 0;
+ uint32_t num_dex_registers_ = 0;
+ BitTableBuilder<StackMap> stack_maps_;
+ BitTableBuilder<InlineInfo> inline_infos_;
+ BitTableBuilder<MethodInfo> method_infos_;
+ BitTableBuilder<RegisterMask> register_masks_;
+ BitmapTableBuilder stack_masks_;
+ BitmapTableBuilder dex_register_masks_;
+ BitTableBuilder<DexRegisterMapInfo> dex_register_maps_;
+ BitTableBuilder<DexRegisterInfo> dex_register_catalog_;
+
+ ScopedArenaVector<BitVector*> lazy_stack_masks_;
+
+  // Variables which track the current state between Begin/End calls.
+ bool in_method_ = false;
+ bool in_stack_map_ = false;
+ bool in_inline_info_ = false;
+ BitTableBuilder<StackMap>::Entry current_stack_map_;
+ ScopedArenaVector<BitTableBuilder<InlineInfo>::Entry> current_inline_infos_;
+ ScopedArenaVector<DexRegisterLocation> current_dex_registers_;
+ ScopedArenaVector<DexRegisterLocation> previous_dex_registers_;
+ ScopedArenaVector<uint32_t> dex_register_timestamp_; // Stack map index of last change.
+ size_t expected_num_dex_registers_;
+
+ // Temporary variables used in CreateDexRegisterMap.
+ // They are here so that we can reuse the reserved memory.
+ ArenaBitVector temp_dex_register_mask_;
+ ScopedArenaVector<BitTableBuilder<DexRegisterMapInfo>::Entry> temp_dex_register_map_;
+
+ // A set of lambda functions to be executed at the end to verify
+ // the encoded data. It is generally only used in debug builds.
+ std::vector<std::function<void(CodeInfo&)>> dchecks_;
DISALLOW_COPY_AND_ASSIGN(StackMapStream);
};
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 7e517f3485..d28f09fbba 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -18,6 +18,7 @@
#include "art_method.h"
#include "base/arena_bit_vector.h"
+#include "base/malloc_arena_pool.h"
#include "stack_map_stream.h"
#include "gtest/gtest.h"
@@ -28,14 +29,13 @@ namespace art {
// to the given bit vector. Returns true if they are same.
static bool CheckStackMask(
const CodeInfo& code_info,
- const CodeInfoEncoding& encoding,
const StackMap& stack_map,
const BitVector& bit_vector) {
- BitMemoryRegion stack_mask = code_info.GetStackMaskOf(encoding, stack_map);
- if (bit_vector.GetNumberOfBits() > encoding.stack_mask.encoding.BitSize()) {
+ BitMemoryRegion stack_mask = code_info.GetStackMaskOf(stack_map);
+ if (bit_vector.GetNumberOfBits() > stack_mask.size_in_bits()) {
return false;
}
- for (size_t i = 0; i < encoding.stack_mask.encoding.BitSize(); ++i) {
+ for (size_t i = 0; i < stack_mask.size_in_bits(); ++i) {
if (stack_mask.LoadBit(i) != bit_vector.IsBitSet(i)) {
return false;
}
@@ -45,93 +45,68 @@ static bool CheckStackMask(
using Kind = DexRegisterLocation::Kind;
+constexpr static uint32_t kPcAlign = GetInstructionSetInstructionAlignment(kRuntimeISA);
+
TEST(StackMapTest, Test1) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 2);
ArenaBitVector sp_mask(&allocator, 0, false);
size_t number_of_dex_registers = 2;
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask);
stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location.
stream.AddDexRegisterEntry(Kind::kConstant, -2); // Short location.
stream.EndStackMapEntry();
- size_t size = stream.PrepareForFillIn();
- void* memory = allocator.Alloc(size, kArenaAllocMisc);
- MemoryRegion region(memory, size);
- stream.FillInCodeInfo(region);
+ stream.EndMethod();
+ ScopedArenaVector<uint8_t> memory = stream.Encode();
- CodeInfo code_info(region);
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding));
+ CodeInfo code_info(memory.data());
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
- uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+ uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
ASSERT_EQ(2u, number_of_catalog_entries);
- DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
- // The Dex register location catalog contains:
- // - one 1-byte short Dex register location, and
- // - one 5-byte large Dex register location.
- size_t expected_location_catalog_size = 1u + 5u;
- ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
-
- StackMap stack_map = code_info.GetStackMapAt(0, encoding);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
- ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding));
- ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
-
- ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask));
-
- ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
- DexRegisterMap dex_register_map =
- code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
- ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
- // The Dex register map contains:
- // - one 1-byte live bit mask, and
- // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
- size_t expected_dex_register_map_size = 1u + 1u;
- ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
-
- ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
- ASSERT_EQ(0u, index0);
- ASSERT_EQ(1u, index1);
- DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
- DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign)));
+ ASSERT_EQ(0u, stack_map.GetDexPc());
+ ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map));
+
+ ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map);
+ ASSERT_EQ(number_of_dex_registers, dex_register_map.size());
+ ASSERT_TRUE(dex_register_map[0].IsLive());
+ ASSERT_TRUE(dex_register_map[1].IsLive());
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters());
+
+ ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind());
+ ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind());
+ ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes());
+ ASSERT_EQ(-2, dex_register_map[1].GetConstant());
+
+ DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0);
+ DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1);
ASSERT_EQ(Kind::kInStack, location0.GetKind());
ASSERT_EQ(Kind::kConstant, location1.GetKind());
- ASSERT_EQ(Kind::kInStack, location0.GetInternalKind());
- ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
ASSERT_EQ(0, location0.GetValue());
ASSERT_EQ(-2, location1.GetValue());
- ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
+ ASSERT_FALSE(stack_map.HasInlineInfo());
}
TEST(StackMapTest, Test2) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
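+  // The new BeginMethod/EndMethod pair brackets each method; the arguments here are
+  // taken to be the frame size in bytes, core and FP spill masks, and the number of
+  // dex registers.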
+ stream.BeginMethod(32, 0, 0, 2);
ArtMethod art_method;
ArenaBitVector sp_mask1(&allocator, 0, true);
@@ -139,7 +114,7 @@ TEST(StackMapTest, Test2) {
sp_mask1.SetBit(4);
size_t number_of_dex_registers = 2;
size_t number_of_dex_registers_in_inline_info = 0;
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 2);
+ stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1);
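+  // Register counts and inline-info depth are no longer parameters here; they are
+  // implied by the AddDexRegisterEntry() and BeginInlineInfoEntry() calls that follow.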
stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location.
stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location.
stream.BeginInlineInfoEntry(&art_method, 3, number_of_dex_registers_in_inline_info);
@@ -151,7 +126,7 @@ TEST(StackMapTest, Test2) {
ArenaBitVector sp_mask2(&allocator, 0, true);
sp_mask2.SetBit(3);
sp_mask2.SetBit(8);
- stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(1, 128 * kPcAlign, 0xFF, &sp_mask2);
stream.AddDexRegisterEntry(Kind::kInRegister, 18); // Short location.
stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location.
stream.EndStackMapEntry();
@@ -159,7 +134,7 @@ TEST(StackMapTest, Test2) {
ArenaBitVector sp_mask3(&allocator, 0, true);
sp_mask3.SetBit(1);
sp_mask3.SetBit(5);
- stream.BeginStackMapEntry(2, 192, 0xAB, &sp_mask3, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(2, 192 * kPcAlign, 0xAB, &sp_mask3);
stream.AddDexRegisterEntry(Kind::kInRegister, 6); // Short location.
stream.AddDexRegisterEntry(Kind::kInRegisterHigh, 8); // Short location.
stream.EndStackMapEntry();
@@ -167,256 +142,165 @@ TEST(StackMapTest, Test2) {
ArenaBitVector sp_mask4(&allocator, 0, true);
sp_mask4.SetBit(6);
sp_mask4.SetBit(7);
- stream.BeginStackMapEntry(3, 256, 0xCD, &sp_mask4, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(3, 256 * kPcAlign, 0xCD, &sp_mask4);
stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location, same in stack map 2.
stream.AddDexRegisterEntry(Kind::kInFpuRegisterHigh, 1); // Short location.
stream.EndStackMapEntry();
- size_t size = stream.PrepareForFillIn();
- void* memory = allocator.Alloc(size, kArenaAllocMisc);
- MemoryRegion region(memory, size);
- stream.FillInCodeInfo(region);
+ stream.EndMethod();
+ ScopedArenaVector<uint8_t> memory = stream.Encode();
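+  // Encode() sizes and fills the buffer itself, replacing the
+  // PrepareForFillIn/FillInCodeInfo pair; CodeInfo below decodes straight from the raw bytes.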
- CodeInfo code_info(region);
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- ASSERT_EQ(4u, code_info.GetNumberOfStackMaps(encoding));
+ CodeInfo code_info(memory.data());
+ ASSERT_EQ(4u, code_info.GetNumberOfStackMaps());
- uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+ uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
ASSERT_EQ(7u, number_of_catalog_entries);
- DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
- // The Dex register location catalog contains:
- // - six 1-byte short Dex register locations, and
- // - one 5-byte large Dex register location.
- size_t expected_location_catalog_size = 6u * 1u + 5u;
- ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
// First stack map.
{
- StackMap stack_map = code_info.GetStackMapAt(0, encoding);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
- ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding));
- ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
-
- ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1));
-
- ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
- DexRegisterMap dex_register_map =
- code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
- ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
- // The Dex register map contains:
- // - one 1-byte live bit mask, and
- // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
- size_t expected_dex_register_map_size = 1u + 1u;
- ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
-
- ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInStack, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(0, dex_register_map.GetStackOffsetInBytes(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
- ASSERT_EQ(0u, index0);
- ASSERT_EQ(1u, index1);
- DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
- DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign)));
+ ASSERT_EQ(0u, stack_map.GetDexPc());
+ ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map));
+
+ ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask1));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map);
+ ASSERT_EQ(number_of_dex_registers, dex_register_map.size());
+ ASSERT_TRUE(dex_register_map[0].IsLive());
+ ASSERT_TRUE(dex_register_map[1].IsLive());
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters());
+
+ ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind());
+ ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind());
+ ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes());
+ ASSERT_EQ(-2, dex_register_map[1].GetConstant());
+
+ DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0);
+ DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1);
ASSERT_EQ(Kind::kInStack, location0.GetKind());
ASSERT_EQ(Kind::kConstant, location1.GetKind());
- ASSERT_EQ(Kind::kInStack, location0.GetInternalKind());
- ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
ASSERT_EQ(0, location0.GetValue());
ASSERT_EQ(-2, location1.GetValue());
- ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
- InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
- ASSERT_EQ(2u, inline_info.GetDepth(encoding.inline_info.encoding));
- ASSERT_EQ(3u, inline_info.GetDexPcAtDepth(encoding.inline_info.encoding, 0));
- ASSERT_EQ(2u, inline_info.GetDexPcAtDepth(encoding.inline_info.encoding, 1));
- ASSERT_TRUE(inline_info.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0));
- ASSERT_TRUE(inline_info.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1));
+ ASSERT_TRUE(stack_map.HasInlineInfo());
+ auto inline_infos = code_info.GetInlineInfosOf(stack_map);
+ ASSERT_EQ(2u, inline_infos.size());
+ ASSERT_EQ(3u, inline_infos[0].GetDexPc());
+ ASSERT_EQ(2u, inline_infos[1].GetDexPc());
+ ASSERT_TRUE(inline_infos[0].EncodesArtMethod());
+ ASSERT_TRUE(inline_infos[1].EncodesArtMethod());
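+    // GetInlineInfosOf returns one InlineInfo per inlining depth, replacing the old
+    // GetDepth()/GetDexPcAtDepth() accessors.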
}
// Second stack map.
{
- StackMap stack_map = code_info.GetStackMapAt(1, encoding);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u, encoding)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u, encoding)));
- ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map.encoding));
- ASSERT_EQ(128u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
- ASSERT_EQ(0xFFu, code_info.GetRegisterMaskOf(encoding, stack_map));
-
- ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask2));
-
- ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
- DexRegisterMap dex_register_map =
- code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
- ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
- // The Dex register map contains:
- // - one 1-byte live bit mask, and
- // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
- size_t expected_dex_register_map_size = 1u + 1u;
- ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
-
- ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(18, dex_register_map.GetMachineRegister(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(3, dex_register_map.GetMachineRegister(
- 1, number_of_dex_registers, code_info, encoding));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
- ASSERT_EQ(2u, index0);
- ASSERT_EQ(3u, index1);
- DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
- DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ StackMap stack_map = code_info.GetStackMapAt(1);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1u)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(128u * kPcAlign)));
+ ASSERT_EQ(1u, stack_map.GetDexPc());
+ ASSERT_EQ(128u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+ ASSERT_EQ(0xFFu, code_info.GetRegisterMaskOf(stack_map));
+
+ ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask2));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map);
+ ASSERT_EQ(number_of_dex_registers, dex_register_map.size());
+ ASSERT_TRUE(dex_register_map[0].IsLive());
+ ASSERT_TRUE(dex_register_map[1].IsLive());
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters());
+
+ ASSERT_EQ(Kind::kInRegister, dex_register_map[0].GetKind());
+ ASSERT_EQ(Kind::kInFpuRegister, dex_register_map[1].GetKind());
+ ASSERT_EQ(18, dex_register_map[0].GetMachineRegister());
+ ASSERT_EQ(3, dex_register_map[1].GetMachineRegister());
+
+ DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(2);
+ DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(3);
ASSERT_EQ(Kind::kInRegister, location0.GetKind());
ASSERT_EQ(Kind::kInFpuRegister, location1.GetKind());
- ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind());
- ASSERT_EQ(Kind::kInFpuRegister, location1.GetInternalKind());
ASSERT_EQ(18, location0.GetValue());
ASSERT_EQ(3, location1.GetValue());
- ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
+ ASSERT_FALSE(stack_map.HasInlineInfo());
}
// Third stack map.
{
- StackMap stack_map = code_info.GetStackMapAt(2, encoding);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(2u, encoding)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u, encoding)));
- ASSERT_EQ(2u, stack_map.GetDexPc(encoding.stack_map.encoding));
- ASSERT_EQ(192u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
- ASSERT_EQ(0xABu, code_info.GetRegisterMaskOf(encoding, stack_map));
-
- ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask3));
-
- ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
- DexRegisterMap dex_register_map =
- code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
- ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
- // The Dex register map contains:
- // - one 1-byte live bit mask, and
- // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
- size_t expected_dex_register_map_size = 1u + 1u;
- ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
-
- ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInRegister, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(6, dex_register_map.GetMachineRegister(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(8, dex_register_map.GetMachineRegister(
- 1, number_of_dex_registers, code_info, encoding));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
- ASSERT_EQ(4u, index0);
- ASSERT_EQ(5u, index1);
- DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
- DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ StackMap stack_map = code_info.GetStackMapAt(2);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(2u)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(192u * kPcAlign)));
+ ASSERT_EQ(2u, stack_map.GetDexPc());
+ ASSERT_EQ(192u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+ ASSERT_EQ(0xABu, code_info.GetRegisterMaskOf(stack_map));
+
+ ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask3));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map);
+ ASSERT_EQ(number_of_dex_registers, dex_register_map.size());
+ ASSERT_TRUE(dex_register_map[0].IsLive());
+ ASSERT_TRUE(dex_register_map[1].IsLive());
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters());
+
+ ASSERT_EQ(Kind::kInRegister, dex_register_map[0].GetKind());
+ ASSERT_EQ(Kind::kInRegisterHigh, dex_register_map[1].GetKind());
+ ASSERT_EQ(6, dex_register_map[0].GetMachineRegister());
+ ASSERT_EQ(8, dex_register_map[1].GetMachineRegister());
+
+ DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(4);
+ DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(5);
ASSERT_EQ(Kind::kInRegister, location0.GetKind());
ASSERT_EQ(Kind::kInRegisterHigh, location1.GetKind());
- ASSERT_EQ(Kind::kInRegister, location0.GetInternalKind());
- ASSERT_EQ(Kind::kInRegisterHigh, location1.GetInternalKind());
ASSERT_EQ(6, location0.GetValue());
ASSERT_EQ(8, location1.GetValue());
- ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
+ ASSERT_FALSE(stack_map.HasInlineInfo());
}
// Fourth stack map.
{
- StackMap stack_map = code_info.GetStackMapAt(3, encoding);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(3u, encoding)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u, encoding)));
- ASSERT_EQ(3u, stack_map.GetDexPc(encoding.stack_map.encoding));
- ASSERT_EQ(256u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
- ASSERT_EQ(0xCDu, code_info.GetRegisterMaskOf(encoding, stack_map));
-
- ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask4));
-
- ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
- DexRegisterMap dex_register_map =
- code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(0));
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
- ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
- // The Dex register map contains:
- // - one 1-byte live bit mask, and
- // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
- size_t expected_dex_register_map_size = 1u + 1u;
- ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
-
- ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInFpuRegister, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(3, dex_register_map.GetMachineRegister(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(1, dex_register_map.GetMachineRegister(
- 1, number_of_dex_registers, code_info, encoding));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
- ASSERT_EQ(3u, index0); // Shared with second stack map.
- ASSERT_EQ(6u, index1);
- DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
- DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ StackMap stack_map = code_info.GetStackMapAt(3);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(3u)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(256u * kPcAlign)));
+ ASSERT_EQ(3u, stack_map.GetDexPc());
+ ASSERT_EQ(256u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+ ASSERT_EQ(0xCDu, code_info.GetRegisterMaskOf(stack_map));
+
+ ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask4));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map);
+ ASSERT_EQ(number_of_dex_registers, dex_register_map.size());
+ ASSERT_TRUE(dex_register_map[0].IsLive());
+ ASSERT_TRUE(dex_register_map[1].IsLive());
+ ASSERT_EQ(2u, dex_register_map.GetNumberOfLiveDexRegisters());
+
+ ASSERT_EQ(Kind::kInFpuRegister, dex_register_map[0].GetKind());
+ ASSERT_EQ(Kind::kInFpuRegisterHigh, dex_register_map[1].GetKind());
+ ASSERT_EQ(3, dex_register_map[0].GetMachineRegister());
+ ASSERT_EQ(1, dex_register_map[1].GetMachineRegister());
+
+ DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(3);
+ DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(6);
ASSERT_EQ(Kind::kInFpuRegister, location0.GetKind());
ASSERT_EQ(Kind::kInFpuRegisterHigh, location1.GetKind());
- ASSERT_EQ(Kind::kInFpuRegister, location0.GetInternalKind());
- ASSERT_EQ(Kind::kInFpuRegisterHigh, location1.GetInternalKind());
ASSERT_EQ(3, location0.GetValue());
ASSERT_EQ(1, location1.GetValue());
- ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
+ ASSERT_FALSE(stack_map.HasInlineInfo());
}
}
TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 2);
ArtMethod art_method;
ArenaBitVector sp_mask1(&allocator, 0, true);
@@ -424,7 +308,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
sp_mask1.SetBit(4);
const size_t number_of_dex_registers = 2;
const size_t number_of_dex_registers_in_inline_info = 2;
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, number_of_dex_registers, 1);
+ stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask1);
stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location.
stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location.
stream.BeginInlineInfoEntry(&art_method, 3, number_of_dex_registers_in_inline_info);
@@ -433,338 +317,204 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) {
stream.EndInlineInfoEntry();
stream.EndStackMapEntry();
- size_t size = stream.PrepareForFillIn();
- void* memory = allocator.Alloc(size, kArenaAllocMisc);
- MemoryRegion region(memory, size);
- stream.FillInCodeInfo(region);
+ stream.EndMethod();
+ ScopedArenaVector<uint8_t> memory = stream.Encode();
- CodeInfo code_info(region);
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding));
+ CodeInfo code_info(memory.data());
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
- uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+ uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
ASSERT_EQ(2u, number_of_catalog_entries);
- DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
- // The Dex register location catalog contains:
- // - one 1-byte short Dex register locations, and
- // - one 5-byte large Dex register location.
- const size_t expected_location_catalog_size = 1u + 5u;
- ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
// First stack map.
{
- StackMap stack_map = code_info.GetStackMapAt(0, encoding);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
- ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding));
- ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
-
- ASSERT_TRUE(CheckStackMask(code_info, encoding, stack_map, sp_mask1));
-
- ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
- DexRegisterMap map(code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers));
- ASSERT_TRUE(map.IsDexRegisterLive(0));
- ASSERT_TRUE(map.IsDexRegisterLive(1));
- ASSERT_EQ(2u, map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
- // The Dex register map contains:
- // - one 1-byte live bit mask, and
- // - one 1-byte set of location catalog entry indices composed of two 2-bit values.
- size_t expected_map_size = 1u + 1u;
- ASSERT_EQ(expected_map_size, map.Size());
-
- ASSERT_EQ(Kind::kInStack, map.GetLocationKind(0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kConstant,
- map.GetLocationKind(1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kInStack,
- map.GetLocationInternalKind(0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kConstantLargeValue,
- map.GetLocationInternalKind(1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(0, map.GetStackOffsetInBytes(0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(-2, map.GetConstant(1, number_of_dex_registers, code_info, encoding));
-
- const size_t index0 =
- map.GetLocationCatalogEntryIndex(0, number_of_dex_registers, number_of_catalog_entries);
- const size_t index1 =
- map.GetLocationCatalogEntryIndex(1, number_of_dex_registers, number_of_catalog_entries);
- ASSERT_EQ(0u, index0);
- ASSERT_EQ(1u, index1);
- DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
- DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign)));
+ ASSERT_EQ(0u, stack_map.GetDexPc());
+ ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map));
+
+ ASSERT_TRUE(CheckStackMask(code_info, stack_map, sp_mask1));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap map(code_info.GetDexRegisterMapOf(stack_map));
+ ASSERT_EQ(number_of_dex_registers, map.size());
+ ASSERT_TRUE(map[0].IsLive());
+ ASSERT_TRUE(map[1].IsLive());
+ ASSERT_EQ(2u, map.GetNumberOfLiveDexRegisters());
+
+ ASSERT_EQ(Kind::kInStack, map[0].GetKind());
+ ASSERT_EQ(Kind::kConstant, map[1].GetKind());
+ ASSERT_EQ(0, map[0].GetStackOffsetInBytes());
+ ASSERT_EQ(-2, map[1].GetConstant());
+
+ DexRegisterLocation location0 = code_info.GetDexRegisterCatalogEntry(0);
+ DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(1);
ASSERT_EQ(Kind::kInStack, location0.GetKind());
ASSERT_EQ(Kind::kConstant, location1.GetKind());
- ASSERT_EQ(Kind::kInStack, location0.GetInternalKind());
- ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
ASSERT_EQ(0, location0.GetValue());
ASSERT_EQ(-2, location1.GetValue());
-
- // Test that the inline info dex register map deduplicated to the same offset as the stack map
- // one.
- ASSERT_TRUE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
- InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
- EXPECT_EQ(inline_info.GetDexRegisterMapOffsetAtDepth(encoding.inline_info.encoding, 0),
- stack_map.GetDexRegisterMapOffset(encoding.stack_map.encoding));
}
}
TEST(StackMapTest, TestNonLiveDexRegisters) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 2);
ArenaBitVector sp_mask(&allocator, 0, false);
uint32_t number_of_dex_registers = 2;
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask);
stream.AddDexRegisterEntry(Kind::kNone, 0); // No location.
stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location.
stream.EndStackMapEntry();
- size_t size = stream.PrepareForFillIn();
- void* memory = allocator.Alloc(size, kArenaAllocMisc);
- MemoryRegion region(memory, size);
- stream.FillInCodeInfo(region);
+ stream.EndMethod();
+ ScopedArenaVector<uint8_t> memory = stream.Encode();
- CodeInfo code_info(region);
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- ASSERT_EQ(1u, code_info.GetNumberOfStackMaps(encoding));
+ CodeInfo code_info(memory.data());
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
- uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+ uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
ASSERT_EQ(1u, number_of_catalog_entries);
- DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
- // The Dex register location catalog contains:
- // - one 5-byte large Dex register location.
- size_t expected_location_catalog_size = 5u;
- ASSERT_EQ(expected_location_catalog_size, location_catalog.Size());
-
- StackMap stack_map = code_info.GetStackMapAt(0, encoding);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
- ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding));
- ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
-
- ASSERT_TRUE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
- DexRegisterMap dex_register_map =
- code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_dex_registers);
- ASSERT_FALSE(dex_register_map.IsDexRegisterLive(0));
- ASSERT_TRUE(dex_register_map.IsDexRegisterLive(1));
- ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters(number_of_dex_registers));
- // The Dex register map contains:
- // - one 1-byte live bit mask.
- // No space is allocated for the sole location catalog entry index, as it is useless.
- size_t expected_dex_register_map_size = 1u + 0u;
- ASSERT_EQ(expected_dex_register_map_size, dex_register_map.Size());
-
- ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kConstant, dex_register_map.GetLocationKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kNone, dex_register_map.GetLocationInternalKind(
- 0, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(Kind::kConstantLargeValue, dex_register_map.GetLocationInternalKind(
- 1, number_of_dex_registers, code_info, encoding));
- ASSERT_EQ(-2, dex_register_map.GetConstant(1, number_of_dex_registers, code_info, encoding));
-
- size_t index0 = dex_register_map.GetLocationCatalogEntryIndex(
- 0, number_of_dex_registers, number_of_catalog_entries);
- size_t index1 = dex_register_map.GetLocationCatalogEntryIndex(
- 1, number_of_dex_registers, number_of_catalog_entries);
- ASSERT_EQ(DexRegisterLocationCatalog::kNoLocationEntryIndex, index0);
- ASSERT_EQ(0u, index1);
- DexRegisterLocation location0 = location_catalog.GetDexRegisterLocation(index0);
- DexRegisterLocation location1 = location_catalog.GetDexRegisterLocation(index1);
- ASSERT_EQ(Kind::kNone, location0.GetKind());
+
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign)));
+ ASSERT_EQ(0u, stack_map.GetDexPc());
+ ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map);
+ ASSERT_EQ(number_of_dex_registers, dex_register_map.size());
+ ASSERT_FALSE(dex_register_map[0].IsLive());
+ ASSERT_TRUE(dex_register_map[1].IsLive());
+ ASSERT_EQ(1u, dex_register_map.GetNumberOfLiveDexRegisters());
+
+ ASSERT_EQ(Kind::kNone, dex_register_map[0].GetKind());
+ ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind());
+ ASSERT_EQ(-2, dex_register_map[1].GetConstant());
+
+ DexRegisterLocation location1 = code_info.GetDexRegisterCatalogEntry(0);
ASSERT_EQ(Kind::kConstant, location1.GetKind());
- ASSERT_EQ(Kind::kNone, location0.GetInternalKind());
- ASSERT_EQ(Kind::kConstantLargeValue, location1.GetInternalKind());
- ASSERT_EQ(0, location0.GetValue());
ASSERT_EQ(-2, location1.GetValue());
- ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
-}
-
-// Generate a stack map whose dex register offset is
-// StackMap::kNoDexRegisterMapSmallEncoding, and ensure we do
-// not treat it as kNoDexRegisterMap.
-TEST(StackMapTest, DexRegisterMapOffsetOverflow) {
- ArenaPool pool;
- ArenaStack arena_stack(&pool);
- ScopedArenaAllocator allocator(&arena_stack);
- StackMapStream stream(&allocator, kRuntimeISA);
-
- ArenaBitVector sp_mask(&allocator, 0, false);
- uint32_t number_of_dex_registers = 1024;
- // Create the first stack map (and its Dex register map).
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- uint32_t number_of_dex_live_registers_in_dex_register_map_0 = number_of_dex_registers - 8;
- for (uint32_t i = 0; i < number_of_dex_live_registers_in_dex_register_map_0; ++i) {
- // Use two different Dex register locations to populate this map,
- // as using a single value (in the whole CodeInfo object) would
- // make this Dex register mapping data empty (see
- // art::DexRegisterMap::SingleEntrySizeInBits).
- stream.AddDexRegisterEntry(Kind::kConstant, i % 2); // Short location.
- }
- stream.EndStackMapEntry();
- // Create the second stack map (and its Dex register map).
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
- for (uint32_t i = 0; i < number_of_dex_registers; ++i) {
- stream.AddDexRegisterEntry(Kind::kConstant, 0); // Short location.
- }
- stream.EndStackMapEntry();
-
- size_t size = stream.PrepareForFillIn();
- void* memory = allocator.Alloc(size, kArenaAllocMisc);
- MemoryRegion region(memory, size);
- stream.FillInCodeInfo(region);
-
- CodeInfo code_info(region);
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- // The location catalog contains two entries (DexRegisterLocation(kConstant, 0)
- // and DexRegisterLocation(kConstant, 1)), therefore the location catalog index
- // has a size of 1 bit.
- uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
- ASSERT_EQ(2u, number_of_catalog_entries);
- ASSERT_EQ(1u, DexRegisterMap::SingleEntrySizeInBits(number_of_catalog_entries));
-
- // The first Dex register map contains:
- // - a live register bit mask for 1024 registers (that is, 128 bytes of
- // data); and
- // - Dex register mapping information for 1016 1-bit Dex (live) register
- // locations (that is, 127 bytes of data).
- // Hence it has a size of 255 bytes, and therefore...
- ASSERT_EQ(128u, DexRegisterMap::GetLiveBitMaskSize(number_of_dex_registers));
- StackMap stack_map0 = code_info.GetStackMapAt(0, encoding);
- DexRegisterMap dex_register_map0 =
- code_info.GetDexRegisterMapOf(stack_map0, encoding, number_of_dex_registers);
- ASSERT_EQ(127u, dex_register_map0.GetLocationMappingDataSize(number_of_dex_registers,
- number_of_catalog_entries));
- ASSERT_EQ(255u, dex_register_map0.Size());
-
- StackMap stack_map1 = code_info.GetStackMapAt(1, encoding);
- ASSERT_TRUE(stack_map1.HasDexRegisterMap(encoding.stack_map.encoding));
- // ...the offset of the second Dex register map (relative to the
- // beginning of the Dex register maps region) is 255 (i.e.,
- // kNoDexRegisterMapSmallEncoding).
- ASSERT_NE(stack_map1.GetDexRegisterMapOffset(encoding.stack_map.encoding),
- StackMap::kNoDexRegisterMap);
- ASSERT_EQ(stack_map1.GetDexRegisterMapOffset(encoding.stack_map.encoding), 0xFFu);
+ ASSERT_FALSE(stack_map.HasInlineInfo());
}
TEST(StackMapTest, TestShareDexRegisterMap) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 2);
ArenaBitVector sp_mask(&allocator, 0, false);
uint32_t number_of_dex_registers = 2;
// First stack map.
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask);
stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location.
stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location.
stream.EndStackMapEntry();
// Second stack map, which should share the same dex register map.
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 65 * kPcAlign, 0x3, &sp_mask);
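+  // The second and third maps now use distinct native PCs (65, 66 instead of a
+  // repeated 64), presumably so GetStackMapForNativePcOffset stays unambiguous.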
stream.AddDexRegisterEntry(Kind::kInRegister, 0); // Short location.
stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location.
stream.EndStackMapEntry();
// Third stack map (doesn't share the dex register map).
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 66 * kPcAlign, 0x3, &sp_mask);
stream.AddDexRegisterEntry(Kind::kInRegister, 2); // Short location.
stream.AddDexRegisterEntry(Kind::kConstant, -2); // Large location.
stream.EndStackMapEntry();
- size_t size = stream.PrepareForFillIn();
- void* memory = allocator.Alloc(size, kArenaAllocMisc);
- MemoryRegion region(memory, size);
- stream.FillInCodeInfo(region);
+ stream.EndMethod();
+ ScopedArenaVector<uint8_t> memory = stream.Encode();
- CodeInfo ci(region);
- CodeInfoEncoding encoding = ci.ExtractEncoding();
+ CodeInfo ci(memory.data());
// Verify first stack map.
- StackMap sm0 = ci.GetStackMapAt(0, encoding);
- DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, encoding, number_of_dex_registers);
- ASSERT_EQ(0, dex_registers0.GetMachineRegister(0, number_of_dex_registers, ci, encoding));
- ASSERT_EQ(-2, dex_registers0.GetConstant(1, number_of_dex_registers, ci, encoding));
+ StackMap sm0 = ci.GetStackMapAt(0);
+ DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0);
+ ASSERT_EQ(number_of_dex_registers, dex_registers0.size());
+ ASSERT_EQ(0, dex_registers0[0].GetMachineRegister());
+ ASSERT_EQ(-2, dex_registers0[1].GetConstant());
// Verify second stack map.
- StackMap sm1 = ci.GetStackMapAt(1, encoding);
- DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1, encoding, number_of_dex_registers);
- ASSERT_EQ(0, dex_registers1.GetMachineRegister(0, number_of_dex_registers, ci, encoding));
- ASSERT_EQ(-2, dex_registers1.GetConstant(1, number_of_dex_registers, ci, encoding));
+ StackMap sm1 = ci.GetStackMapAt(1);
+ DexRegisterMap dex_registers1 = ci.GetDexRegisterMapOf(sm1);
+ ASSERT_EQ(number_of_dex_registers, dex_registers1.size());
+ ASSERT_EQ(0, dex_registers1[0].GetMachineRegister());
+ ASSERT_EQ(-2, dex_registers1[1].GetConstant());
// Verify third stack map.
- StackMap sm2 = ci.GetStackMapAt(2, encoding);
- DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2, encoding, number_of_dex_registers);
- ASSERT_EQ(2, dex_registers2.GetMachineRegister(0, number_of_dex_registers, ci, encoding));
- ASSERT_EQ(-2, dex_registers2.GetConstant(1, number_of_dex_registers, ci, encoding));
-
- // Verify dex register map offsets.
- ASSERT_EQ(sm0.GetDexRegisterMapOffset(encoding.stack_map.encoding),
- sm1.GetDexRegisterMapOffset(encoding.stack_map.encoding));
- ASSERT_NE(sm0.GetDexRegisterMapOffset(encoding.stack_map.encoding),
- sm2.GetDexRegisterMapOffset(encoding.stack_map.encoding));
- ASSERT_NE(sm1.GetDexRegisterMapOffset(encoding.stack_map.encoding),
- sm2.GetDexRegisterMapOffset(encoding.stack_map.encoding));
+ StackMap sm2 = ci.GetStackMapAt(2);
+ DexRegisterMap dex_registers2 = ci.GetDexRegisterMapOf(sm2);
+ ASSERT_EQ(number_of_dex_registers, dex_registers2.size());
+ ASSERT_EQ(2, dex_registers2[0].GetMachineRegister());
+ ASSERT_EQ(-2, dex_registers2[1].GetConstant());
+
+  // Verify dex register mask indices.
+ ASSERT_FALSE(sm1.HasDexRegisterMaskIndex()); // No delta.
+ ASSERT_TRUE(sm2.HasDexRegisterMaskIndex()); // Has delta.
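+  // A missing mask index presumably means no dex register changed relative to the
+  // previous stack map, i.e. sm1 reuses sm0's entries while sm2 encodes its own.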
}
TEST(StackMapTest, TestNoDexRegisterMap) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 1);
ArenaBitVector sp_mask(&allocator, 0, false);
uint32_t number_of_dex_registers = 0;
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(0, 64 * kPcAlign, 0x3, &sp_mask);
stream.EndStackMapEntry();
number_of_dex_registers = 1;
- stream.BeginStackMapEntry(1, 68, 0x4, &sp_mask, number_of_dex_registers, 0);
+ stream.BeginStackMapEntry(1, 68 * kPcAlign, 0x4, &sp_mask);
+ stream.AddDexRegisterEntry(Kind::kNone, 0);
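+  // A kNone entry still counts as a dex register map entry in the new encoding (the
+  // register is simply dead), which is why the second map below asserts HasDexRegisterMap().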
stream.EndStackMapEntry();
- size_t size = stream.PrepareForFillIn();
- void* memory = allocator.Alloc(size, kArenaAllocMisc);
- MemoryRegion region(memory, size);
- stream.FillInCodeInfo(region);
+ stream.EndMethod();
+ ScopedArenaVector<uint8_t> memory = stream.Encode();
- CodeInfo code_info(region);
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding));
+ CodeInfo code_info(memory.data());
+ ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
- uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries(encoding);
+ uint32_t number_of_catalog_entries = code_info.GetNumberOfLocationCatalogEntries();
ASSERT_EQ(0u, number_of_catalog_entries);
- DexRegisterLocationCatalog location_catalog = code_info.GetDexRegisterLocationCatalog(encoding);
- ASSERT_EQ(0u, location_catalog.Size());
-
- StackMap stack_map = code_info.GetStackMapAt(0, encoding);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0, encoding)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64, encoding)));
- ASSERT_EQ(0u, stack_map.GetDexPc(encoding.stack_map.encoding));
- ASSERT_EQ(64u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
- ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(encoding, stack_map));
-
- ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
- ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
-
- stack_map = code_info.GetStackMapAt(1, encoding);
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1, encoding)));
- ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(68, encoding)));
- ASSERT_EQ(1u, stack_map.GetDexPc(encoding.stack_map.encoding));
- ASSERT_EQ(68u, stack_map.GetNativePcOffset(encoding.stack_map.encoding, kRuntimeISA));
- ASSERT_EQ(0x4u, code_info.GetRegisterMaskOf(encoding, stack_map));
-
- ASSERT_FALSE(stack_map.HasDexRegisterMap(encoding.stack_map.encoding));
- ASSERT_FALSE(stack_map.HasInlineInfo(encoding.stack_map.encoding));
+
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign)));
+ ASSERT_EQ(0u, stack_map.GetDexPc());
+ ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+ ASSERT_EQ(0x3u, code_info.GetRegisterMaskOf(stack_map));
+
+ ASSERT_FALSE(stack_map.HasDexRegisterMap());
+ ASSERT_FALSE(stack_map.HasInlineInfo());
+
+ stack_map = code_info.GetStackMapAt(1);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(1)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(68 * kPcAlign)));
+ ASSERT_EQ(1u, stack_map.GetDexPc());
+ ASSERT_EQ(68u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+ ASSERT_EQ(0x4u, code_info.GetRegisterMaskOf(stack_map));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ ASSERT_FALSE(stack_map.HasInlineInfo());
}
TEST(StackMapTest, InlineTest) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 2);
ArtMethod art_method;
ArenaBitVector sp_mask1(&allocator, 0, true);
@@ -772,7 +522,7 @@ TEST(StackMapTest, InlineTest) {
sp_mask1.SetBit(4);
// First stack map.
- stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask1, 2, 2);
+ stream.BeginStackMapEntry(0, 10 * kPcAlign, 0x3, &sp_mask1);
stream.AddDexRegisterEntry(Kind::kInStack, 0);
stream.AddDexRegisterEntry(Kind::kConstant, 4);
@@ -788,7 +538,7 @@ TEST(StackMapTest, InlineTest) {
stream.EndStackMapEntry();
// Second stack map.
- stream.BeginStackMapEntry(2, 22, 0x3, &sp_mask1, 2, 3);
+ stream.BeginStackMapEntry(2, 22 * kPcAlign, 0x3, &sp_mask1);
stream.AddDexRegisterEntry(Kind::kInStack, 56);
stream.AddDexRegisterEntry(Kind::kConstant, 0);
@@ -806,13 +556,13 @@ TEST(StackMapTest, InlineTest) {
stream.EndStackMapEntry();
// Third stack map.
- stream.BeginStackMapEntry(4, 56, 0x3, &sp_mask1, 2, 0);
+ stream.BeginStackMapEntry(4, 56 * kPcAlign, 0x3, &sp_mask1);
stream.AddDexRegisterEntry(Kind::kNone, 0);
stream.AddDexRegisterEntry(Kind::kConstant, 4);
stream.EndStackMapEntry();
// Fourth stack map.
- stream.BeginStackMapEntry(6, 78, 0x3, &sp_mask1, 2, 3);
+ stream.BeginStackMapEntry(6, 78 * kPcAlign, 0x3, &sp_mask1);
stream.AddDexRegisterEntry(Kind::kInStack, 56);
stream.AddDexRegisterEntry(Kind::kConstant, 0);
@@ -828,204 +578,202 @@ TEST(StackMapTest, InlineTest) {
stream.EndStackMapEntry();
- size_t size = stream.PrepareForFillIn();
- void* memory = allocator.Alloc(size, kArenaAllocMisc);
- MemoryRegion region(memory, size);
- stream.FillInCodeInfo(region);
+ stream.EndMethod();
+ ScopedArenaVector<uint8_t> memory = stream.Encode();
- CodeInfo ci(region);
- CodeInfoEncoding encoding = ci.ExtractEncoding();
+ CodeInfo ci(memory.data());
{
// Verify first stack map.
- StackMap sm0 = ci.GetStackMapAt(0, encoding);
-
- DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0, encoding, 2);
- ASSERT_EQ(0, dex_registers0.GetStackOffsetInBytes(0, 2, ci, encoding));
- ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding));
-
- InlineInfo if0 = ci.GetInlineInfoOf(sm0, encoding);
- ASSERT_EQ(2u, if0.GetDepth(encoding.inline_info.encoding));
- ASSERT_EQ(2u, if0.GetDexPcAtDepth(encoding.inline_info.encoding, 0));
- ASSERT_TRUE(if0.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0));
- ASSERT_EQ(3u, if0.GetDexPcAtDepth(encoding.inline_info.encoding, 1));
- ASSERT_TRUE(if0.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1));
-
- DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if0, encoding, 1);
- ASSERT_EQ(8, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding));
-
- DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if0, encoding, 3);
- ASSERT_EQ(16, dex_registers2.GetStackOffsetInBytes(0, 3, ci, encoding));
- ASSERT_EQ(20, dex_registers2.GetConstant(1, 3, ci, encoding));
- ASSERT_EQ(15, dex_registers2.GetMachineRegister(2, 3, ci, encoding));
+ StackMap sm0 = ci.GetStackMapAt(0);
+
+ DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm0);
+ ASSERT_EQ(2u, dex_registers0.size());
+ ASSERT_EQ(0, dex_registers0[0].GetStackOffsetInBytes());
+ ASSERT_EQ(4, dex_registers0[1].GetConstant());
+
+ auto inline_infos = ci.GetInlineInfosOf(sm0);
+ ASSERT_EQ(2u, inline_infos.size());
+ ASSERT_EQ(2u, inline_infos[0].GetDexPc());
+ ASSERT_TRUE(inline_infos[0].EncodesArtMethod());
+ ASSERT_EQ(3u, inline_infos[1].GetDexPc());
+ ASSERT_TRUE(inline_infos[1].EncodesArtMethod());
+
+ DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[0]);
+ ASSERT_EQ(1u, dex_registers1.size());
+ ASSERT_EQ(8, dex_registers1[0].GetStackOffsetInBytes());
+
+ DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm0, inline_infos[1]);
+ ASSERT_EQ(3u, dex_registers2.size());
+ ASSERT_EQ(16, dex_registers2[0].GetStackOffsetInBytes());
+ ASSERT_EQ(20, dex_registers2[1].GetConstant());
+ ASSERT_EQ(15, dex_registers2[2].GetMachineRegister());
}
{
// Verify second stack map.
- StackMap sm1 = ci.GetStackMapAt(1, encoding);
-
- DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1, encoding, 2);
- ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci, encoding));
- ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding));
-
- InlineInfo if1 = ci.GetInlineInfoOf(sm1, encoding);
- ASSERT_EQ(3u, if1.GetDepth(encoding.inline_info.encoding));
- ASSERT_EQ(2u, if1.GetDexPcAtDepth(encoding.inline_info.encoding, 0));
- ASSERT_TRUE(if1.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0));
- ASSERT_EQ(3u, if1.GetDexPcAtDepth(encoding.inline_info.encoding, 1));
- ASSERT_TRUE(if1.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1));
- ASSERT_EQ(5u, if1.GetDexPcAtDepth(encoding.inline_info.encoding, 2));
- ASSERT_TRUE(if1.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 2));
-
- DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(0, if1, encoding, 1);
- ASSERT_EQ(12, dex_registers1.GetStackOffsetInBytes(0, 1, ci, encoding));
-
- DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(1, if1, encoding, 3);
- ASSERT_EQ(80, dex_registers2.GetStackOffsetInBytes(0, 3, ci, encoding));
- ASSERT_EQ(10, dex_registers2.GetConstant(1, 3, ci, encoding));
- ASSERT_EQ(5, dex_registers2.GetMachineRegister(2, 3, ci, encoding));
-
- ASSERT_FALSE(if1.HasDexRegisterMapAtDepth(encoding.inline_info.encoding, 2));
+ StackMap sm1 = ci.GetStackMapAt(1);
+
+ DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm1);
+ ASSERT_EQ(2u, dex_registers0.size());
+ ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes());
+ ASSERT_EQ(0, dex_registers0[1].GetConstant());
+
+ auto inline_infos = ci.GetInlineInfosOf(sm1);
+ ASSERT_EQ(3u, inline_infos.size());
+ ASSERT_EQ(2u, inline_infos[0].GetDexPc());
+ ASSERT_TRUE(inline_infos[0].EncodesArtMethod());
+ ASSERT_EQ(3u, inline_infos[1].GetDexPc());
+ ASSERT_TRUE(inline_infos[1].EncodesArtMethod());
+ ASSERT_EQ(5u, inline_infos[2].GetDexPc());
+ ASSERT_TRUE(inline_infos[2].EncodesArtMethod());
+
+ DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[0]);
+ ASSERT_EQ(1u, dex_registers1.size());
+ ASSERT_EQ(12, dex_registers1[0].GetStackOffsetInBytes());
+
+ DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm1, inline_infos[1]);
+ ASSERT_EQ(3u, dex_registers2.size());
+ ASSERT_EQ(80, dex_registers2[0].GetStackOffsetInBytes());
+ ASSERT_EQ(10, dex_registers2[1].GetConstant());
+ ASSERT_EQ(5, dex_registers2[2].GetMachineRegister());
}
{
// Verify third stack map.
- StackMap sm2 = ci.GetStackMapAt(2, encoding);
+ StackMap sm2 = ci.GetStackMapAt(2);
- DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2, encoding, 2);
- ASSERT_FALSE(dex_registers0.IsDexRegisterLive(0));
- ASSERT_EQ(4, dex_registers0.GetConstant(1, 2, ci, encoding));
- ASSERT_FALSE(sm2.HasInlineInfo(encoding.stack_map.encoding));
+ DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm2);
+ ASSERT_EQ(2u, dex_registers0.size());
+ ASSERT_FALSE(dex_registers0[0].IsLive());
+ ASSERT_EQ(4, dex_registers0[1].GetConstant());
+ ASSERT_FALSE(sm2.HasInlineInfo());
}
{
// Verify fourth stack map.
- StackMap sm3 = ci.GetStackMapAt(3, encoding);
-
- DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3, encoding, 2);
- ASSERT_EQ(56, dex_registers0.GetStackOffsetInBytes(0, 2, ci, encoding));
- ASSERT_EQ(0, dex_registers0.GetConstant(1, 2, ci, encoding));
-
- InlineInfo if2 = ci.GetInlineInfoOf(sm3, encoding);
- ASSERT_EQ(3u, if2.GetDepth(encoding.inline_info.encoding));
- ASSERT_EQ(2u, if2.GetDexPcAtDepth(encoding.inline_info.encoding, 0));
- ASSERT_TRUE(if2.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 0));
- ASSERT_EQ(5u, if2.GetDexPcAtDepth(encoding.inline_info.encoding, 1));
- ASSERT_TRUE(if2.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 1));
- ASSERT_EQ(10u, if2.GetDexPcAtDepth(encoding.inline_info.encoding, 2));
- ASSERT_TRUE(if2.EncodesArtMethodAtDepth(encoding.inline_info.encoding, 2));
-
- ASSERT_FALSE(if2.HasDexRegisterMapAtDepth(encoding.inline_info.encoding, 0));
-
- DexRegisterMap dex_registers1 = ci.GetDexRegisterMapAtDepth(1, if2, encoding, 1);
- ASSERT_EQ(2, dex_registers1.GetMachineRegister(0, 1, ci, encoding));
-
- DexRegisterMap dex_registers2 = ci.GetDexRegisterMapAtDepth(2, if2, encoding, 2);
- ASSERT_FALSE(dex_registers2.IsDexRegisterLive(0));
- ASSERT_EQ(3, dex_registers2.GetMachineRegister(1, 2, ci, encoding));
+ StackMap sm3 = ci.GetStackMapAt(3);
+
+ DexRegisterMap dex_registers0 = ci.GetDexRegisterMapOf(sm3);
+ ASSERT_EQ(2u, dex_registers0.size());
+ ASSERT_EQ(56, dex_registers0[0].GetStackOffsetInBytes());
+ ASSERT_EQ(0, dex_registers0[1].GetConstant());
+
+ auto inline_infos = ci.GetInlineInfosOf(sm3);
+ ASSERT_EQ(3u, inline_infos.size());
+ ASSERT_EQ(2u, inline_infos[0].GetDexPc());
+ ASSERT_TRUE(inline_infos[0].EncodesArtMethod());
+ ASSERT_EQ(5u, inline_infos[1].GetDexPc());
+ ASSERT_TRUE(inline_infos[1].EncodesArtMethod());
+ ASSERT_EQ(10u, inline_infos[2].GetDexPc());
+ ASSERT_TRUE(inline_infos[2].EncodesArtMethod());
+
+ DexRegisterMap dex_registers1 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[1]);
+ ASSERT_EQ(1u, dex_registers1.size());
+ ASSERT_EQ(2, dex_registers1[0].GetMachineRegister());
+
+ DexRegisterMap dex_registers2 = ci.GetInlineDexRegisterMapOf(sm3, inline_infos[2]);
+ ASSERT_EQ(2u, dex_registers2.size());
+ ASSERT_FALSE(dex_registers2[0].IsLive());
+ ASSERT_EQ(3, dex_registers2[1].GetMachineRegister());
}
}
-TEST(StackMapTest, CodeOffsetTest) {
- // Test minimum alignments, encoding, and decoding.
- CodeOffset offset_thumb2 =
- CodeOffset::FromOffset(kThumb2InstructionAlignment, InstructionSet::kThumb2);
- CodeOffset offset_arm64 =
- CodeOffset::FromOffset(kArm64InstructionAlignment, InstructionSet::kArm64);
- CodeOffset offset_x86 =
- CodeOffset::FromOffset(kX86InstructionAlignment, InstructionSet::kX86);
- CodeOffset offset_x86_64 =
- CodeOffset::FromOffset(kX86_64InstructionAlignment, InstructionSet::kX86_64);
- CodeOffset offset_mips =
- CodeOffset::FromOffset(kMipsInstructionAlignment, InstructionSet::kMips);
- CodeOffset offset_mips64 =
- CodeOffset::FromOffset(kMips64InstructionAlignment, InstructionSet::kMips64);
- EXPECT_EQ(offset_thumb2.Uint32Value(InstructionSet::kThumb2), kThumb2InstructionAlignment);
- EXPECT_EQ(offset_arm64.Uint32Value(InstructionSet::kArm64), kArm64InstructionAlignment);
- EXPECT_EQ(offset_x86.Uint32Value(InstructionSet::kX86), kX86InstructionAlignment);
- EXPECT_EQ(offset_x86_64.Uint32Value(InstructionSet::kX86_64), kX86_64InstructionAlignment);
- EXPECT_EQ(offset_mips.Uint32Value(InstructionSet::kMips), kMipsInstructionAlignment);
- EXPECT_EQ(offset_mips64.Uint32Value(InstructionSet::kMips64), kMips64InstructionAlignment);
+TEST(StackMapTest, PackedNativePcTest) {
+  // Test packing and decoding of native PCs at each ISA's minimum instruction alignment.
+ uint32_t packed_thumb2 =
+ StackMap::PackNativePc(kThumb2InstructionAlignment, InstructionSet::kThumb2);
+ uint32_t packed_arm64 =
+ StackMap::PackNativePc(kArm64InstructionAlignment, InstructionSet::kArm64);
+ uint32_t packed_x86 =
+ StackMap::PackNativePc(kX86InstructionAlignment, InstructionSet::kX86);
+ uint32_t packed_x86_64 =
+ StackMap::PackNativePc(kX86_64InstructionAlignment, InstructionSet::kX86_64);
+ uint32_t packed_mips =
+ StackMap::PackNativePc(kMipsInstructionAlignment, InstructionSet::kMips);
+ uint32_t packed_mips64 =
+ StackMap::PackNativePc(kMips64InstructionAlignment, InstructionSet::kMips64);
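+  // If PackNativePc divides by the ISA's instruction alignment (as the name suggests),
+  // each minimum alignment below must round-trip exactly through UnpackNativePc.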
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_thumb2, InstructionSet::kThumb2),
+ kThumb2InstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_arm64, InstructionSet::kArm64),
+ kArm64InstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_x86, InstructionSet::kX86),
+ kX86InstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_x86_64, InstructionSet::kX86_64),
+ kX86_64InstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_mips, InstructionSet::kMips),
+ kMipsInstructionAlignment);
+ EXPECT_EQ(StackMap::UnpackNativePc(packed_mips64, InstructionSet::kMips64),
+ kMips64InstructionAlignment);
}
TEST(StackMapTest, TestDeduplicateStackMask) {
- ArenaPool pool;
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 0);
ArenaBitVector sp_mask(&allocator, 0, true);
sp_mask.SetBit(1);
sp_mask.SetBit(4);
- stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0);
+ stream.BeginStackMapEntry(0, 4 * kPcAlign, 0x3, &sp_mask);
stream.EndStackMapEntry();
- stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0);
+ stream.BeginStackMapEntry(0, 8 * kPcAlign, 0x3, &sp_mask);
stream.EndStackMapEntry();
- size_t size = stream.PrepareForFillIn();
- void* memory = allocator.Alloc(size, kArenaAllocMisc);
- MemoryRegion region(memory, size);
- stream.FillInCodeInfo(region);
+ stream.EndMethod();
+ ScopedArenaVector<uint8_t> memory = stream.Encode();
- CodeInfo code_info(region);
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- ASSERT_EQ(2u, code_info.GetNumberOfStackMaps(encoding));
+ CodeInfo code_info(memory.data());
+ ASSERT_EQ(2u, code_info.GetNumberOfStackMaps());
- StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4, encoding);
- StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8, encoding);
- EXPECT_EQ(stack_map1.GetStackMaskIndex(encoding.stack_map.encoding),
- stack_map2.GetStackMaskIndex(encoding.stack_map.encoding));
+ StackMap stack_map1 = code_info.GetStackMapForNativePcOffset(4 * kPcAlign);
+ StackMap stack_map2 = code_info.GetStackMapForNativePcOffset(8 * kPcAlign);
+  EXPECT_EQ(stack_map1.GetStackMaskIndex(), stack_map2.GetStackMaskIndex());
}
-TEST(StackMapTest, TestInvokeInfo) {
- ArenaPool pool;
+TEST(StackMapTest, TestDedupeBitTables) {
+ MallocArenaPool pool;
ArenaStack arena_stack(&pool);
ScopedArenaAllocator allocator(&arena_stack);
StackMapStream stream(&allocator, kRuntimeISA);
+ stream.BeginMethod(32, 0, 0, 2);
- ArenaBitVector sp_mask(&allocator, 0, true);
- sp_mask.SetBit(1);
- stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0);
- stream.AddInvoke(kSuper, 1);
- stream.EndStackMapEntry();
- stream.BeginStackMapEntry(0, 8, 0x3, &sp_mask, 0, 0);
- stream.AddInvoke(kStatic, 3);
- stream.EndStackMapEntry();
- stream.BeginStackMapEntry(0, 16, 0x3, &sp_mask, 0, 0);
- stream.AddInvoke(kDirect, 65535);
+ stream.BeginStackMapEntry(0, 64 * kPcAlign);
+ stream.AddDexRegisterEntry(Kind::kInStack, 0);
+ stream.AddDexRegisterEntry(Kind::kConstant, -2);
stream.EndStackMapEntry();
- const size_t code_info_size = stream.PrepareForFillIn();
- MemoryRegion code_info_region(allocator.Alloc(code_info_size, kArenaAllocMisc), code_info_size);
- stream.FillInCodeInfo(code_info_region);
-
- const size_t method_info_size = stream.ComputeMethodInfoSize();
- MemoryRegion method_info_region(allocator.Alloc(method_info_size, kArenaAllocMisc),
- method_info_size);
- stream.FillInMethodInfo(method_info_region);
-
- CodeInfo code_info(code_info_region);
- MethodInfo method_info(method_info_region.begin());
- CodeInfoEncoding encoding = code_info.ExtractEncoding();
- ASSERT_EQ(3u, code_info.GetNumberOfStackMaps(encoding));
-
- InvokeInfo invoke1(code_info.GetInvokeInfoForNativePcOffset(4, encoding));
- InvokeInfo invoke2(code_info.GetInvokeInfoForNativePcOffset(8, encoding));
- InvokeInfo invoke3(code_info.GetInvokeInfoForNativePcOffset(16, encoding));
- InvokeInfo invoke_invalid(code_info.GetInvokeInfoForNativePcOffset(12, encoding));
- EXPECT_FALSE(invoke_invalid.IsValid()); // No entry for that index.
- EXPECT_TRUE(invoke1.IsValid());
- EXPECT_TRUE(invoke2.IsValid());
- EXPECT_TRUE(invoke3.IsValid());
- EXPECT_EQ(invoke1.GetInvokeType(encoding.invoke_info.encoding), kSuper);
- EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding, method_info), 1u);
- EXPECT_EQ(invoke1.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 4u);
- EXPECT_EQ(invoke2.GetInvokeType(encoding.invoke_info.encoding), kStatic);
- EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding, method_info), 3u);
- EXPECT_EQ(invoke2.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 8u);
- EXPECT_EQ(invoke3.GetInvokeType(encoding.invoke_info.encoding), kDirect);
- EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding, method_info), 65535u);
- EXPECT_EQ(invoke3.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 16u);
+ stream.EndMethod();
+ ScopedArenaVector<uint8_t> memory = stream.Encode();
+
+ std::vector<uint8_t> out;
+ CodeInfo::Deduper deduper(&out);
+ size_t deduped1 = deduper.Dedupe(memory.data());
+ size_t deduped2 = deduper.Dedupe(memory.data());
+
+ for (size_t deduped : { deduped1, deduped2 }) {
+ CodeInfo code_info(out.data() + deduped);
+ ASSERT_EQ(1u, code_info.GetNumberOfStackMaps());
+
+ StackMap stack_map = code_info.GetStackMapAt(0);
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForDexPc(0)));
+ ASSERT_TRUE(stack_map.Equals(code_info.GetStackMapForNativePcOffset(64 * kPcAlign)));
+ ASSERT_EQ(0u, stack_map.GetDexPc());
+ ASSERT_EQ(64u * kPcAlign, stack_map.GetNativePcOffset(kRuntimeISA));
+
+ ASSERT_TRUE(stack_map.HasDexRegisterMap());
+ DexRegisterMap dex_register_map = code_info.GetDexRegisterMapOf(stack_map);
+
+ ASSERT_EQ(Kind::kInStack, dex_register_map[0].GetKind());
+ ASSERT_EQ(Kind::kConstant, dex_register_map[1].GetKind());
+ ASSERT_EQ(0, dex_register_map[0].GetStackOffsetInBytes());
+ ASSERT_EQ(-2, dex_register_map[1].GetConstant());
+ }
+
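+ // The two dedupe results share the underlying bit tables, so the combined
+ // output should be smaller than two independent copies of the encoding.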
+ ASSERT_GT(memory.size() * 2, out.size());
}
} // namespace art
diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc
index a7c23bef7e..dc433feb51 100644
--- a/compiler/optimizing/superblock_cloner.cc
+++ b/compiler/optimizing/superblock_cloner.cc
@@ -17,6 +17,7 @@
#include "superblock_cloner.h"
#include "common_dominator.h"
+#include "induction_var_range.h"
#include "graph_checker.h"
#include <iostream>
@@ -70,20 +71,18 @@ static bool ArePhiInputsTheSame(const HPhi* phi) {
return true;
}
-// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
-// graph.
-static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
- if (loop1 == nullptr || loop2 == nullptr) {
- return nullptr;
+// Returns whether two Edge sets are equal (ArenaHashSet doesn't have an "Equal" method).
+// Since the sizes are checked first, one-way containment suffices to prove equality.
+static bool EdgeHashSetsEqual(const HEdgeSet* set1, const HEdgeSet* set2) {
+ if (set1->size() != set2->size()) {
+ return false;
}
- if (loop1->IsIn(*loop2)) {
- return loop2;
- } else if (loop2->IsIn(*loop1)) {
- return loop1;
+ for (auto e : *set1) {
+ if (set2->find(e) == set2->end()) {
+ return false;
+ }
}
- HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
- return block->GetLoopInformation();
+ return true;
}
// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
@@ -95,6 +94,21 @@ static void OrderLoopsHeadersPredecessors(HGraph* graph) {
}
}
+// Performs DFS on the subgraph (specified by 'bb_set') starting from the specified block; while
+// traversing, the function removes basic blocks from the bb_set (instead of traditional DFS
+// 'marking'). So whatever is left in the 'bb_set' after the traversal is not reachable from the
+// start block.
+static void TraverseSubgraphForConnectivity(HBasicBlock* block, HBasicBlockSet* bb_set) {
+ DCHECK(bb_set->IsBitSet(block->GetBlockId()));
+ bb_set->ClearBit(block->GetBlockId());
+
+ for (HBasicBlock* succ : block->GetSuccessors()) {
+ if (bb_set->IsBitSet(succ->GetBlockId())) {
+ TraverseSubgraphForConnectivity(succ, bb_set);
+ }
+ }
+}
+
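The same "clear instead of mark" trick in a minimal standalone form (a sketch; Graph and the vector-based block set are hypothetical stand-ins for HGraph/HBasicBlockSet):

    #include <vector>

    struct Graph {
      std::vector<std::vector<int>> succ;  // successor block ids per block
    };

    // Visit by clearing bits; whatever remains set afterwards is exactly the
    // set of blocks unreachable from 'block' within the subgraph.
    void Traverse(const Graph& g, int block, std::vector<bool>* bb_set) {
      (*bb_set)[block] = false;
      for (int s : g.succ[block]) {
        if ((*bb_set)[s]) {
          Traverse(g, s, bb_set);
        }
      }
    }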
//
// Helpers for CloneBasicBlock.
//
@@ -268,7 +282,6 @@ void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVect
}
void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) {
- // TODO: DCHECK that after the transformation the graph is connected.
HBasicBlock* block_entry = nullptr;
if (outer_loop_ == nullptr) {
@@ -397,7 +410,7 @@ void SuperblockCloner::ResolvePhi(HPhi* phi) {
// Main algorithm methods.
//
-void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) {
+void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const {
DCHECK(exits->empty());
for (uint32_t block_id : orig_bb_set_.Indexes()) {
HBasicBlock* block = GetBlockById(block_id);
@@ -424,6 +437,11 @@ void SuperblockCloner::FindAndSetLocalAreaForAdjustments() {
outer_loop_ = nullptr;
break;
}
+ if (outer_loop_ == nullptr) {
+ // We should not use the initial outer_loop_ value 'nullptr' when finding the outermost
+ // common loop.
+ outer_loop_ = loop_exit_loop_info;
+ }
outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info);
}
@@ -455,8 +473,8 @@ void SuperblockCloner::RemapEdgesSuccessors() {
continue;
}
- auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id));
- auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id));
+ auto orig_redir = remap_orig_internal_->find(HEdge(orig_block_id, orig_succ_id));
+ auto copy_redir = remap_copy_internal_->find(HEdge(orig_block_id, orig_succ_id));
// Due to construction, all successors of the copied block were set to the original.
if (copy_redir != remap_copy_internal_->end()) {
@@ -504,9 +522,152 @@ void SuperblockCloner::ResolveDataFlow() {
}
//
+// Helpers for live-outs processing and Subgraph-closed SSA.
+//
+
+bool SuperblockCloner::CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs) const {
+ DCHECK(live_outs->empty());
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ HBasicBlock* block = GetBlockById(idx);
+
+ for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ DCHECK(instr->IsClonable());
+
+ if (IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ live_outs->FindOrAdd(instr, instr);
+ }
+ }
+
+ for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+ HInstruction* instr = it.Current();
+ if (!instr->IsClonable()) {
+ return false;
+ }
+
+ if (IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ // TODO: Investigate why HNewInstance, HCheckCast have a requirement for the input.
+ if (instr->IsLoadClass()) {
+ return false;
+ }
+ live_outs->FindOrAdd(instr, instr);
+ }
+ }
+ }
+ return true;
+}
+
+void SuperblockCloner::UpdateInductionRangeInfoOf(
+ HInstruction* user, HInstruction* old_instruction, HInstruction* replacement) {
+ if (induction_range_ != nullptr) {
+ induction_range_->Replace(user, old_instruction, replacement);
+ }
+}
+
+void SuperblockCloner::ConstructSubgraphClosedSSA() {
+ if (live_outs_.empty()) {
+ return;
+ }
+
+ ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+ SearchForSubgraphExits(&exits);
+ if (exits.empty()) {
+ DCHECK(live_outs_.empty());
+ return;
+ }
+
+ DCHECK_EQ(exits.size(), 1u);
+ HBasicBlock* exit_block = exits[0];
+ // There should be no critical edges.
+ DCHECK_EQ(exit_block->GetPredecessors().size(), 1u);
+ DCHECK(exit_block->GetPhis().IsEmpty());
+
+ // For each live-out value, insert a phi into the loop exit and replace all the value's uses
+ // external to the loop with this phi. The phi will have the original value as its only input;
+ // after copying is done FixSubgraphClosedSSAAfterCloning will add a corresponding copy of the
+ // original value as the second input thus merging data flow from the original and copy parts of
+ // the subgraph. Also update the record in the live_outs_ map from (value, value) to
+ // (value, new_phi).
+ for (auto live_out_it = live_outs_.begin(); live_out_it != live_outs_.end(); ++live_out_it) {
+ HInstruction* value = live_out_it->first;
+ HPhi* phi = new (arena_) HPhi(arena_, kNoRegNumber, 0, value->GetType());
+
+ if (value->GetType() == DataType::Type::kReference) {
+ phi->SetReferenceTypeInfo(value->GetReferenceTypeInfo());
+ }
+
+ exit_block->AddPhi(phi);
+ live_out_it->second = phi;
+
+ const HUseList<HInstruction*>& uses = value->GetUses();
+ for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+ HInstruction* user = it->GetUser();
+ size_t index = it->GetIndex();
+ // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+ ++it;
+ if (!IsInOrigBBSet(user->GetBlock())) {
+ user->ReplaceInput(phi, index);
+ UpdateInductionRangeInfoOf(user, value, phi);
+ }
+ }
+
+ const HUseList<HEnvironment*>& env_uses = value->GetEnvUses();
+ for (auto it = env_uses.begin(), e = env_uses.end(); it != e; /* ++it below */) {
+ HEnvironment* env = it->GetUser();
+ size_t index = it->GetIndex();
+ ++it;
+ if (!IsInOrigBBSet(env->GetHolder()->GetBlock())) {
+ env->ReplaceInput(phi, index);
+ }
+ }
+
+ phi->AddInput(value);
+ }
+}
+
+void SuperblockCloner::FixSubgraphClosedSSAAfterCloning() {
+ for (auto it : live_outs_) {
+ DCHECK(it.first != it.second);
+ HInstruction* orig_value = it.first;
+ HPhi* phi = it.second->AsPhi();
+ HInstruction* copy_value = GetInstrCopy(orig_value);
+ // Copy edges are inserted after the original so we can just add new input to the phi.
+ phi->AddInput(copy_value);
+ }
+}
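A compressed model of the live-outs bookkeeping across the phases above (a standalone sketch; Value doubles for both HInstruction and HPhi, and everything here is hypothetical):

    #include <cassert>
    #include <map>
    #include <vector>

    struct Value { std::vector<Value*> inputs; };  // also models a phi

    int main() {
      Value v;       // live-out defined inside the subgraph
      Value v_copy;  // its clone, produced later by CloneBasicBlocks
      // CollectLiveOutsAndCheckClonable records the pair (val, val).
      std::map<Value*, Value*> live_outs = {{&v, &v}};

      // ConstructSubgraphClosedSSA: exit phi with 'v' as the only input;
      // outside uses are redirected to 'phi', the map entry becomes (val, phi).
      Value phi;
      phi.inputs.push_back(&v);
      live_outs[&v] = &phi;

      // FixSubgraphClosedSSAAfterCloning: merge the copy's data flow in.
      live_outs[&v]->inputs.push_back(&v_copy);

      assert(phi.inputs.size() == 2u);
      return 0;
    }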
+
+//
// Debug and logging methods.
//
+// Debug function to dump the graph's basic blocks info.
+void DumpBB(HGraph* graph) {
+ for (HBasicBlock* bb : graph->GetBlocks()) {
+ if (bb == nullptr) {
+ continue;
+ }
+ std::cout << bb->GetBlockId();
+ std::cout << " <- ";
+ for (HBasicBlock* pred : bb->GetPredecessors()) {
+ std::cout << pred->GetBlockId() << " ";
+ }
+ std::cout << " -> ";
+ for (HBasicBlock* succ : bb->GetSuccessors()) {
+ std::cout << succ->GetBlockId() << " ";
+ }
+
+ if (bb->GetDominator()) {
+ std::cout << " dom " << bb->GetDominator()->GetBlockId();
+ }
+
+ if (bb->GetLoopInformation()) {
+ std::cout << "\tloop: " << bb->GetLoopInformation()->GetHeader()->GetBlockId();
+ }
+
+ std::cout << std::endl;
+ }
+}
+
void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) {
DCHECK(!orig_instr->IsPhi());
HInstruction* copy_instr = GetInstrCopy(orig_instr);
@@ -542,6 +703,81 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr)
}
}
+bool SuperblockCloner::CheckRemappingInfoIsValid() {
+ for (HEdge edge : *remap_orig_internal_) {
+ if (!IsEdgeValid(edge, graph_) ||
+ !IsInOrigBBSet(edge.GetFrom()) ||
+ !IsInOrigBBSet(edge.GetTo())) {
+ return false;
+ }
+ }
+
+ for (auto edge : *remap_copy_internal_) {
+ if (!IsEdgeValid(edge, graph_) ||
+ !IsInOrigBBSet(edge.GetFrom()) ||
+ !IsInOrigBBSet(edge.GetTo())) {
+ return false;
+ }
+ }
+
+ for (auto edge : *remap_incoming_) {
+ if (!IsEdgeValid(edge, graph_) ||
+ IsInOrigBBSet(edge.GetFrom()) ||
+ !IsInOrigBBSet(edge.GetTo())) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void SuperblockCloner::VerifyGraph() {
+ for (auto it : *hir_map_) {
+ HInstruction* orig_instr = it.first;
+ HInstruction* copy_instr = it.second;
+ if (!orig_instr->IsPhi() && !orig_instr->IsSuspendCheck()) {
+ DCHECK(it.first->GetBlock() != nullptr);
+ }
+ if (!copy_instr->IsPhi() && !copy_instr->IsSuspendCheck()) {
+ DCHECK(it.second->GetBlock() != nullptr);
+ }
+ }
+
+ GraphChecker checker(graph_);
+ checker.Run();
+ if (!checker.IsValid()) {
+ for (const std::string& error : checker.GetErrors()) {
+ std::cout << error << std::endl;
+ }
+ LOG(FATAL) << "GraphChecker failed: superblock cloner\n";
+ }
+}
+
+void DumpBBSet(const ArenaBitVector* set) {
+ for (uint32_t idx : set->Indexes()) {
+ std::cout << idx << "\n";
+ }
+}
+
+void SuperblockCloner::DumpInputSets() {
+ std::cout << "orig_bb_set:\n";
+ for (uint32_t idx : orig_bb_set_.Indexes()) {
+ std::cout << idx << "\n";
+ }
+ std::cout << "remap_orig_internal:\n";
+ for (HEdge e : *remap_orig_internal_) {
+ std::cout << e << "\n";
+ }
+ std::cout << "remap_copy_internal:\n";
+ for (auto e : *remap_copy_internal_) {
+ std::cout << e << "\n";
+ }
+ std::cout << "remap_incoming:\n";
+ for (auto e : *remap_incoming_) {
+ std::cout << e << "\n";
+ }
+}
+
//
// Public methods.
//
@@ -549,7 +785,8 @@ void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr)
SuperblockCloner::SuperblockCloner(HGraph* graph,
const HBasicBlockSet* orig_bb_set,
HBasicBlockMap* bb_map,
- HInstructionMap* hir_map)
+ HInstructionMap* hir_map,
+ InductionVarRange* induction_range)
: graph_(graph),
arena_(graph->GetAllocator()),
orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
@@ -558,8 +795,11 @@ SuperblockCloner::SuperblockCloner(HGraph* graph,
remap_incoming_(nullptr),
bb_map_(bb_map),
hir_map_(hir_map),
+ induction_range_(induction_range),
outer_loop_(nullptr),
- outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+ outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+ live_outs_(std::less<HInstruction*>(),
+ graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)) {
orig_bb_set_.Copy(orig_bb_set);
}
@@ -569,6 +809,7 @@ void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_inte
remap_orig_internal_ = remap_orig_internal;
remap_copy_internal_ = remap_copy_internal;
remap_incoming_ = remap_incoming;
+ DCHECK(CheckRemappingInfoIsValid());
}
bool SuperblockCloner::IsSubgraphClonable() const {
@@ -577,29 +818,79 @@ bool SuperblockCloner::IsSubgraphClonable() const {
return false;
}
- // Check that there are no instructions defined in the subgraph and used outside.
- // TODO: Improve this by accepting graph with such uses but only one exit.
+ HInstructionMap live_outs(
+ std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ if (!CollectLiveOutsAndCheckClonable(&live_outs)) {
+ return false;
+ }
+
+ ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner));
+ SearchForSubgraphExits(&exits);
+
+ // The only loops with live-outs which are currently supported are loops with a single exit.
+ if (!live_outs.empty() && exits.size() != 1) {
+ return false;
+ }
+
+ return true;
+}
+
+bool SuperblockCloner::IsFastCase() const {
+ // Check that loop unrolling/loop peeling is being conducted.
+ // Check that all the basic blocks belong to the same loop.
+ bool flag = false;
+ HLoopInformation* common_loop_info = nullptr;
for (uint32_t idx : orig_bb_set_.Indexes()) {
HBasicBlock* block = GetBlockById(idx);
-
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
- HInstruction* instr = it.Current();
- if (!instr->IsClonable() ||
- IsUsedOutsideRegion(instr, orig_bb_set_)) {
+ HLoopInformation* block_loop_info = block->GetLoopInformation();
+ if (!flag) {
+ common_loop_info = block_loop_info;
+ flag = true; // Remember that the common loop candidate has been initialized.
+ } else {
+ if (block_loop_info != common_loop_info) {
return false;
}
}
+ }
- for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
- HInstruction* instr = it.Current();
- if (!instr->IsClonable() ||
- IsUsedOutsideRegion(instr, orig_bb_set_)) {
- return false;
- }
- }
+ // Check that orig_bb_set_ corresponds to loop peeling/unrolling.
+ if (common_loop_info == nullptr || !orig_bb_set_.SameBitsSet(&common_loop_info->GetBlocks())) {
+ return false;
}
- return true;
+ bool peeling_or_unrolling = false;
+ HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ // Check whether remapping info corresponds to loop unrolling.
+ CollectRemappingInfoForPeelUnroll(/* to_unroll= */ true,
+ common_loop_info,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
+ EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
+ EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
+
+ remap_orig_internal.clear();
+ remap_copy_internal.clear();
+ remap_incoming.clear();
+
+ // Check whether remapping info corresponds to loop peeling.
+ CollectRemappingInfoForPeelUnroll(/* to_unroll= */ false,
+ common_loop_info,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ peeling_or_unrolling |= EdgeHashSetsEqual(&remap_orig_internal, remap_orig_internal_) &&
+ EdgeHashSetsEqual(&remap_copy_internal, remap_copy_internal_) &&
+ EdgeHashSetsEqual(&remap_incoming, remap_incoming_);
+
+ return peeling_or_unrolling;
}
void SuperblockCloner::Run() {
@@ -609,19 +900,40 @@ void SuperblockCloner::Run() {
remap_copy_internal_ != nullptr &&
remap_incoming_ != nullptr);
DCHECK(IsSubgraphClonable());
+ DCHECK(IsFastCase());
+ if (kSuperblockClonerLogging) {
+ DumpInputSets();
+ }
+
+ CollectLiveOutsAndCheckClonable(&live_outs_);
// Find an area in the graph for which control flow information should be adjusted.
FindAndSetLocalAreaForAdjustments();
+ ConstructSubgraphClosedSSA();
// Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
// adjusted.
CloneBasicBlocks();
// Connect the blocks together/remap successors and fix phis which are directly affected by the
// remapping.
RemapEdgesSuccessors();
+
+ // Check that the subgraph is connected.
+ if (kIsDebugBuild) {
+ HBasicBlockSet work_set(arena_, orig_bb_set_.GetSizeOf(), true, kArenaAllocSuperblockCloner);
+
+ // Add original and copy blocks of the subgraph to the work set.
+ for (auto iter : *bb_map_) {
+ work_set.SetBit(iter.first->GetBlockId()); // Original block.
+ work_set.SetBit(iter.second->GetBlockId()); // Copy block.
+ }
+ CHECK(IsSubgraphConnected(&work_set, graph_));
+ }
+
// Recalculate dominance and backedge information which is required by the next stage.
AdjustControlFlowInfo();
// Fix data flow of the graph.
ResolveDataFlow();
+ FixSubgraphClosedSSAAfterCloning();
}
void SuperblockCloner::CleanUp() {
@@ -650,6 +962,10 @@ void SuperblockCloner::CleanUp() {
}
}
}
+
+ if (kIsDebugBuild) {
+ VerifyGraph();
+ }
}
HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) {
@@ -701,4 +1017,135 @@ void SuperblockCloner::CloneBasicBlocks() {
}
}
+//
+// Stand-alone methods.
+//
+
+void CollectRemappingInfoForPeelUnroll(bool to_unroll,
+ HLoopInformation* loop_info,
+ HEdgeSet* remap_orig_internal,
+ HEdgeSet* remap_copy_internal,
+ HEdgeSet* remap_incoming) {
+ DCHECK(loop_info != nullptr);
+ HBasicBlock* loop_header = loop_info->GetHeader();
+ // Set up the remap_orig_internal edge set (for peeling it stays empty) and the
+ // remap_copy_internal edge set.
+ for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) {
+ HEdge e = HEdge(back_edge_block, loop_header);
+ if (to_unroll) {
+ remap_orig_internal->insert(e);
+ remap_copy_internal->insert(e);
+ } else {
+ remap_copy_internal->insert(e);
+ }
+ }
+
+ // Set up remap_incoming edges set.
+ if (!to_unroll) {
+ remap_incoming->insert(HEdge(loop_info->GetPreHeader(), loop_header));
+ }
+}
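For intuition, the edge sets this function produces for a canonical loop - preheader P, header H, back edge B->H - can be modeled standalone (a sketch with plain integer block ids; nothing here is part of the patch):

    #include <cassert>
    #include <set>
    #include <utility>

    using Edge = std::pair<int, int>;  // (from block id, to block id)

    void CollectForPeelUnroll(bool to_unroll,
                              std::set<Edge>* orig_internal,
                              std::set<Edge>* copy_internal,
                              std::set<Edge>* incoming) {
      const Edge back_edge{2, 1};   // B -> H
      const Edge pre_header{0, 1};  // P -> H
      copy_internal->insert(back_edge);    // both modes redirect the copied back edge
      if (to_unroll) {
        orig_internal->insert(back_edge);  // unrolling also redirects the original one
      } else {
        incoming->insert(pre_header);      // peeling enters through the peeled copy
      }
    }

    int main() {
      std::set<Edge> o, c, i;
      CollectForPeelUnroll(/* to_unroll= */ false, &o, &c, &i);
      assert(o.empty() && c.count({2, 1}) == 1u && i.count({0, 1}) == 1u);
      return 0;
    }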
+
+bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph) {
+ ArenaVector<HBasicBlock*> entry_blocks(
+ graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ // Find subgraph entry blocks.
+ for (uint32_t orig_block_id : work_set->Indexes()) {
+ HBasicBlock* block = graph->GetBlocks()[orig_block_id];
+ for (HBasicBlock* pred : block->GetPredecessors()) {
+ if (!work_set->IsBitSet(pred->GetBlockId())) {
+ entry_blocks.push_back(block);
+ break;
+ }
+ }
+ }
+
+ for (HBasicBlock* entry_block : entry_blocks) {
+ if (work_set->IsBitSet(entry_block->GetBlockId())) {
+ TraverseSubgraphForConnectivity(entry_block, work_set);
+ }
+ }
+
+ // Return whether all the blocks were visited, i.e. no unvisited - unreachable - blocks are left.
+ return work_set->NumSetBits() == 0;
+}
+
+HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+ if (loop1 == nullptr || loop2 == nullptr) {
+ return nullptr;
+ }
+
+ if (loop1->IsIn(*loop2)) {
+ return loop2;
+ }
+
+ HLoopInformation* current = loop1;
+ while (current != nullptr && !loop2->IsIn(*current)) {
+ current = current->GetPreHeader()->GetLoopInformation();
+ }
+
+ return current;
+}
+
+bool PeelUnrollHelper::IsLoopClonable(HLoopInformation* loop_info) {
+ PeelUnrollHelper helper(
+ loop_info, /* bb_map= */ nullptr, /* hir_map= */ nullptr, /* induction_range= */ nullptr);
+ return helper.IsLoopClonable();
+}
+
+HBasicBlock* PeelUnrollHelper::DoPeelUnrollImpl(bool to_unroll) {
+ // For now do peeling/unrolling only for natural loops.
+ DCHECK(!loop_info_->IsIrreducible());
+
+ HBasicBlock* loop_header = loop_info_->GetHeader();
+ // Check that loop info is up-to-date.
+ DCHECK(loop_info_ == loop_header->GetLoopInformation());
+ HGraph* graph = loop_header->GetGraph();
+
+ if (kSuperblockClonerLogging) {
+ std::cout << "Method: " << graph->PrettyMethod() << std::endl;
+ std::cout << "Scalar loop " << (to_unroll ? "unrolling" : "peeling") <<
+ " was applied to the loop <" << loop_header->GetBlockId() << ">." << std::endl;
+ }
+
+ ArenaAllocator allocator(graph->GetAllocator()->GetArenaPool());
+
+ HEdgeSet remap_orig_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_copy_internal(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_incoming(graph->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ CollectRemappingInfoForPeelUnroll(to_unroll,
+ loop_info_,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ cloner_.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
+ cloner_.Run();
+ cloner_.CleanUp();
+
+ // Check that loop info is preserved.
+ DCHECK(loop_info_ == loop_header->GetLoopInformation());
+
+ return loop_header;
+}
+
+PeelUnrollSimpleHelper::PeelUnrollSimpleHelper(HLoopInformation* info,
+ InductionVarRange* induction_range)
+ : bb_map_(std::less<HBasicBlock*>(),
+ info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
+ hir_map_(std::less<HInstruction*>(),
+ info->GetHeader()->GetGraph()->GetAllocator()->Adapter(kArenaAllocSuperblockCloner)),
+ helper_(info, &bb_map_, &hir_map_, induction_range) {}
+
} // namespace art
+
+namespace std {
+
+ostream& operator<<(ostream& os, const art::HEdge& e) {
+ e.Dump(os);
+ return os;
+}
+
+} // namespace std
diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h
index 23de692673..ece0914ddb 100644
--- a/compiler/optimizing/superblock_cloner.h
+++ b/compiler/optimizing/superblock_cloner.h
@@ -24,8 +24,9 @@
namespace art {
+class InductionVarRange;
+
static const bool kSuperblockClonerLogging = false;
-static const bool kSuperblockClonerVerify = false;
// Represents an edge between two HBasicBlocks.
//
@@ -141,7 +142,8 @@ class SuperblockCloner : public ValueObject {
SuperblockCloner(HGraph* graph,
const HBasicBlockSet* orig_bb_set,
HBasicBlockMap* bb_map,
- HInstructionMap* hir_map);
+ HInstructionMap* hir_map,
+ InductionVarRange* induction_range);
// Sets edge successor remapping info specified by corresponding edge sets.
void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
@@ -152,6 +154,15 @@ class SuperblockCloner : public ValueObject {
// TODO: Start from small range of graph patterns then extend it.
bool IsSubgraphClonable() const;
+ // Returns whether the selected subgraph satisfies the criteria for fast data flow resolution,
+ // i.e. when the iterative DF algorithm is not required and dominators/instruction inputs can
+ // be trivially adjusted.
+ //
+ // TODO: formally describe the criteria.
+ //
+ // Loop peeling and unrolling satisfy the criteria.
+ bool IsFastCase() const;
+
// Runs the copy algorithm according to the description.
void Run();
@@ -202,11 +213,17 @@ class SuperblockCloner : public ValueObject {
return IsInOrigBBSet(block->GetBlockId());
}
+ // Returns the area (the outermost loop) in the graph for which control flow (back edges, loops,
+ // dominators) needs to be adjusted.
+ HLoopInformation* GetRegionToBeAdjusted() const {
+ return outer_loop_;
+ }
+
private:
// Fills the 'exits' vector with the subgraph exits.
- void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits);
+ void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) const;
- // Finds and records information about the area in the graph for which control-flow (back edges,
+ // Finds and records information about the area in the graph for which control flow (back edges,
// loops, dominators) needs to be adjusted.
void FindAndSetLocalAreaForAdjustments();
@@ -217,7 +234,7 @@ class SuperblockCloner : public ValueObject {
// phis' nor instructions' inputs values are resolved.
void RemapEdgesSuccessors();
- // Adjusts control-flow (back edges, loops, dominators) for the local area defined by
+ // Adjusts control flow (back edges, loops, dominators) for the local area defined by
// FindAndSetLocalAreaForAdjustments.
void AdjustControlFlowInfo();
@@ -226,6 +243,33 @@ class SuperblockCloner : public ValueObject {
void ResolveDataFlow();
//
+ // Helpers for live-outs processing and Subgraph-closed SSA.
+ //
+ // - live-outs - values which are defined inside the subgraph and have uses outside.
+ // - Subgraph-closed SSA - SSA form for which all the values defined inside the subgraph
+ // have no outside uses except for the phi-nodes in the subgraph exits.
+ //
+ // Note: currently, if the subgraph has live-outs it is only clonable if it has a single exit; this
+ // makes the subgraph-closed SSA form construction much easier.
+ //
+ // TODO: Support subgraphs with live-outs and multiple exits.
+ //
+
+ // For each live-out value 'val' in the region puts a record <val, val> into the map.
+ // Returns whether all of the instructions in the subgraph are clonable.
+ bool CollectLiveOutsAndCheckClonable(HInstructionMap* live_outs) const;
+
+ // Constructs Subgraph-closed SSA; precondition - a subgraph has a single exit.
+ //
+ // For each live-out 'val' in 'live_outs_' map inserts a HPhi 'phi' into the exit node, updates
+ // the record in the map to <val, phi> and replaces all outside uses with this phi.
+ void ConstructSubgraphClosedSSA();
+
+ // Fixes the data flow for the live-out 'val' by adding a 'copy_val' input to the corresponding
+ // (<val, phi>) phi after the cloning is done.
+ void FixSubgraphClosedSSAAfterCloning();
+
+ //
// Helpers for CloneBasicBlock.
//
@@ -268,10 +312,17 @@ class SuperblockCloner : public ValueObject {
// Resolves the inputs of the phi.
void ResolvePhi(HPhi* phi);
+ // Updates induction range info when fixing SSA.
+ void UpdateInductionRangeInfoOf(
+ HInstruction* user, HInstruction* old_instruction, HInstruction* replacement);
+
//
// Debug and logging methods.
//
void CheckInstructionInputsRemapping(HInstruction* orig_instr);
+ bool CheckRemappingInfoIsValid();
+ void VerifyGraph();
+ void DumpInputSets();
HBasicBlock* GetBlockById(uint32_t block_id) const {
DCHECK(block_id < graph_->GetBlocks().size());
@@ -295,15 +346,103 @@ class SuperblockCloner : public ValueObject {
HBasicBlockMap* bb_map_;
// Correspondence map for instructions: (original HInstruction, copy HInstruction).
HInstructionMap* hir_map_;
- // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted.
+ // As a result of cloning, the induction range analysis information can be invalidated
+ // and must be updated. If not null, the cloner updates it for changed instructions.
+ InductionVarRange* induction_range_;
+ // Area in the graph for which control flow (back edges, loops, dominators) needs to be adjusted.
HLoopInformation* outer_loop_;
HBasicBlockSet outer_loop_bb_set_;
+ HInstructionMap live_outs_;
+
ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo);
+ ART_FRIEND_TEST(SuperblockClonerTest, IsGraphConnected);
DISALLOW_COPY_AND_ASSIGN(SuperblockCloner);
};
+// Helper class to perform loop peeling/unrolling.
+//
+// This helper should be used when a correspondence map between original and copied
+// basic blocks/instructions is needed.
+class PeelUnrollHelper : public ValueObject {
+ public:
+ PeelUnrollHelper(HLoopInformation* info,
+ SuperblockCloner::HBasicBlockMap* bb_map,
+ SuperblockCloner::HInstructionMap* hir_map,
+ InductionVarRange* induction_range) :
+ loop_info_(info),
+ cloner_(info->GetHeader()->GetGraph(), &info->GetBlocks(), bb_map, hir_map, induction_range) {
+ // For now do peeling/unrolling only for natural loops.
+ DCHECK(!info->IsIrreducible());
+ }
+
+ // Returns whether the loop can be peeled/unrolled (static function).
+ static bool IsLoopClonable(HLoopInformation* loop_info);
+
+ // Returns whether the loop can be peeled/unrolled.
+ bool IsLoopClonable() const { return cloner_.IsSubgraphClonable(); }
+
+ HBasicBlock* DoPeeling() { return DoPeelUnrollImpl(/* to_unroll= */ false); }
+ HBasicBlock* DoUnrolling() { return DoPeelUnrollImpl(/* to_unroll= */ true); }
+ HLoopInformation* GetRegionToBeAdjusted() const { return cloner_.GetRegionToBeAdjusted(); }
+
+ protected:
+ // Applies loop peeling/unrolling for the loop specified by 'loop_info'.
+ //
+ // Depending on 'to_unroll', either unrolls the loop by a factor of two or peels one iteration from it.
+ HBasicBlock* DoPeelUnrollImpl(bool to_unroll);
+
+ private:
+ HLoopInformation* loop_info_;
+ SuperblockCloner cloner_;
+
+ DISALLOW_COPY_AND_ASSIGN(PeelUnrollHelper);
+};
+
+// Helper class to perform loop peeling/unrolling.
+//
+// This helper should be used when there is no need to get correspondence information between
+// original and copied basic blocks/instructions.
+class PeelUnrollSimpleHelper : public ValueObject {
+ public:
+ PeelUnrollSimpleHelper(HLoopInformation* info, InductionVarRange* induction_range);
+ bool IsLoopClonable() const { return helper_.IsLoopClonable(); }
+ HBasicBlock* DoPeeling() { return helper_.DoPeeling(); }
+ HBasicBlock* DoUnrolling() { return helper_.DoUnrolling(); }
+ HLoopInformation* GetRegionToBeAdjusted() const { return helper_.GetRegionToBeAdjusted(); }
+
+ const SuperblockCloner::HBasicBlockMap* GetBasicBlockMap() const { return &bb_map_; }
+ const SuperblockCloner::HInstructionMap* GetInstructionMap() const { return &hir_map_; }
+
+ private:
+ SuperblockCloner::HBasicBlockMap bb_map_;
+ SuperblockCloner::HInstructionMap hir_map_;
+ PeelUnrollHelper helper_;
+
+ DISALLOW_COPY_AND_ASSIGN(PeelUnrollSimpleHelper);
+};
+
+// Collects edge remapping info for loop peeling/unrolling for the loop specified by loop info.
+void CollectRemappingInfoForPeelUnroll(bool to_unroll,
+ HLoopInformation* loop_info,
+ SuperblockCloner::HEdgeSet* remap_orig_internal,
+ SuperblockCloner::HEdgeSet* remap_copy_internal,
+ SuperblockCloner::HEdgeSet* remap_incoming);
+
+// Returns whether blocks from 'work_set' are reachable from the rest of the graph.
+//
+// Returns whether there exists a set 'outer_entries' of basic blocks such that:
+// - each block from 'outer_entries' is not from 'work_set'.
+// - each block from 'work_set' is reachable from at least one block from 'outer_entries'.
+//
+// After the function returns, work_set contains only those blocks from the original 'work_set'
+// which are unreachable from the rest of the graph.
+bool IsSubgraphConnected(SuperblockCloner::HBasicBlockSet* work_set, HGraph* graph);
+
+// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole
+// graph.
+HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2);
} // namespace art
namespace std {
@@ -312,11 +451,12 @@ template <>
struct hash<art::HEdge> {
size_t operator()(art::HEdge const& x) const noexcept {
// Use Cantor pairing function as the hash function.
- uint32_t a = x.GetFrom();
- uint32_t b = x.GetTo();
+ size_t a = x.GetFrom();
+ size_t b = x.GetTo();
return (a + b) * (a + b + 1) / 2 + b;
}
};
+ostream& operator<<(ostream& os, const art::HEdge& e);
} // namespace std
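As a quick check of the pairing above (a worked example, not part of the patch): the Cantor pairing function is injective on ordered pairs, so the two directions of an edge hash differently:

    #include <cassert>
    #include <cstddef>

    size_t CantorPair(size_t a, size_t b) {
      return (a + b) * (a + b + 1) / 2 + b;
    }

    int main() {
      assert(CantorPair(2, 3) == 18u);  // (5 * 6) / 2 + 3
      assert(CantorPair(3, 2) == 17u);  // (5 * 6) / 2 + 2
      return 0;
    }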
diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc
index f1b7bffdf5..aa19de683f 100644
--- a/compiler/optimizing/superblock_cloner_test.cc
+++ b/compiler/optimizing/superblock_cloner_test.cc
@@ -25,52 +25,35 @@ namespace art {
using HBasicBlockMap = SuperblockCloner::HBasicBlockMap;
using HInstructionMap = SuperblockCloner::HInstructionMap;
+using HBasicBlockSet = SuperblockCloner::HBasicBlockSet;
+using HEdgeSet = SuperblockCloner::HEdgeSet;
// This class provides methods and helpers for testing various cloning and copying routines:
// individual instruction cloning and cloning of the more coarse-grain structures.
-class SuperblockClonerTest : public OptimizingUnitTest {
+class SuperblockClonerTest : public ImprovedOptimizingUnitTest {
public:
- SuperblockClonerTest()
- : graph_(CreateGraph()), entry_block_(nullptr), exit_block_(nullptr), parameter_(nullptr) {}
-
- void CreateBasicLoopControlFlow(/* out */ HBasicBlock** header_p,
+ void CreateBasicLoopControlFlow(HBasicBlock* position,
+ HBasicBlock* successor,
+ /* out */ HBasicBlock** header_p,
/* out */ HBasicBlock** body_p) {
- entry_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(entry_block_);
- graph_->SetEntryBlock(entry_block_);
-
HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_);
HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_);
HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_);
- HBasicBlock* loop_exit = new (GetAllocator()) HBasicBlock(graph_);
graph_->AddBlock(loop_preheader);
graph_->AddBlock(loop_header);
graph_->AddBlock(loop_body);
- graph_->AddBlock(loop_exit);
- exit_block_ = new (GetAllocator()) HBasicBlock(graph_);
- graph_->AddBlock(exit_block_);
- graph_->SetExitBlock(exit_block_);
+ position->ReplaceSuccessor(successor, loop_preheader);
- entry_block_->AddSuccessor(loop_preheader);
loop_preheader->AddSuccessor(loop_header);
// Loop exit first to have a proper exit condition/target for HIf.
- loop_header->AddSuccessor(loop_exit);
+ loop_header->AddSuccessor(successor);
loop_header->AddSuccessor(loop_body);
loop_body->AddSuccessor(loop_header);
- loop_exit->AddSuccessor(exit_block_);
*header_p = loop_header;
*body_p = loop_body;
-
- parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(),
- dex::TypeIndex(0),
- 0,
- DataType::Type::kInt32);
- entry_block_->AddInstruction(parameter_);
- loop_exit->AddInstruction(new (GetAllocator()) HReturnVoid());
- exit_block_->AddInstruction(new (GetAllocator()) HExit());
}
void CreateBasicLoopDataFlow(HBasicBlock* loop_header, HBasicBlock* loop_body) {
@@ -84,11 +67,12 @@ class SuperblockClonerTest : public OptimizingUnitTest {
// Header block.
HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32);
HInstruction* suspend_check = new (GetAllocator()) HSuspendCheck();
+ HInstruction* loop_check = new (GetAllocator()) HGreaterThanOrEqual(phi, const_128);
loop_header->AddPhi(phi);
loop_header->AddInstruction(suspend_check);
- loop_header->AddInstruction(new (GetAllocator()) HGreaterThanOrEqual(phi, const_128));
- loop_header->AddInstruction(new (GetAllocator()) HIf(parameter_));
+ loop_header->AddInstruction(loop_check);
+ loop_header->AddInstruction(new (GetAllocator()) HIf(loop_check));
// Loop body block.
HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc);
@@ -97,8 +81,8 @@ class SuperblockClonerTest : public OptimizingUnitTest {
HInstruction* array_get =
new (GetAllocator()) HArrayGet(null_check, bounds_check, DataType::Type::kInt32, dex_pc);
HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, array_get, const_1);
- HInstruction* array_set =
- new (GetAllocator()) HArraySet(null_check, bounds_check, add, DataType::Type::kInt32, dex_pc);
+ HInstruction* array_set = new (GetAllocator()) HArraySet(
+ null_check, bounds_check, add, DataType::Type::kInt32, dex_pc);
HInstruction* induction_inc = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, const_1);
loop_body->AddInstruction(null_check);
@@ -123,49 +107,17 @@ class SuperblockClonerTest : public OptimizingUnitTest {
null_check->CopyEnvironmentFrom(env);
bounds_check->CopyEnvironmentFrom(env);
}
-
- HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction,
- ArenaVector<HInstruction*>* current_locals) {
- HEnvironment* environment = new (GetAllocator()) HEnvironment(
- (GetAllocator()),
- current_locals->size(),
- graph_->GetArtMethod(),
- instruction->GetDexPc(),
- instruction);
-
- environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals));
- instruction->SetRawEnvironment(environment);
- return environment;
- }
-
- bool CheckGraph() {
- GraphChecker checker(graph_);
- checker.Run();
- if (!checker.IsValid()) {
- for (const std::string& error : checker.GetErrors()) {
- std::cout << error << std::endl;
- }
- return false;
- }
- return true;
- }
-
- HGraph* graph_;
-
- HBasicBlock* entry_block_;
- HBasicBlock* exit_block_;
-
- HInstruction* parameter_;
};
TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
HBasicBlock* header = nullptr;
HBasicBlock* loop_body = nullptr;
- CreateBasicLoopControlFlow(&header, &loop_body);
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
CreateBasicLoopDataFlow(header, loop_body);
graph_->BuildDominatorTree();
- ASSERT_TRUE(CheckGraph());
+ EXPECT_TRUE(CheckGraph());
HSuspendCheck* old_suspend_check = header->GetLoopInformation()->GetSuspendCheck();
CloneAndReplaceInstructionVisitor visitor(graph_);
@@ -193,7 +145,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
HBasicBlock* loop_body = nullptr;
ArenaAllocator* arena = graph_->GetAllocator();
- CreateBasicLoopControlFlow(&header, &loop_body);
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
CreateBasicLoopDataFlow(header, loop_body);
graph_->BuildDominatorTree();
ASSERT_TRUE(CheckGraph());
@@ -209,7 +162,8 @@ TEST_F(SuperblockClonerTest, CloneBasicBlocks) {
SuperblockCloner cloner(graph_,
&orig_bb_set,
&bb_map,
- &hir_map);
+ &hir_map,
+ /* induction_range= */ nullptr);
EXPECT_TRUE(cloner.IsSubgraphClonable());
cloner.CloneBasicBlocks();
@@ -272,7 +226,8 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
HBasicBlock* loop_body = nullptr;
ArenaAllocator* arena = graph_->GetAllocator();
- CreateBasicLoopControlFlow(&header, &loop_body);
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
CreateBasicLoopDataFlow(header, loop_body);
graph_->BuildDominatorTree();
ASSERT_TRUE(CheckGraph());
@@ -285,8 +240,9 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
SuperblockCloner cloner(graph_,
&orig_bb_set,
- nullptr,
- nullptr);
+ /* bb_map= */ nullptr,
+ /* hir_map= */ nullptr,
+ /* induction_range= */ nullptr);
EXPECT_TRUE(cloner.IsSubgraphClonable());
cloner.FindAndSetLocalAreaForAdjustments();
@@ -303,4 +259,488 @@ TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) {
EXPECT_TRUE(loop_info->IsBackEdge(*loop_body));
}
+// Tests the IsSubgraphConnected function for the negative case.
+TEST_F(SuperblockClonerTest, IsGraphConnected) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* unreachable_block = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(unreachable_block);
+
+ HBasicBlockSet bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ bb_set.SetBit(header->GetBlockId());
+ bb_set.SetBit(loop_body->GetBlockId());
+ bb_set.SetBit(unreachable_block->GetBlockId());
+
+ EXPECT_FALSE(IsSubgraphConnected(&bb_set, graph_));
+ EXPECT_EQ(bb_set.NumSetBits(), 1u);
+ EXPECT_TRUE(bb_set.IsBitSet(unreachable_block->GetBlockId()));
+}
+
+// Tests SuperblockCloner for loop peeling case.
+//
+// Control Flow of the example (ignoring critical edges splitting).
+//
+// Before After
+//
+// |B| |B|
+// | |
+// v v
+// |1| |1|
+// | |
+// v v
+// |2|<-\ (6) |2A|
+// / \ / / \
+// v v/ / v
+// |4| |3| / |3A| (7)
+// | / /
+// v | v
+// |E| \ |2|<-\
+// \ / \ /
+// v v /
+// |4| |3|
+// |
+// v
+// |E|
+TEST_F(SuperblockClonerTest, LoopPeeling) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HBasicBlockMap bb_map(
+ std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(
+ std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ PeelUnrollHelper helper(loop_info, &bb_map, &hir_map, /* induction_range= */ nullptr);
+ EXPECT_TRUE(helper.IsLoopClonable());
+ HBasicBlock* new_header = helper.DoPeeling();
+ HLoopInformation* new_loop_info = new_header->GetLoopInformation();
+
+ EXPECT_TRUE(CheckGraph());
+
+ // Check loop body successors.
+ EXPECT_EQ(loop_body->GetSingleSuccessor(), header);
+ EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header);
+
+ // Check loop structure.
+ EXPECT_EQ(header, new_header);
+ EXPECT_EQ(new_loop_info->GetHeader(), header);
+ EXPECT_EQ(new_loop_info->GetBackEdges().size(), 1u);
+ EXPECT_EQ(new_loop_info->GetBackEdges()[0], loop_body);
+}
+
+// Tests SuperblockCloner for loop unrolling case.
+//
+// Control Flow of the example (ignoring critical edges splitting).
+//
+// Before After
+//
+// |B| |B|
+// | |
+// v v
+// |1| |1|
+// | |
+// v v
+// |2|<-\ (6) |2A|<-\
+// / \ / / \ \
+// v v/ / v \
+// |4| |3| /(7)|3A| |
+// | / / /
+// v | v /
+// |E| \ |2| /
+// \ / \ /
+// v v/
+// |4| |3|
+// |
+// v
+// |E|
+TEST_F(SuperblockClonerTest, LoopUnrolling) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HBasicBlockMap bb_map(
+ std::less<HBasicBlock*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(
+ std::less<HInstruction*>(), graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ PeelUnrollHelper helper(loop_info, &bb_map, &hir_map, /* induction_range= */ nullptr);
+ EXPECT_TRUE(helper.IsLoopClonable());
+ HBasicBlock* new_header = helper.DoUnrolling();
+
+ EXPECT_TRUE(CheckGraph());
+
+ // Check loop body successors.
+ EXPECT_EQ(loop_body->GetSingleSuccessor(), bb_map.Get(header));
+ EXPECT_EQ(bb_map.Get(loop_body)->GetSingleSuccessor(), header);
+
+ // Check loop structure.
+ EXPECT_EQ(header, new_header);
+ EXPECT_EQ(loop_info, new_header->GetLoopInformation());
+ EXPECT_EQ(loop_info->GetHeader(), new_header);
+ EXPECT_EQ(loop_info->GetBackEdges().size(), 1u);
+ EXPECT_EQ(loop_info->GetBackEdges()[0], bb_map.Get(loop_body));
+}
+
+// Checks that loop peeling works fine for a loop with multiple back edges. Tests that after
+// the transformation the loop has a single preheader.
+TEST_F(SuperblockClonerTest, LoopPeelingMultipleBackEdges) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+
+ // Transform a basic loop to have multiple back edges.
+ HBasicBlock* latch = header->GetSuccessors()[1];
+ HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_);
+ HBasicBlock* temp1 = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(if_block);
+ graph_->AddBlock(temp1);
+ header->ReplaceSuccessor(latch, if_block);
+ if_block->AddSuccessor(latch);
+ if_block->AddSuccessor(temp1);
+ temp1->AddSuccessor(header);
+
+ if_block->AddInstruction(new (GetAllocator()) HIf(parameter_));
+
+ HInstructionIterator it(header->GetPhis());
+ DCHECK(!it.Done());
+ HPhi* loop_phi = it.Current()->AsPhi();
+ HInstruction* temp_add = new (GetAllocator()) HAdd(DataType::Type::kInt32,
+ loop_phi,
+ graph_->GetIntConstant(2));
+ temp1->AddInstruction(temp_add);
+ temp1->AddInstruction(new (GetAllocator()) HGoto());
+ loop_phi->AddInput(temp_add);
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+ PeelUnrollSimpleHelper helper(loop_info, /* induction_range= */ nullptr);
+ HBasicBlock* new_header = helper.DoPeeling();
+ EXPECT_EQ(header, new_header);
+
+ EXPECT_TRUE(CheckGraph());
+ EXPECT_EQ(header->GetPredecessors().size(), 3u);
+}
+
+static void CheckLoopStructureForLoopPeelingNested(HBasicBlock* loop1_header,
+ HBasicBlock* loop2_header,
+ HBasicBlock* loop3_header) {
+ EXPECT_EQ(loop1_header->GetLoopInformation()->GetHeader(), loop1_header);
+ EXPECT_EQ(loop2_header->GetLoopInformation()->GetHeader(), loop2_header);
+ EXPECT_EQ(loop3_header->GetLoopInformation()->GetHeader(), loop3_header);
+ EXPECT_EQ(loop1_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr);
+ EXPECT_EQ(loop2_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation(), nullptr);
+ EXPECT_EQ(loop3_header->GetLoopInformation()->GetPreHeader()->GetLoopInformation()->GetHeader(),
+ loop2_header);
+}
+
+TEST_F(SuperblockClonerTest, LoopPeelingNested) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops
+ // Headers: 1 2 3
+ // [ ], [ [ ] ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+
+ CreateBasicLoopControlFlow(header, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop2_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HLoopInformation* loop2_info_before = loop2_header->GetLoopInformation();
+ HLoopInformation* loop3_info_before = loop3_header->GetLoopInformation();
+
+ // Check nested loops structure.
+ CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header);
+ PeelUnrollSimpleHelper helper(loop1_header->GetLoopInformation(), /* induction_range= */ nullptr);
+ helper.DoPeeling();
+ // Check that nested loops structure has not changed after the transformation.
+ CheckLoopStructureForLoopPeelingNested(loop1_header, loop2_header, loop3_header);
+
+ // Test that the loop info is preserved.
+ EXPECT_EQ(loop2_info_before, loop2_header->GetLoopInformation());
+ EXPECT_EQ(loop3_info_before, loop3_header->GetLoopInformation());
+
+ EXPECT_EQ(loop3_info_before->GetPreHeader()->GetLoopInformation(), loop2_info_before);
+ EXPECT_EQ(loop2_info_before->GetPreHeader()->GetLoopInformation(), nullptr);
+
+ EXPECT_EQ(helper.GetRegionToBeAdjusted(), nullptr);
+
+ EXPECT_TRUE(CheckGraph());
+}
+
+// Checks that the loop population is correctly propagated after an inner loop is peeled.
+TEST_F(SuperblockClonerTest, OuterLoopPopulationAfterInnerPeeled) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops
+ // Headers: 1 2 3 4
+ // [ [ [ ] ] ], [ ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop2_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+
+ CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop4_header = header;
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation(), /* induction_range= */ nullptr);
+ helper.DoPeeling();
+ HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+ HLoopInformation* loop2 = loop2_header->GetLoopInformation();
+ HLoopInformation* loop3 = loop3_header->GetLoopInformation();
+ HLoopInformation* loop4 = loop4_header->GetLoopInformation();
+
+ EXPECT_TRUE(loop1->Contains(*loop2_header));
+ EXPECT_TRUE(loop1->Contains(*loop3_header));
+ EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader()));
+
+ // Check that loop4 info has not been touched after a local run of AnalyzeLoops.
+ EXPECT_EQ(loop4, loop4_header->GetLoopInformation());
+
+ EXPECT_TRUE(loop1->IsIn(*loop1));
+ EXPECT_TRUE(loop2->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop2));
+ EXPECT_TRUE(!loop4->IsIn(*loop1));
+
+ EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), nullptr);
+
+ EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop2);
+
+ EXPECT_TRUE(CheckGraph());
+}
+
+// Checks the case when an inner loop has an exit not to its immediate outer loop but to some
+// other loop in the hierarchy. Loop population information must be valid after loop peeling.
+TEST_F(SuperblockClonerTest, NestedCaseExitToOutermost) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops then peel loop3.
+ // Headers: 1 2 3
+ // [ [ [ ] ] ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+ HBasicBlock* loop_body1 = loop_body;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+ HBasicBlock* loop_body3 = loop_body;
+
+ // Change the loop3 - insert an exit which leads to loop1.
+ HBasicBlock* loop3_extra_if_block = new (GetAllocator()) HBasicBlock(graph_);
+ graph_->AddBlock(loop3_extra_if_block);
+ loop3_extra_if_block->AddInstruction(new (GetAllocator()) HIf(parameter_));
+
+ loop3_header->ReplaceSuccessor(loop_body3, loop3_extra_if_block);
+ loop3_extra_if_block->AddSuccessor(loop_body1); // Long exit.
+ loop3_extra_if_block->AddSuccessor(loop_body3);
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HBasicBlock* loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0];
+ EXPECT_TRUE(loop1_header->GetLoopInformation()->Contains(*loop3_long_exit));
+
+ PeelUnrollSimpleHelper helper(loop3_header->GetLoopInformation(), /* induction_range= */ nullptr);
+ helper.DoPeeling();
+
+ HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+ // Check that after the transformation the local area for CF adjustments has been chosen
+ // correctly and loop population has been updated.
+ loop3_long_exit = loop3_extra_if_block->GetSuccessors()[0];
+ EXPECT_TRUE(loop1->Contains(*loop3_long_exit));
+
+ EXPECT_EQ(helper.GetRegionToBeAdjusted(), loop1);
+
+ EXPECT_TRUE(loop1->Contains(*loop3_header));
+ EXPECT_TRUE(loop1->Contains(*loop3_header->GetLoopInformation()->GetPreHeader()));
+
+ EXPECT_TRUE(CheckGraph());
+}
+
+TEST_F(SuperblockClonerTest, FastCaseCheck) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+ ArenaAllocator* arena = graph_->GetAllocator();
+
+ InitGraph();
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ graph_->BuildDominatorTree();
+
+ HLoopInformation* loop_info = header->GetLoopInformation();
+
+ ArenaBitVector orig_bb_set(
+ arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
+ orig_bb_set.Union(&loop_info->GetBlocks());
+
+ HEdgeSet remap_orig_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_copy_internal(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+ HEdgeSet remap_incoming(graph_->GetAllocator()->Adapter(kArenaAllocSuperblockCloner));
+
+ CollectRemappingInfoForPeelUnroll(true,
+ loop_info,
+ &remap_orig_internal,
+ &remap_copy_internal,
+ &remap_incoming);
+
+ // Insert some extra nodes and edges.
+ HBasicBlock* preheader = loop_info->GetPreHeader();
+ orig_bb_set.SetBit(preheader->GetBlockId());
+
+ // Adjust incoming edges.
+ remap_incoming.clear();
+ remap_incoming.insert(HEdge(preheader->GetSinglePredecessor(), preheader));
+
+ HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+ HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));
+
+ SuperblockCloner cloner(graph_,
+ &orig_bb_set,
+ &bb_map,
+ &hir_map,
+ /* induction_range= */ nullptr);
+ cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
+
+ EXPECT_FALSE(cloner.IsFastCase());
+}
+
+// Helper for FindCommonLoop which also checks that FindCommonLoop is symmetric.
+static HLoopInformation* FindCommonLoopCheck(HLoopInformation* loop1, HLoopInformation* loop2) {
+ HLoopInformation* common_loop12 = FindCommonLoop(loop1, loop2);
+ HLoopInformation* common_loop21 = FindCommonLoop(loop2, loop1);
+ EXPECT_EQ(common_loop21, common_loop12);
+ return common_loop12;
+}
+
+// Tests FindCommonLoop function on a loop nest.
+TEST_F(SuperblockClonerTest, FindCommonLoop) {
+ HBasicBlock* header = nullptr;
+ HBasicBlock* loop_body = nullptr;
+
+ InitGraph();
+
+ // Create the following nested structure of loops
+ // Headers: 1 2 3 4 5
+ // [ [ [ ] ], [ ] ], [ ]
+ CreateBasicLoopControlFlow(entry_block_, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop1_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop2_header = header;
+
+ CreateBasicLoopControlFlow(header, header->GetSuccessors()[1], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop3_header = header;
+
+ CreateBasicLoopControlFlow(loop2_header, loop2_header->GetSuccessors()[0], &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop4_header = header;
+
+ CreateBasicLoopControlFlow(loop1_header, return_block_, &header, &loop_body);
+ CreateBasicLoopDataFlow(header, loop_body);
+ HBasicBlock* loop5_header = header;
+
+ graph_->BuildDominatorTree();
+ EXPECT_TRUE(CheckGraph());
+
+ HLoopInformation* loop1 = loop1_header->GetLoopInformation();
+ HLoopInformation* loop2 = loop2_header->GetLoopInformation();
+ HLoopInformation* loop3 = loop3_header->GetLoopInformation();
+ HLoopInformation* loop4 = loop4_header->GetLoopInformation();
+ HLoopInformation* loop5 = loop5_header->GetLoopInformation();
+
+ EXPECT_TRUE(loop1->IsIn(*loop1));
+ EXPECT_TRUE(loop2->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop1));
+ EXPECT_TRUE(loop3->IsIn(*loop2));
+ EXPECT_TRUE(loop4->IsIn(*loop1));
+
+ EXPECT_FALSE(loop5->IsIn(*loop1));
+ EXPECT_FALSE(loop4->IsIn(*loop2));
+ EXPECT_FALSE(loop4->IsIn(*loop3));
+
+ EXPECT_EQ(loop1->GetPreHeader()->GetLoopInformation(), nullptr);
+ EXPECT_EQ(loop4->GetPreHeader()->GetLoopInformation(), loop1);
+
+ EXPECT_EQ(FindCommonLoopCheck(nullptr, nullptr), nullptr);
+ EXPECT_EQ(FindCommonLoopCheck(loop2, nullptr), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop1), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop2), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop3), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop4), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop1, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop2, loop3), loop2);
+ EXPECT_EQ(FindCommonLoopCheck(loop2, loop4), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop2, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop3, loop4), loop1);
+ EXPECT_EQ(FindCommonLoopCheck(loop3, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop4, loop5), nullptr);
+
+ EXPECT_EQ(FindCommonLoopCheck(loop5, loop5), loop5);
+}
+
} // namespace art
diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc
index 0271850f29..b1abcf6747 100644
--- a/compiler/optimizing/x86_memory_gen.cc
+++ b/compiler/optimizing/x86_memory_gen.cc
@@ -31,7 +31,7 @@ class MemoryOperandVisitor : public HGraphVisitor {
do_implicit_null_checks_(do_implicit_null_checks) {}
private:
- void VisitBoundsCheck(HBoundsCheck* check) OVERRIDE {
+ void VisitBoundsCheck(HBoundsCheck* check) override {
// Replace the length by the array itself, so that we can do compares to memory.
HArrayLength* array_len = check->InputAt(1)->AsArrayLength();
@@ -76,9 +76,10 @@ X86MemoryOperandGeneration::X86MemoryOperandGeneration(HGraph* graph,
do_implicit_null_checks_(codegen->GetCompilerOptions().GetImplicitNullChecks()) {
}
-void X86MemoryOperandGeneration::Run() {
+bool X86MemoryOperandGeneration::Run() {
MemoryOperandVisitor visitor(graph_, do_implicit_null_checks_);
visitor.VisitInsertionOrder();
+ return true;
}
} // namespace x86
diff --git a/compiler/optimizing/x86_memory_gen.h b/compiler/optimizing/x86_memory_gen.h
index 5f15d9f1e6..3f4178d58a 100644
--- a/compiler/optimizing/x86_memory_gen.h
+++ b/compiler/optimizing/x86_memory_gen.h
@@ -31,7 +31,7 @@ class X86MemoryOperandGeneration : public HOptimization {
CodeGenerator* codegen,
OptimizingCompilerStats* stats);
- void Run() OVERRIDE;
+ bool Run() override;
static constexpr const char* kX86MemoryOperandGenerationPassName =
"x86_memory_operand_generation";