Diffstat (limited to 'compiler/optimizing')
176 files changed, 27920 insertions, 16527 deletions
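The dominant pattern across these files is the move from raw DexFile::CodeItem access (CodeItemIterator, code_item_.insns_size_in_code_units_, DexFile::GetCatchHandlerData) to the CodeItemDataAccessor / CodeItemDebugInfoAccessor helpers, and from graph-lifetime ArenaAllocator containers to ScopedArenaAllocator ones. A minimal sketch of the new iteration idiom, using only types and calls that appear in the hunks below (the wrapper function itself is hypothetical):

    // Sketch only: walk dex instructions through a CodeItemDataAccessor
    // instead of the old CodeItemIterator. All names are taken from the diff.
    static void ForEachInstruction(const CodeItemDataAccessor& accessor) {
      for (const DexInstructionPcPair& pair : accessor) {
        const uint32_t dex_pc = pair.DexPc();          // was it.CurrentDexPc()
        const Instruction& instruction = pair.Inst();  // was it.CurrentInstruction()
        // ... inspect `instruction` at `dex_pc` ...
      }
    }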
diff --git a/compiler/optimizing/block_builder.cc b/compiler/optimizing/block_builder.cc
index fe7ecd1ae1..d9df23fd47 100644
--- a/compiler/optimizing/block_builder.cc
+++ b/compiler/optimizing/block_builder.cc
@@ -16,11 +16,34 @@
 #include "block_builder.h"
 
-#include "bytecode_utils.h"
+#include "base/logging.h"  // FOR VLOG.
+#include "dex/bytecode_utils.h"
+#include "dex/code_item_accessors-inl.h"
+#include "dex/dex_file_exception_helpers.h"
 #include "quicken_info.h"
 
 namespace art {
 
+HBasicBlockBuilder::HBasicBlockBuilder(HGraph* graph,
+                                       const DexFile* const dex_file,
+                                       const CodeItemDebugInfoAccessor& accessor,
+                                       ScopedArenaAllocator* local_allocator)
+    : allocator_(graph->GetAllocator()),
+      graph_(graph),
+      dex_file_(dex_file),
+      code_item_accessor_(accessor),
+      local_allocator_(local_allocator),
+      branch_targets_(code_item_accessor_.HasCodeItem()
+                          ? code_item_accessor_.InsnsSizeInCodeUnits()
+                          : /* fake dex_pc=0 for intrinsic graph */ 1u,
+                      nullptr,
+                      local_allocator->Adapter(kArenaAllocGraphBuilder)),
+      throwing_blocks_(kDefaultNumberOfThrowingBlocks,
+                       local_allocator->Adapter(kArenaAllocGraphBuilder)),
+      number_of_branches_(0u),
+      quicken_index_for_dex_pc_(std::less<uint32_t>(),
+                                local_allocator->Adapter(kArenaAllocGraphBuilder)) {}
+
 HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t dex_pc) {
   return MaybeCreateBlockAt(dex_pc, dex_pc);
 }
@@ -29,7 +52,7 @@ HBasicBlock* HBasicBlockBuilder::MaybeCreateBlockAt(uint32_t semantic_dex_pc,
                                                     uint32_t store_dex_pc) {
   HBasicBlock* block = branch_targets_[store_dex_pc];
   if (block == nullptr) {
-    block = new (arena_) HBasicBlock(graph_, semantic_dex_pc);
+    block = new (allocator_) HBasicBlock(graph_, semantic_dex_pc);
     branch_targets_[store_dex_pc] = block;
   }
   DCHECK_EQ(block->GetDexPc(), semantic_dex_pc);
@@ -40,30 +63,30 @@ bool HBasicBlockBuilder::CreateBranchTargets() {
   // Create the first block for the dex instructions, single successor of the entry block.
   MaybeCreateBlockAt(0u);
 
-  if (code_item_.tries_size_ != 0) {
+  if (code_item_accessor_.TriesSize() != 0) {
     // Create branch targets at the start/end of the TryItem range. These are
     // places where the program might fall through into/out of the a block and
     // where TryBoundary instructions will be inserted later. Other edges which
     // enter/exit the try blocks are a result of branches/switches.
-    for (size_t idx = 0; idx < code_item_.tries_size_; ++idx) {
-      const DexFile::TryItem* try_item = DexFile::GetTryItems(code_item_, idx);
-      uint32_t dex_pc_start = try_item->start_addr_;
-      uint32_t dex_pc_end = dex_pc_start + try_item->insn_count_;
+    for (const DexFile::TryItem& try_item : code_item_accessor_.TryItems()) {
+      uint32_t dex_pc_start = try_item.start_addr_;
+      uint32_t dex_pc_end = dex_pc_start + try_item.insn_count_;
       MaybeCreateBlockAt(dex_pc_start);
-      if (dex_pc_end < code_item_.insns_size_in_code_units_) {
+      if (dex_pc_end < code_item_accessor_.InsnsSizeInCodeUnits()) {
         // TODO: Do not create block if the last instruction cannot fall through.
         MaybeCreateBlockAt(dex_pc_end);
-      } else if (dex_pc_end == code_item_.insns_size_in_code_units_) {
+      } else if (dex_pc_end == code_item_accessor_.InsnsSizeInCodeUnits()) {
        // The TryItem spans until the very end of the CodeItem and therefore
         // cannot have any code afterwards.
       } else {
         // The TryItem spans beyond the end of the CodeItem. This is invalid code.
+        VLOG(compiler) << "Not compiled: TryItem spans beyond the end of the CodeItem";
         return false;
       }
     }
 
     // Create branch targets for exception handlers.
-    const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+    const uint8_t* handlers_ptr = code_item_accessor_.GetCatchHandlerData();
     uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
     for (uint32_t idx = 0; idx < handlers_size; ++idx) {
       CatchHandlerIterator iterator(handlers_ptr);
@@ -76,14 +99,15 @@ bool HBasicBlockBuilder::CreateBranchTargets() {
 
   // Iterate over all instructions and find branching instructions. Create blocks for
   // the locations these instructions branch to.
-  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
-    uint32_t dex_pc = it.CurrentDexPc();
-    const Instruction& instruction = it.CurrentInstruction();
+  for (const DexInstructionPcPair& pair : code_item_accessor_) {
+    const uint32_t dex_pc = pair.DexPc();
+    const Instruction& instruction = pair.Inst();
 
     if (instruction.IsBranch()) {
       number_of_branches_++;
       MaybeCreateBlockAt(dex_pc + instruction.GetTargetOffset());
     } else if (instruction.IsSwitch()) {
+      number_of_branches_++;  // count as at least one branch (b/77652521)
       DexSwitchTable table(instruction, dex_pc);
       for (DexSwitchTableIterator s_it(table); !s_it.Done(); s_it.Advance()) {
         MaybeCreateBlockAt(dex_pc + s_it.CurrentTargetOffset());
@@ -105,13 +129,14 @@ bool HBasicBlockBuilder::CreateBranchTargets() {
     }
 
     if (instruction.CanFlowThrough()) {
-      if (it.IsLast()) {
+      DexInstructionIterator next(std::next(DexInstructionIterator(pair)));
+      if (next == code_item_accessor_.end()) {
         // In the normal case we should never hit this but someone can artificially forge a dex
         // file to fall-through out the method code. In this case we bail out compilation.
+        VLOG(compiler) << "Not compiled: Fall-through beyond the CodeItem";
         return false;
-      } else {
-        MaybeCreateBlockAt(dex_pc + it.CurrentInstruction().SizeInCodeUnits());
       }
+      MaybeCreateBlockAt(next.DexPc());
     }
   }
 
@@ -126,8 +151,9 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
   bool is_throwing_block = false;
   // Calculate the qucikening index here instead of CreateBranchTargets since it's easier to
   // calculate in dex_pc order.
-  for (CodeItemIterator it(code_item_); !it.Done(); it.Advance()) {
-    uint32_t dex_pc = it.CurrentDexPc();
+  for (const DexInstructionPcPair& pair : code_item_accessor_) {
+    const uint32_t dex_pc = pair.DexPc();
+    const Instruction& instruction = pair.Inst();
 
     // Check if this dex_pc address starts a new basic block.
     HBasicBlock* next_block = GetBlockAt(dex_pc);
@@ -144,7 +170,7 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
       graph_->AddBlock(block);
     }
     // Make sure to increment this before the continues.
-    if (QuickenInfoTable::NeedsIndexForInstruction(&it.CurrentInstruction())) {
+    if (QuickenInfoTable::NeedsIndexForInstruction(&instruction)) {
       ++quicken_index;
     }
 
@@ -153,8 +179,6 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
       continue;
     }
 
-    const Instruction& instruction = it.CurrentInstruction();
-
     if (!is_throwing_block && IsThrowingDexInstruction(instruction)) {
       DCHECK(!ContainsElement(throwing_blocks_, block));
       is_throwing_block = true;
@@ -185,9 +209,9 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
       continue;
     }
 
+    // Go to the next instruction in case we read dex PC below.
     if (instruction.CanFlowThrough()) {
-      uint32_t next_dex_pc = dex_pc + instruction.SizeInCodeUnits();
-      block->AddSuccessor(GetBlockAt(next_dex_pc));
+      block->AddSuccessor(GetBlockAt(std::next(DexInstructionIterator(pair)).DexPc()));
     }
 
     // The basic block ends here. Do not add any more instructions.
@@ -200,7 +224,7 @@ void HBasicBlockBuilder::ConnectBasicBlocks() {
 // Returns the TryItem stored for `block` or nullptr if there is no info for it.
 static const DexFile::TryItem* GetTryItem(
     HBasicBlock* block,
-    const ArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
+    const ScopedArenaSafeMap<uint32_t, const DexFile::TryItem*>& try_block_info) {
   auto iterator = try_block_info.find(block->GetBlockId());
   return (iterator == try_block_info.end()) ? nullptr : iterator->second;
 }
@@ -210,10 +234,12 @@ static const DexFile::TryItem* GetTryItem(
 // successors matches the order in which runtime exception delivery searches
 // for a handler.
 static void LinkToCatchBlocks(HTryBoundary* try_boundary,
-                              const DexFile::CodeItem& code_item,
+                              const CodeItemDataAccessor& accessor,
                               const DexFile::TryItem* try_item,
-                              const ArenaSafeMap<uint32_t, HBasicBlock*>& catch_blocks) {
-  for (CatchHandlerIterator it(code_item, *try_item); it.HasNext(); it.Next()) {
+                              const ScopedArenaSafeMap<uint32_t, HBasicBlock*>& catch_blocks) {
+  for (CatchHandlerIterator it(accessor.GetCatchHandlerData(try_item->handler_off_));
+       it.HasNext();
+       it.Next()) {
     try_boundary->AddExceptionHandler(catch_blocks.Get(it.GetHandlerAddress()));
   }
 }
@@ -229,7 +255,7 @@ bool HBasicBlockBuilder::MightHaveLiveNormalPredecessors(HBasicBlock* catch_bloc
     }
   }
 
-  const Instruction& first = GetDexInstructionAt(code_item_, catch_block->GetDexPc());
+  const Instruction& first = code_item_accessor_.InstructionAt(catch_block->GetDexPc());
   if (first.Opcode() == Instruction::MOVE_EXCEPTION) {
     // Verifier guarantees that if a catch block begins with MOVE_EXCEPTION then
     // it has no live normal predecessors.
@@ -247,14 +273,14 @@ bool HBasicBlockBuilder::MightHaveLiveNormalPredecessors(HBasicBlock* catch_bloc
 }
 
 void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
-  if (code_item_.tries_size_ == 0) {
+  if (code_item_accessor_.TriesSize() == 0) {
     return;
   }
 
   // Keep a map of all try blocks and their respective TryItems. We do not use
   // the block's pointer but rather its id to ensure deterministic iteration.
-  ArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
-      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
+  ScopedArenaSafeMap<uint32_t, const DexFile::TryItem*> try_block_info(
+      std::less<uint32_t>(), local_allocator_->Adapter(kArenaAllocGraphBuilder));
 
   // Obtain TryItem information for blocks with throwing instructions, and split
   // blocks which are both try & catch to simplify the graph.
@@ -269,21 +295,21 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
     // loop for synchronized blocks.
     if (ContainsElement(throwing_blocks_, block)) {
       // Try to find a TryItem covering the block.
-      const int32_t try_item_idx = DexFile::FindTryItem(code_item_, block->GetDexPc());
-      if (try_item_idx != -1) {
+      const DexFile::TryItem* try_item = code_item_accessor_.FindTryItem(block->GetDexPc());
+      if (try_item != nullptr) {
         // Block throwing and in a TryItem. Store the try block information.
-        try_block_info.Put(block->GetBlockId(), DexFile::GetTryItems(code_item_, try_item_idx));
+        try_block_info.Put(block->GetBlockId(), try_item);
       }
     }
   }
 
   // Map from a handler dex_pc to the corresponding catch block.
-  ArenaSafeMap<uint32_t, HBasicBlock*> catch_blocks(
-      std::less<uint32_t>(), arena_->Adapter(kArenaAllocGraphBuilder));
+  ScopedArenaSafeMap<uint32_t, HBasicBlock*> catch_blocks(
+      std::less<uint32_t>(), local_allocator_->Adapter(kArenaAllocGraphBuilder));
 
   // Iterate over catch blocks, create artifical landing pads if necessary to
   // simplify the CFG, and set metadata.
-  const uint8_t* handlers_ptr = DexFile::GetCatchHandlerData(code_item_, 0);
+  const uint8_t* handlers_ptr = code_item_accessor_.GetCatchHandlerData();
   uint32_t handlers_size = DecodeUnsignedLeb128(&handlers_ptr);
   for (uint32_t idx = 0; idx < handlers_size; ++idx) {
     CatchHandlerIterator iterator(handlers_ptr);
@@ -302,8 +328,8 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
       HBasicBlock* catch_block = GetBlockAt(address);
       bool is_try_block = (try_block_info.find(catch_block->GetBlockId()) != try_block_info.end());
       if (is_try_block || MightHaveLiveNormalPredecessors(catch_block)) {
-        HBasicBlock* new_catch_block = new (arena_) HBasicBlock(graph_, address);
-        new_catch_block->AddInstruction(new (arena_) HGoto(address));
+        HBasicBlock* new_catch_block = new (allocator_) HBasicBlock(graph_, address);
+        new_catch_block->AddInstruction(new (allocator_) HGoto(address));
         new_catch_block->AddSuccessor(catch_block);
         graph_->AddBlock(new_catch_block);
         catch_block = new_catch_block;
@@ -311,7 +337,7 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
       catch_blocks.Put(address, catch_block);
 
       catch_block->SetTryCatchInformation(
-          new (arena_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
+          new (allocator_) TryCatchInformation(iterator.GetHandlerTypeIndex(), *dex_file_));
     }
     handlers_ptr = iterator.EndDataPointer();
   }
@@ -328,10 +354,10 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
       if (GetTryItem(predecessor, try_block_info) != try_item) {
         // Found a predecessor not covered by the same TryItem. Insert entering
         // boundary block.
-        HTryBoundary* try_entry =
-            new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
+        HTryBoundary* try_entry = new (allocator_) HTryBoundary(
+            HTryBoundary::BoundaryKind::kEntry, try_block->GetDexPc());
         try_block->CreateImmediateDominator()->AddInstruction(try_entry);
-        LinkToCatchBlocks(try_entry, code_item_, try_item, catch_blocks);
+        LinkToCatchBlocks(try_entry, code_item_accessor_, try_item, catch_blocks);
         break;
       }
     }
@@ -357,18 +383,19 @@ void HBasicBlockBuilder::InsertTryBoundaryBlocks() {
 
     // Insert TryBoundary and link to catch blocks.
     HTryBoundary* try_exit =
-        new (arena_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
+        new (allocator_) HTryBoundary(HTryBoundary::BoundaryKind::kExit, successor->GetDexPc());
     graph_->SplitEdge(try_block, successor)->AddInstruction(try_exit);
-    LinkToCatchBlocks(try_exit, code_item_, try_item, catch_blocks);
+    LinkToCatchBlocks(try_exit, code_item_accessor_, try_item, catch_blocks);
   }
 }
 
 bool HBasicBlockBuilder::Build() {
+  DCHECK(code_item_accessor_.HasCodeItem());
   DCHECK(graph_->GetBlocks().empty());
-  graph_->SetEntryBlock(new (arena_) HBasicBlock(graph_, kNoDexPc));
-  graph_->SetExitBlock(new (arena_) HBasicBlock(graph_, kNoDexPc));
+  graph_->SetEntryBlock(new (allocator_) HBasicBlock(graph_, kNoDexPc));
+  graph_->SetExitBlock(new (allocator_) HBasicBlock(graph_, kNoDexPc));
 
   // TODO(dbrazdil): Do CreateBranchTargets and ConnectBasicBlocks in one pass.
   if (!CreateBranchTargets()) {
@@ -381,6 +408,27 @@ bool HBasicBlockBuilder::Build() {
   return true;
 }
 
+void HBasicBlockBuilder::BuildIntrinsic() {
+  DCHECK(!code_item_accessor_.HasCodeItem());
+  DCHECK(graph_->GetBlocks().empty());
+
+  // Create blocks.
+  HBasicBlock* entry_block = new (allocator_) HBasicBlock(graph_, kNoDexPc);
+  HBasicBlock* exit_block = new (allocator_) HBasicBlock(graph_, kNoDexPc);
+  HBasicBlock* body = MaybeCreateBlockAt(/* semantic_dex_pc */ kNoDexPc, /* store_dex_pc */ 0u);
+
+  // Add blocks to the graph.
+  graph_->AddBlock(entry_block);
+  graph_->AddBlock(body);
+  graph_->AddBlock(exit_block);
+  graph_->SetEntryBlock(entry_block);
+  graph_->SetExitBlock(exit_block);
+
+  // Connect blocks.
+  entry_block->AddSuccessor(body);
+  body->AddSuccessor(exit_block);
+}
+
 size_t HBasicBlockBuilder::GetQuickenIndex(uint32_t dex_pc) const {
   return quicken_index_for_dex_pc_.Get(dex_pc);
 }
diff --git a/compiler/optimizing/block_builder.h b/compiler/optimizing/block_builder.h
index 6adce815f4..2c1f034d80 100644
--- a/compiler/optimizing/block_builder.h
+++ b/compiler/optimizing/block_builder.h
@@ -17,9 +17,10 @@
 #ifndef ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
 #define ART_COMPILER_OPTIMIZING_BLOCK_BUILDER_H_
 
-#include "base/arena_containers.h"
-#include "base/arena_object.h"
-#include "dex_file.h"
+#include "base/scoped_arena_allocator.h"
+#include "base/scoped_arena_containers.h"
+#include "dex/code_item_accessors.h"
+#include "dex/dex_file.h"
 #include "nodes.h"
 
 namespace art {
@@ -28,17 +29,8 @@ class HBasicBlockBuilder : public ValueObject {
  public:
   HBasicBlockBuilder(HGraph* graph,
                      const DexFile* const dex_file,
-                     const DexFile::CodeItem& code_item)
-      : arena_(graph->GetArena()),
-        graph_(graph),
-        dex_file_(dex_file),
-        code_item_(code_item),
-        branch_targets_(code_item.insns_size_in_code_units_,
-                        nullptr,
-                        arena_->Adapter(kArenaAllocGraphBuilder)),
-        throwing_blocks_(kDefaultNumberOfThrowingBlocks, arena_->Adapter(kArenaAllocGraphBuilder)),
-        number_of_branches_(0u),
-        quicken_index_for_dex_pc_(std::less<uint32_t>(), arena_->Adapter()) {}
+                     const CodeItemDebugInfoAccessor& accessor,
+                     ScopedArenaAllocator* local_allocator);
 
   // Creates basic blocks in `graph_` at branch target dex_pc positions of the
   // `code_item_`. Blocks are connected but left unpopulated with instructions.
@@ -46,6 +38,9 @@ class HBasicBlockBuilder : public ValueObject {
   // exits a try block.
   bool Build();
 
+  // Creates basic blocks in `graph_` for compiling an intrinsic.
+  void BuildIntrinsic();
+
   size_t GetNumberOfBranches() const { return number_of_branches_; }
   HBasicBlock* GetBlockAt(uint32_t dex_pc) const { return branch_targets_[dex_pc]; }
 
@@ -71,18 +66,19 @@ class HBasicBlockBuilder : public ValueObject {
   // handler dex_pcs.
   bool MightHaveLiveNormalPredecessors(HBasicBlock* catch_block);
 
-  ArenaAllocator* const arena_;
+  ArenaAllocator* const allocator_;
   HGraph* const graph_;
   const DexFile* const dex_file_;
-  const DexFile::CodeItem& code_item_;
+  CodeItemDataAccessor code_item_accessor_;  // null code item for intrinsic graph.
 
-  ArenaVector<HBasicBlock*> branch_targets_;
-  ArenaVector<HBasicBlock*> throwing_blocks_;
+  ScopedArenaAllocator* const local_allocator_;
+  ScopedArenaVector<HBasicBlock*> branch_targets_;
+  ScopedArenaVector<HBasicBlock*> throwing_blocks_;
   size_t number_of_branches_;
 
   // A table to quickly find the quicken index for the first instruction of a basic block.
- ArenaSafeMap<uint32_t, uint32_t> quicken_index_for_dex_pc_; + ScopedArenaSafeMap<uint32_t, uint32_t> quicken_index_for_dex_pc_; static constexpr size_t kDefaultNumberOfThrowingBlocks = 2u; diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc index f3ecdf036a..d893cc88c4 100644 --- a/compiler/optimizing/bounds_check_elimination.cc +++ b/compiler/optimizing/bounds_check_elimination.cc @@ -18,10 +18,11 @@ #include <limits> -#include "base/arena_containers.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "induction_var_range.h" -#include "side_effects_analysis.h" #include "nodes.h" +#include "side_effects_analysis.h" namespace art { @@ -287,7 +288,7 @@ class ValueBound : public ValueObject { */ class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> { public: - ValueRange(ArenaAllocator* allocator, ValueBound lower, ValueBound upper) + ValueRange(ScopedArenaAllocator* allocator, ValueBound lower, ValueBound upper) : allocator_(allocator), lower_(lower), upper_(upper) {} virtual ~ValueRange() {} @@ -297,11 +298,11 @@ class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> { return AsMonotonicValueRange() != nullptr; } - ArenaAllocator* GetAllocator() const { return allocator_; } + ScopedArenaAllocator* GetAllocator() const { return allocator_; } ValueBound GetLower() const { return lower_; } ValueBound GetUpper() const { return upper_; } - bool IsConstantValueRange() { return lower_.IsConstant() && upper_.IsConstant(); } + bool IsConstantValueRange() const { return lower_.IsConstant() && upper_.IsConstant(); } // If it's certain that this value range fits in other_range. virtual bool FitsIn(ValueRange* other_range) const { @@ -350,7 +351,7 @@ class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> { } private: - ArenaAllocator* const allocator_; + ScopedArenaAllocator* const allocator_; const ValueBound lower_; // inclusive const ValueBound upper_; // inclusive @@ -365,7 +366,7 @@ class ValueRange : public ArenaObject<kArenaAllocBoundsCheckElimination> { */ class MonotonicValueRange : public ValueRange { public: - MonotonicValueRange(ArenaAllocator* allocator, + MonotonicValueRange(ScopedArenaAllocator* allocator, HPhi* induction_variable, HInstruction* initial, int32_t increment, @@ -510,21 +511,19 @@ class BCEVisitor : public HGraphVisitor { const SideEffectsAnalysis& side_effects, HInductionVarAnalysis* induction_analysis) : HGraphVisitor(graph), + allocator_(graph->GetArenaStack()), maps_(graph->GetBlocks().size(), - ArenaSafeMap<int, ValueRange*>( + ScopedArenaSafeMap<int, ValueRange*>( std::less<int>(), - graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), - graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), - first_index_bounds_check_map_( - std::less<int>(), - graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), - early_exit_loop_( - std::less<uint32_t>(), - graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), - taken_test_loop_( - std::less<uint32_t>(), - graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), - finite_loop_(graph->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)), + allocator_.Adapter(kArenaAllocBoundsCheckElimination)), + allocator_.Adapter(kArenaAllocBoundsCheckElimination)), + first_index_bounds_check_map_(std::less<int>(), + allocator_.Adapter(kArenaAllocBoundsCheckElimination)), + early_exit_loop_(std::less<uint32_t>(), + 
allocator_.Adapter(kArenaAllocBoundsCheckElimination)), + taken_test_loop_(std::less<uint32_t>(), + allocator_.Adapter(kArenaAllocBoundsCheckElimination)), + finite_loop_(allocator_.Adapter(kArenaAllocBoundsCheckElimination)), has_dom_based_dynamic_bce_(false), initial_block_size_(graph->GetBlocks().size()), side_effects_(side_effects), @@ -569,7 +568,7 @@ class BCEVisitor : public HGraphVisitor { private: // Return the map of proven value ranges at the beginning of a basic block. - ArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { + ScopedArenaSafeMap<int, ValueRange*>* GetValueRangeMap(HBasicBlock* basic_block) { if (IsAddedBlock(basic_block)) { // Added blocks don't keep value ranges. return nullptr; @@ -580,7 +579,7 @@ class BCEVisitor : public HGraphVisitor { // Traverse up the dominator tree to look for value range info. ValueRange* LookupValueRange(HInstruction* instruction, HBasicBlock* basic_block) { while (basic_block != nullptr) { - ArenaSafeMap<int, ValueRange*>* map = GetValueRangeMap(basic_block); + ScopedArenaSafeMap<int, ValueRange*>* map = GetValueRangeMap(basic_block); if (map != nullptr) { if (map->find(instruction->GetId()) != map->end()) { return map->Get(instruction->GetId()); @@ -596,6 +595,7 @@ class BCEVisitor : public HGraphVisitor { // Helper method to assign a new range to an instruction in given basic block. void AssignRange(HBasicBlock* basic_block, HInstruction* instruction, ValueRange* range) { + DCHECK(!range->IsMonotonicValueRange() || instruction->IsLoopHeaderPhi()); GetValueRangeMap(basic_block)->Overwrite(instruction->GetId(), range); } @@ -667,8 +667,8 @@ class BCEVisitor : public HGraphVisitor { if (successor != nullptr) { bool overflow; bool underflow; - ValueRange* new_left_range = new (GetGraph()->GetArena()) ValueRange( - GetGraph()->GetArena(), + ValueRange* new_left_range = new (&allocator_) ValueRange( + &allocator_, left_range->GetBound(), right_range->GetBound().Add(left_compensation, &overflow, &underflow)); if (!overflow && !underflow) { @@ -676,8 +676,8 @@ class BCEVisitor : public HGraphVisitor { new_left_range); } - ValueRange* new_right_range = new (GetGraph()->GetArena()) ValueRange( - GetGraph()->GetArena(), + ValueRange* new_right_range = new (&allocator_) ValueRange( + &allocator_, left_range->GetBound().Add(right_compensation, &overflow, &underflow), right_range->GetBound()); if (!overflow && !underflow) { @@ -749,8 +749,8 @@ class BCEVisitor : public HGraphVisitor { if (overflow || underflow) { return; } - ValueRange* new_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); + ValueRange* new_range = new (&allocator_) ValueRange( + &allocator_, ValueBound::Min(), new_upper); ApplyRangeFromComparison(left, block, true_successor, new_range); } @@ -761,8 +761,8 @@ class BCEVisitor : public HGraphVisitor { if (overflow || underflow) { return; } - ValueRange* new_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); + ValueRange* new_range = new (&allocator_) ValueRange( + &allocator_, new_lower, ValueBound::Max()); ApplyRangeFromComparison(left, block, false_successor, new_range); } } else if (cond == kCondGT || cond == kCondGE) { @@ -773,8 +773,8 @@ class BCEVisitor : public HGraphVisitor { if (overflow || underflow) { return; } - ValueRange* new_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), new_lower, ValueBound::Max()); + ValueRange* new_range = new (&allocator_) ValueRange( + 
&allocator_, new_lower, ValueBound::Max()); ApplyRangeFromComparison(left, block, true_successor, new_range); } @@ -784,30 +784,38 @@ class BCEVisitor : public HGraphVisitor { if (overflow || underflow) { return; } - ValueRange* new_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), ValueBound::Min(), new_upper); + ValueRange* new_range = new (&allocator_) ValueRange( + &allocator_, ValueBound::Min(), new_upper); ApplyRangeFromComparison(left, block, false_successor, new_range); } } else if (cond == kCondNE || cond == kCondEQ) { - if (left->IsArrayLength() && lower.IsConstant() && upper.IsConstant()) { - // Special case: - // length == [c,d] yields [c, d] along true - // length != [c,d] yields [c, d] along false - if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) { - ValueRange* new_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), lower, upper); - ApplyRangeFromComparison( - left, block, cond == kCondEQ ? true_successor : false_successor, new_range); - } - // In addition: - // length == 0 yields [1, max] along false - // length != 0 yields [1, max] along true - if (lower.GetConstant() == 0 && upper.GetConstant() == 0) { - ValueRange* new_range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), ValueBound(nullptr, 1), ValueBound::Max()); - ApplyRangeFromComparison( - left, block, cond == kCondEQ ? false_successor : true_successor, new_range); + if (left->IsArrayLength()) { + if (lower.IsConstant() && upper.IsConstant()) { + // Special case: + // length == [c,d] yields [c, d] along true + // length != [c,d] yields [c, d] along false + if (!lower.Equals(ValueBound::Min()) || !upper.Equals(ValueBound::Max())) { + ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper); + ApplyRangeFromComparison( + left, block, cond == kCondEQ ? true_successor : false_successor, new_range); + } + // In addition: + // length == 0 yields [1, max] along false + // length != 0 yields [1, max] along true + if (lower.GetConstant() == 0 && upper.GetConstant() == 0) { + ValueRange* new_range = new (&allocator_) ValueRange( + &allocator_, ValueBound(nullptr, 1), ValueBound::Max()); + ApplyRangeFromComparison( + left, block, cond == kCondEQ ? false_successor : true_successor, new_range); + } } + } else if (lower.IsRelatedToArrayLength() && lower.Equals(upper)) { + // Special aliasing case, with x not array length itself: + // x == [length,length] yields x == length along true + // x != [length,length] yields x == length along false + ValueRange* new_range = new (&allocator_) ValueRange(&allocator_, lower, upper); + ApplyRangeFromComparison( + left, block, cond == kCondEQ ? true_successor : false_successor, new_range); } } } @@ -825,12 +833,26 @@ class BCEVisitor : public HGraphVisitor { // Non-constant index. ValueBound lower = ValueBound(nullptr, 0); // constant 0 ValueBound upper = ValueBound(array_length, -1); // array_length - 1 - ValueRange array_range(GetGraph()->GetArena(), lower, upper); + ValueRange array_range(&allocator_, lower, upper); // Try index range obtained by dominator-based analysis. 
ValueRange* index_range = LookupValueRange(index, block); - if (index_range != nullptr && index_range->FitsIn(&array_range)) { - ReplaceInstruction(bounds_check, index); - return; + if (index_range != nullptr) { + if (index_range->FitsIn(&array_range)) { + ReplaceInstruction(bounds_check, index); + return; + } else if (index_range->IsConstantValueRange()) { + // If the non-constant index turns out to have a constant range, + // make one more attempt to get a constant in the array range. + ValueRange* existing_range = LookupValueRange(array_length, block); + if (existing_range != nullptr && + existing_range->IsConstantValueRange()) { + ValueRange constant_array_range(&allocator_, lower, existing_range->GetLower()); + if (index_range->FitsIn(&constant_array_range)) { + ReplaceInstruction(bounds_check, index); + return; + } + } + } } // Try index range obtained by induction variable analysis. // Disables dynamic bce if OOB is certain. @@ -874,8 +896,7 @@ class BCEVisitor : public HGraphVisitor { } else { ValueBound lower = ValueBound(nullptr, constant + 1); ValueBound upper = ValueBound::Max(); - ValueRange* range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), lower, upper); + ValueRange* range = new (&allocator_) ValueRange(&allocator_, lower, upper); AssignRange(block, array_length, range); } } @@ -926,7 +947,7 @@ class BCEVisitor : public HGraphVisitor { void VisitPhi(HPhi* phi) OVERRIDE { if (phi->IsLoopHeaderPhi() - && (phi->GetType() == Primitive::kPrimInt) + && (phi->GetType() == DataType::Type::kInt32) && HasSameInputAtBackEdges(phi)) { HInstruction* instruction = phi->InputAt(1); HInstruction *left; @@ -937,8 +958,8 @@ class BCEVisitor : public HGraphVisitor { ValueRange* range = nullptr; if (increment == 0) { // Add constant 0. It's really a fixed value. - range = new (GetGraph()->GetArena()) ValueRange( - GetGraph()->GetArena(), + range = new (&allocator_) ValueRange( + &allocator_, ValueBound(initial_value, 0), ValueBound(initial_value, 0)); } else { @@ -958,8 +979,8 @@ class BCEVisitor : public HGraphVisitor { bound = increment > 0 ? ValueBound::Min() : ValueBound::Max(); } } - range = new (GetGraph()->GetArena()) MonotonicValueRange( - GetGraph()->GetArena(), + range = new (&allocator_) MonotonicValueRange( + &allocator_, phi, initial_value, increment, @@ -1038,8 +1059,8 @@ class BCEVisitor : public HGraphVisitor { !ValueBound::WouldAddOverflowOrUnderflow(c0, -c1)) { if ((c0 - c1) <= 0) { // array.length + (c0 - c1) won't overflow/underflow. - ValueRange* range = new (GetGraph()->GetArena()) ValueRange( - GetGraph()->GetArena(), + ValueRange* range = new (&allocator_) ValueRange( + &allocator_, ValueBound(nullptr, right_const - upper.GetConstant()), ValueBound(array_length, right_const - lower.GetConstant())); AssignRange(sub->GetBlock(), sub, range); @@ -1086,8 +1107,8 @@ class BCEVisitor : public HGraphVisitor { // than array_length. return; } - ValueRange* range = new (GetGraph()->GetArena()) ValueRange( - GetGraph()->GetArena(), + ValueRange* range = new (&allocator_) ValueRange( + &allocator_, ValueBound(nullptr, std::numeric_limits<int32_t>::min()), ValueBound(left, 0)); AssignRange(instruction->GetBlock(), instruction, range); @@ -1112,8 +1133,8 @@ class BCEVisitor : public HGraphVisitor { if (constant > 0) { // constant serves as a mask so any number masked with it // gets a [0, constant] value range. 
- ValueRange* range = new (GetGraph()->GetArena()) ValueRange( - GetGraph()->GetArena(), + ValueRange* range = new (&allocator_) ValueRange( + &allocator_, ValueBound(nullptr, 0), ValueBound(nullptr, constant)); AssignRange(instruction->GetBlock(), instruction, range); @@ -1121,6 +1142,66 @@ class BCEVisitor : public HGraphVisitor { } } + void VisitRem(HRem* instruction) OVERRIDE { + HInstruction* left = instruction->GetLeft(); + HInstruction* right = instruction->GetRight(); + + // Handle 'i % CONST' format expression in array index, e.g: + // array[i % 20]; + if (right->IsIntConstant()) { + int32_t right_const = std::abs(right->AsIntConstant()->GetValue()); + if (right_const == 0) { + return; + } + // The sign of divisor CONST doesn't affect the sign final value range. + // For example: + // if (i > 0) { + // array[i % 10]; // index value range [0, 9] + // array[i % -10]; // index value range [0, 9] + // } + ValueRange* right_range = new (&allocator_) ValueRange( + &allocator_, + ValueBound(nullptr, 1 - right_const), + ValueBound(nullptr, right_const - 1)); + + ValueRange* left_range = LookupValueRange(left, instruction->GetBlock()); + if (left_range != nullptr) { + right_range = right_range->Narrow(left_range); + } + AssignRange(instruction->GetBlock(), instruction, right_range); + return; + } + + // Handle following pattern: + // i0 NullCheck + // i1 ArrayLength[i0] + // i2 DivByZeroCheck [i1] <-- right + // i3 Rem [i5, i2] <-- we are here. + // i4 BoundsCheck [i3,i1] + if (right->IsDivZeroCheck()) { + // if array_length can pass div-by-zero check, + // array_length must be > 0. + right = right->AsDivZeroCheck()->InputAt(0); + } + + // Handle 'i % array.length' format expression in array index, e.g: + // array[(i+7) % array.length]; + if (right->IsArrayLength()) { + ValueBound lower = ValueBound::Min(); // ideally, lower should be '1-array_length'. + ValueBound upper = ValueBound(right, -1); // array_length - 1 + ValueRange* right_range = new (&allocator_) ValueRange( + &allocator_, + lower, + upper); + ValueRange* left_range = LookupValueRange(left, instruction->GetBlock()); + if (left_range != nullptr) { + right_range = right_range->Narrow(left_range); + } + AssignRange(instruction->GetBlock(), instruction, right_range); + return; + } + } + void VisitNewArray(HNewArray* new_array) OVERRIDE { HInstruction* len = new_array->GetLength(); if (!len->IsIntConstant()) { @@ -1134,8 +1215,7 @@ class BCEVisitor : public HGraphVisitor { // which isn't available as an instruction yet. new_array will // be treated the same as new_array.length when it's used in a ValueBound. 
ValueBound upper = ValueBound(new_array, -right_const); - ValueRange* range = new (GetGraph()->GetArena()) - ValueRange(GetGraph()->GetArena(), lower, upper); + ValueRange* range = new (&allocator_) ValueRange(&allocator_, lower, upper); ValueRange* existing_range = LookupValueRange(left, new_array->GetBlock()); if (existing_range != nullptr) { range = existing_range->Narrow(range); @@ -1199,14 +1279,15 @@ class BCEVisitor : public HGraphVisitor { if (base == nullptr) { DCHECK_GE(min_c, 0); } else { - HInstruction* lower = new (GetGraph()->GetArena()) - HAdd(Primitive::kPrimInt, base, GetGraph()->GetIntConstant(min_c)); - upper = new (GetGraph()->GetArena()) HAdd(Primitive::kPrimInt, base, upper); + HInstruction* lower = new (GetGraph()->GetAllocator()) + HAdd(DataType::Type::kInt32, base, GetGraph()->GetIntConstant(min_c)); + upper = new (GetGraph()->GetAllocator()) HAdd(DataType::Type::kInt32, base, upper); block->InsertInstructionBefore(lower, bounds_check); block->InsertInstructionBefore(upper, bounds_check); - InsertDeoptInBlock(bounds_check, new (GetGraph()->GetArena()) HAbove(lower, upper)); + InsertDeoptInBlock(bounds_check, new (GetGraph()->GetAllocator()) HAbove(lower, upper)); } - InsertDeoptInBlock(bounds_check, new (GetGraph()->GetArena()) HAboveOrEqual(upper, array_length)); + InsertDeoptInBlock( + bounds_check, new (GetGraph()->GetAllocator()) HAboveOrEqual(upper, array_length)); // Flag that this kind of deoptimization has occurred. has_dom_based_dynamic_bce_ = true; } @@ -1229,10 +1310,10 @@ class BCEVisitor : public HGraphVisitor { HInstruction* base = value.GetInstruction(); int32_t min_c = base == nullptr ? 0 : value.GetConstant(); int32_t max_c = value.GetConstant(); - ArenaVector<HBoundsCheck*> candidates( - GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)); - ArenaVector<HBoundsCheck*> standby( - GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)); + ScopedArenaVector<HBoundsCheck*> candidates( + allocator_.Adapter(kArenaAllocBoundsCheckElimination)); + ScopedArenaVector<HBoundsCheck*> standby( + allocator_.Adapter(kArenaAllocBoundsCheckElimination)); for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) { // Another bounds check in same or dominated block? HInstruction* user = use.GetUser(); @@ -1316,7 +1397,7 @@ class BCEVisitor : public HGraphVisitor { v2.is_known && (v2.a_constant == 0 || v2.a_constant == 1)) { DCHECK(v1.a_constant == 1 || v1.instruction == nullptr); DCHECK(v2.a_constant == 1 || v2.instruction == nullptr); - ValueRange index_range(GetGraph()->GetArena(), + ValueRange index_range(&allocator_, ValueBound(v1.instruction, v1.b_constant), ValueBound(v2.instruction, v2.b_constant)); // If analysis reveals a certain OOB, disable dynamic BCE. Otherwise, @@ -1348,10 +1429,10 @@ class BCEVisitor : public HGraphVisitor { HInstruction* base = value.GetInstruction(); int32_t min_c = base == nullptr ? 
0 : value.GetConstant(); int32_t max_c = value.GetConstant(); - ArenaVector<HBoundsCheck*> candidates( - GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)); - ArenaVector<HBoundsCheck*> standby( - GetGraph()->GetArena()->Adapter(kArenaAllocBoundsCheckElimination)); + ScopedArenaVector<HBoundsCheck*> candidates( + allocator_.Adapter(kArenaAllocBoundsCheckElimination)); + ScopedArenaVector<HBoundsCheck*> standby( + allocator_.Adapter(kArenaAllocBoundsCheckElimination)); for (const HUseListNode<HInstruction*>& use : array_length->GetUses()) { HInstruction* user = use.GetUser(); if (user->IsBoundsCheck() && loop == user->GetBlock()->GetLoopInformation()) { @@ -1437,7 +1518,8 @@ class BCEVisitor : public HGraphVisitor { if (min_c != max_c) { DCHECK(min_lower == nullptr && min_upper != nullptr && max_lower == nullptr && max_upper != nullptr); - InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_upper, max_upper)); + InsertDeoptInLoop( + loop, block, new (GetGraph()->GetAllocator()) HAbove(min_upper, max_upper)); } else { DCHECK(min_lower == nullptr && min_upper == nullptr && max_lower == nullptr && max_upper != nullptr); @@ -1447,15 +1529,17 @@ class BCEVisitor : public HGraphVisitor { if (min_c != max_c) { DCHECK(min_lower != nullptr && min_upper != nullptr && max_lower != nullptr && max_upper != nullptr); - InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(min_lower, max_lower)); + InsertDeoptInLoop( + loop, block, new (GetGraph()->GetAllocator()) HAbove(min_lower, max_lower)); } else { DCHECK(min_lower == nullptr && min_upper == nullptr && max_lower != nullptr && max_upper != nullptr); } - InsertDeoptInLoop(loop, block, new (GetGraph()->GetArena()) HAbove(max_lower, max_upper)); + InsertDeoptInLoop( + loop, block, new (GetGraph()->GetAllocator()) HAbove(max_lower, max_upper)); } InsertDeoptInLoop( - loop, block, new (GetGraph()->GetArena()) HAboveOrEqual(max_upper, array_length)); + loop, block, new (GetGraph()->GetAllocator()) HAboveOrEqual(max_upper, array_length)); } else { // TODO: if rejected, avoid doing this again for subsequent instructions in this set? } @@ -1549,7 +1633,7 @@ class BCEVisitor : public HGraphVisitor { TransformLoopForDeoptimizationIfNeeded(loop, needs_taken_test); HBasicBlock* block = GetPreHeader(loop, check); HInstruction* cond = - new (GetGraph()->GetArena()) HEqual(array, GetGraph()->GetNullConstant()); + new (GetGraph()->GetAllocator()) HEqual(array, GetGraph()->GetNullConstant()); InsertDeoptInLoop(loop, block, cond, /* is_null_check */ true); ReplaceInstruction(check, array); return true; @@ -1624,8 +1708,8 @@ class BCEVisitor : public HGraphVisitor { block->InsertInstructionBefore(condition, block->GetLastInstruction()); DeoptimizationKind kind = is_null_check ? 
DeoptimizationKind::kLoopNullBCE : DeoptimizationKind::kLoopBoundsBCE; - HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( - GetGraph()->GetArena(), condition, kind, suspend->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize( + GetGraph()->GetAllocator(), condition, kind, suspend->GetDexPc()); block->InsertInstructionBefore(deoptimize, block->GetLastInstruction()); if (suspend->HasEnvironment()) { deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( @@ -1637,8 +1721,11 @@ class BCEVisitor : public HGraphVisitor { void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) { HBasicBlock* block = bounds_check->GetBlock(); block->InsertInstructionBefore(condition, bounds_check); - HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( - GetGraph()->GetArena(), condition, DeoptimizationKind::kBlockBCE, bounds_check->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize( + GetGraph()->GetAllocator(), + condition, + DeoptimizationKind::kBlockBCE, + bounds_check->GetDexPc()); block->InsertInstructionBefore(deoptimize, bounds_check); deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment()); } @@ -1702,18 +1789,18 @@ class BCEVisitor : public HGraphVisitor { HBasicBlock* false_block = if_block->GetSuccessors()[1]; // False successor. // Goto instructions. - true_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); - false_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); - new_preheader->AddInstruction(new (GetGraph()->GetArena()) HGoto()); + true_block->AddInstruction(new (GetGraph()->GetAllocator()) HGoto()); + false_block->AddInstruction(new (GetGraph()->GetAllocator()) HGoto()); + new_preheader->AddInstruction(new (GetGraph()->GetAllocator()) HGoto()); // Insert the taken-test to see if the loop body is entered. If the // loop isn't entered at all, it jumps around the deoptimization block. - if_block->AddInstruction(new (GetGraph()->GetArena()) HGoto()); // placeholder + if_block->AddInstruction(new (GetGraph()->GetAllocator()) HGoto()); // placeholder HInstruction* condition = induction_range_.GenerateTakenTest( header->GetLastInstruction(), GetGraph(), if_block); DCHECK(condition != nullptr); if_block->RemoveInstruction(if_block->GetLastInstruction()); - if_block->AddInstruction(new (GetGraph()->GetArena()) HIf(condition)); + if_block->AddInstruction(new (GetGraph()->GetAllocator()) HIf(condition)); taken_test_loop_.Put(loop_id, true_block); } @@ -1740,7 +1827,7 @@ class BCEVisitor : public HGraphVisitor { // Scan all instructions in a new deoptimization block. for (HInstructionIterator it(true_block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); HPhi* phi = nullptr; // Scan all uses of an instruction and replace each later use with a phi node. 
const HUseList<HInstruction*>& uses = instruction->GetUses(); @@ -1783,20 +1870,20 @@ class BCEVisitor : public HGraphVisitor { */ HPhi* NewPhi(HBasicBlock* new_preheader, HInstruction* instruction, - Primitive::Type type) { + DataType::Type type) { HGraph* graph = GetGraph(); HInstruction* zero; switch (type) { - case Primitive::kPrimNot: zero = graph->GetNullConstant(); break; - case Primitive::kPrimFloat: zero = graph->GetFloatConstant(0); break; - case Primitive::kPrimDouble: zero = graph->GetDoubleConstant(0); break; + case DataType::Type::kReference: zero = graph->GetNullConstant(); break; + case DataType::Type::kFloat32: zero = graph->GetFloatConstant(0); break; + case DataType::Type::kFloat64: zero = graph->GetDoubleConstant(0); break; default: zero = graph->GetConstant(type, 0); break; } - HPhi* phi = new (graph->GetArena()) - HPhi(graph->GetArena(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type)); + HPhi* phi = new (graph->GetAllocator()) + HPhi(graph->GetAllocator(), kNoRegNumber, /*number_of_inputs*/ 2, HPhi::ToPhiType(type)); phi->SetRawInputAt(0, instruction); phi->SetRawInputAt(1, zero); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { phi->SetReferenceTypeInfo(instruction->GetReferenceTypeInfo()); } new_preheader->AddPhi(phi); @@ -1814,21 +1901,24 @@ class BCEVisitor : public HGraphVisitor { instruction->GetBlock()->RemoveInstruction(instruction); } + // Use local allocator for allocating memory. + ScopedArenaAllocator allocator_; + // A set of maps, one per basic block, from instruction to range. - ArenaVector<ArenaSafeMap<int, ValueRange*>> maps_; + ScopedArenaVector<ScopedArenaSafeMap<int, ValueRange*>> maps_; // Map an HArrayLength instruction's id to the first HBoundsCheck instruction // in a block that checks an index against that HArrayLength. - ArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_; + ScopedArenaSafeMap<int, HBoundsCheck*> first_index_bounds_check_map_; // Early-exit loop bookkeeping. - ArenaSafeMap<uint32_t, bool> early_exit_loop_; + ScopedArenaSafeMap<uint32_t, bool> early_exit_loop_; // Taken-test loop bookkeeping. - ArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_; + ScopedArenaSafeMap<uint32_t, HBasicBlock*> taken_test_loop_; // Finite loop bookkeeping. - ArenaSet<uint32_t> finite_loop_; + ScopedArenaSet<uint32_t> finite_loop_; // Flag that denotes whether dominator-based dynamic elimination has occurred. bool has_dom_based_dynamic_bce_; diff --git a/compiler/optimizing/bounds_check_elimination.h b/compiler/optimizing/bounds_check_elimination.h index 6dc53207ea..79c67a8c7a 100644 --- a/compiler/optimizing/bounds_check_elimination.h +++ b/compiler/optimizing/bounds_check_elimination.h @@ -28,8 +28,9 @@ class BoundsCheckElimination : public HOptimization { public: BoundsCheckElimination(HGraph* graph, const SideEffectsAnalysis& side_effects, - HInductionVarAnalysis* induction_analysis) - : HOptimization(graph, kBoundsCheckEliminationPassName), + HInductionVarAnalysis* induction_analysis, + const char* name = kBoundsCheckEliminationPassName) + : HOptimization(graph, name), side_effects_(side_effects), induction_analysis_(induction_analysis) {} diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc index a949c33149..1523478613 100644 --- a/compiler/optimizing/bounds_check_elimination_test.cc +++ b/compiler/optimizing/bounds_check_elimination_test.cc @@ -14,8 +14,9 @@ * limitations under the License. 
*/ -#include "base/arena_allocator.h" #include "bounds_check_elimination.h" + +#include "base/arena_allocator.h" #include "builder.h" #include "gvn.h" #include "induction_var_analysis.h" @@ -31,10 +32,9 @@ namespace art { /** * Fixture class for the BoundsCheckElimination tests. */ -class BoundsCheckEliminationTest : public testing::Test { +class BoundsCheckEliminationTest : public OptimizingUnitTest { public: - BoundsCheckEliminationTest() : pool_(), allocator_(&pool_) { - graph_ = CreateGraph(&allocator_); + BoundsCheckEliminationTest() : graph_(CreateGraph()) { graph_->SetHasBoundsChecks(true); } @@ -56,8 +56,6 @@ class BoundsCheckEliminationTest : public testing::Test { BoundsCheckElimination(graph_, side_effects, &induction).Run(); } - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; }; @@ -66,83 +64,83 @@ class BoundsCheckEliminationTest : public testing::Test { // else if (i >= array.length) { array[i] = 1; // Can't eliminate. } // else { array[i] = 1; // Can eliminate. } TEST_F(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); - HInstruction* parameter1 = new (&allocator_) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); // array - HInstruction* parameter2 = new (&allocator_) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); // i + HInstruction* parameter1 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); // array + HInstruction* parameter2 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); // i entry->AddInstruction(parameter1); entry->AddInstruction(parameter2); HInstruction* constant_1 = graph_->GetIntConstant(1); HInstruction* constant_0 = graph_->GetIntConstant(0); - HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block1 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block1); - HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(parameter2, constant_0); - HIf* if_inst = new (&allocator_) HIf(cmp); + HInstruction* cmp = new (GetAllocator()) HGreaterThanOrEqual(parameter2, constant_0); + HIf* if_inst = new (GetAllocator()) HIf(cmp); block1->AddInstruction(cmp); block1->AddInstruction(if_inst); entry->AddSuccessor(block1); - HBasicBlock* block2 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block2 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block2); - HNullCheck* null_check = new (&allocator_) HNullCheck(parameter1, 0); - HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0); - HBoundsCheck* bounds_check2 = new (&allocator_) + HNullCheck* null_check = new (GetAllocator()) HNullCheck(parameter1, 0); + HArrayLength* array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HBoundsCheck* bounds_check2 = new (GetAllocator()) HBoundsCheck(parameter2, array_length, 0); - HArraySet* array_set = new (&allocator_) HArraySet( - null_check, bounds_check2, constant_1, Primitive::kPrimInt, 0); + HArraySet* array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check2, constant_1, DataType::Type::kInt32, 0); block2->AddInstruction(null_check); block2->AddInstruction(array_length); block2->AddInstruction(bounds_check2); block2->AddInstruction(array_set); - HBasicBlock* block3 = new 
(&allocator_) HBasicBlock(graph_); + HBasicBlock* block3 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block3); - null_check = new (&allocator_) HNullCheck(parameter1, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - cmp = new (&allocator_) HLessThan(parameter2, array_length); - if_inst = new (&allocator_) HIf(cmp); + null_check = new (GetAllocator()) HNullCheck(parameter1, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + cmp = new (GetAllocator()) HLessThan(parameter2, array_length); + if_inst = new (GetAllocator()) HIf(cmp); block3->AddInstruction(null_check); block3->AddInstruction(array_length); block3->AddInstruction(cmp); block3->AddInstruction(if_inst); - HBasicBlock* block4 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block4 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block4); - null_check = new (&allocator_) HNullCheck(parameter1, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HBoundsCheck* bounds_check4 = new (&allocator_) + null_check = new (GetAllocator()) HNullCheck(parameter1, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HBoundsCheck* bounds_check4 = new (GetAllocator()) HBoundsCheck(parameter2, array_length, 0); - array_set = new (&allocator_) HArraySet( - null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); + array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check4, constant_1, DataType::Type::kInt32, 0); block4->AddInstruction(null_check); block4->AddInstruction(array_length); block4->AddInstruction(bounds_check4); block4->AddInstruction(array_set); - HBasicBlock* block5 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block5 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block5); - null_check = new (&allocator_) HNullCheck(parameter1, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HBoundsCheck* bounds_check5 = new (&allocator_) + null_check = new (GetAllocator()) HNullCheck(parameter1, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HBoundsCheck* bounds_check5 = new (GetAllocator()) HBoundsCheck(parameter2, array_length, 0); - array_set = new (&allocator_) HArraySet( - null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); + array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check5, constant_1, DataType::Type::kInt32, 0); block5->AddInstruction(null_check); block5->AddInstruction(array_length); block5->AddInstruction(bounds_check5); block5->AddInstruction(array_set); - HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* exit = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(exit); block2->AddSuccessor(exit); block4->AddSuccessor(exit); block5->AddSuccessor(exit); - exit->AddInstruction(new (&allocator_) HExit()); + exit->AddInstruction(new (GetAllocator()) HExit()); block1->AddSuccessor(block3); // True successor block1->AddSuccessor(block2); // False successor @@ -163,13 +161,13 @@ TEST_F(BoundsCheckEliminationTest, NarrowingRangeArrayBoundsElimination) { // if (j < array.length) array[j] = 1; // Can't eliminate. 
// } TEST_F(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); - HInstruction* parameter1 = new (&allocator_) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); // array - HInstruction* parameter2 = new (&allocator_) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); // i + HInstruction* parameter1 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); // array + HInstruction* parameter2 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); // i entry->AddInstruction(parameter1); entry->AddInstruction(parameter2); @@ -177,39 +175,40 @@ TEST_F(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { HInstruction* constant_0 = graph_->GetIntConstant(0); HInstruction* constant_max_int = graph_->GetIntConstant(INT_MAX); - HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block1 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block1); - HInstruction* cmp = new (&allocator_) HLessThanOrEqual(parameter2, constant_0); - HIf* if_inst = new (&allocator_) HIf(cmp); + HInstruction* cmp = new (GetAllocator()) HLessThanOrEqual(parameter2, constant_0); + HIf* if_inst = new (GetAllocator()) HIf(cmp); block1->AddInstruction(cmp); block1->AddInstruction(if_inst); entry->AddSuccessor(block1); - HBasicBlock* block2 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block2 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block2); - HInstruction* add = new (&allocator_) HAdd(Primitive::kPrimInt, parameter2, constant_max_int); - HNullCheck* null_check = new (&allocator_) HNullCheck(parameter1, 0); - HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0); - HInstruction* cmp2 = new (&allocator_) HGreaterThanOrEqual(add, array_length); - if_inst = new (&allocator_) HIf(cmp2); + HInstruction* add = + new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter2, constant_max_int); + HNullCheck* null_check = new (GetAllocator()) HNullCheck(parameter1, 0); + HArrayLength* array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HInstruction* cmp2 = new (GetAllocator()) HGreaterThanOrEqual(add, array_length); + if_inst = new (GetAllocator()) HIf(cmp2); block2->AddInstruction(add); block2->AddInstruction(null_check); block2->AddInstruction(array_length); block2->AddInstruction(cmp2); block2->AddInstruction(if_inst); - HBasicBlock* block3 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block3 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block3); - HBoundsCheck* bounds_check = new (&allocator_) + HBoundsCheck* bounds_check = new (GetAllocator()) HBoundsCheck(add, array_length, 0); - HArraySet* array_set = new (&allocator_) HArraySet( - null_check, bounds_check, constant_1, Primitive::kPrimInt, 0); + HArraySet* array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check, constant_1, DataType::Type::kInt32, 0); block3->AddInstruction(bounds_check); block3->AddInstruction(array_set); - HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* exit = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(exit); - exit->AddInstruction(new (&allocator_) HExit()); + exit->AddInstruction(new (GetAllocator()) HExit()); 
block1->AddSuccessor(exit); // true successor block1->AddSuccessor(block2); // false successor block2->AddSuccessor(exit); // true successor @@ -227,13 +226,13 @@ TEST_F(BoundsCheckEliminationTest, OverflowArrayBoundsElimination) { // if (j > 0) array[j] = 1; // Can't eliminate. // } TEST_F(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); - HInstruction* parameter1 = new (&allocator_) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); // array - HInstruction* parameter2 = new (&allocator_) - HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); // i + HInstruction* parameter1 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); // array + HInstruction* parameter2 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); // i entry->AddInstruction(parameter1); entry->AddInstruction(parameter2); @@ -241,41 +240,42 @@ TEST_F(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { HInstruction* constant_0 = graph_->GetIntConstant(0); HInstruction* constant_max_int = graph_->GetIntConstant(INT_MAX); - HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block1 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block1); - HNullCheck* null_check = new (&allocator_) HNullCheck(parameter1, 0); - HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0); - HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(parameter2, array_length); - HIf* if_inst = new (&allocator_) HIf(cmp); + HNullCheck* null_check = new (GetAllocator()) HNullCheck(parameter1, 0); + HArrayLength* array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HInstruction* cmp = new (GetAllocator()) HGreaterThanOrEqual(parameter2, array_length); + HIf* if_inst = new (GetAllocator()) HIf(cmp); block1->AddInstruction(null_check); block1->AddInstruction(array_length); block1->AddInstruction(cmp); block1->AddInstruction(if_inst); entry->AddSuccessor(block1); - HBasicBlock* block2 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block2 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block2); - HInstruction* sub1 = new (&allocator_) HSub(Primitive::kPrimInt, parameter2, constant_max_int); - HInstruction* sub2 = new (&allocator_) HSub(Primitive::kPrimInt, sub1, constant_max_int); - HInstruction* cmp2 = new (&allocator_) HLessThanOrEqual(sub2, constant_0); - if_inst = new (&allocator_) HIf(cmp2); + HInstruction* sub1 = + new (GetAllocator()) HSub(DataType::Type::kInt32, parameter2, constant_max_int); + HInstruction* sub2 = new (GetAllocator()) HSub(DataType::Type::kInt32, sub1, constant_max_int); + HInstruction* cmp2 = new (GetAllocator()) HLessThanOrEqual(sub2, constant_0); + if_inst = new (GetAllocator()) HIf(cmp2); block2->AddInstruction(sub1); block2->AddInstruction(sub2); block2->AddInstruction(cmp2); block2->AddInstruction(if_inst); - HBasicBlock* block3 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block3 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block3); - HBoundsCheck* bounds_check = new (&allocator_) + HBoundsCheck* bounds_check = new (GetAllocator()) HBoundsCheck(sub2, array_length, 0); - HArraySet* array_set = new (&allocator_) HArraySet( - 
null_check, bounds_check, constant_1, Primitive::kPrimInt, 0); + HArraySet* array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check, constant_1, DataType::Type::kInt32, 0); block3->AddInstruction(bounds_check); block3->AddInstruction(array_set); - HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* exit = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(exit); - exit->AddInstruction(new (&allocator_) HExit()); + exit->AddInstruction(new (GetAllocator()) HExit()); block1->AddSuccessor(exit); // true successor block1->AddSuccessor(block2); // false successor block2->AddSuccessor(exit); // true successor @@ -291,11 +291,11 @@ TEST_F(BoundsCheckEliminationTest, UnderflowArrayBoundsElimination) { // array[5] = 1; // Can eliminate. // array[4] = 1; // Can eliminate. TEST_F(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); - HInstruction* parameter = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* parameter = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter); HInstruction* constant_5 = graph_->GetIntConstant(5); @@ -303,49 +303,49 @@ TEST_F(BoundsCheckEliminationTest, ConstantArrayBoundsElimination) { HInstruction* constant_6 = graph_->GetIntConstant(6); HInstruction* constant_1 = graph_->GetIntConstant(1); - HBasicBlock* block = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block); entry->AddSuccessor(block); - HNullCheck* null_check = new (&allocator_) HNullCheck(parameter, 0); - HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0); - HBoundsCheck* bounds_check6 = new (&allocator_) + HNullCheck* null_check = new (GetAllocator()) HNullCheck(parameter, 0); + HArrayLength* array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HBoundsCheck* bounds_check6 = new (GetAllocator()) HBoundsCheck(constant_6, array_length, 0); - HInstruction* array_set = new (&allocator_) HArraySet( - null_check, bounds_check6, constant_1, Primitive::kPrimInt, 0); + HInstruction* array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check6, constant_1, DataType::Type::kInt32, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); block->AddInstruction(bounds_check6); block->AddInstruction(array_set); - null_check = new (&allocator_) HNullCheck(parameter, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HBoundsCheck* bounds_check5 = new (&allocator_) + null_check = new (GetAllocator()) HNullCheck(parameter, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HBoundsCheck* bounds_check5 = new (GetAllocator()) HBoundsCheck(constant_5, array_length, 0); - array_set = new (&allocator_) HArraySet( - null_check, bounds_check5, constant_1, Primitive::kPrimInt, 0); + array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check5, constant_1, DataType::Type::kInt32, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); block->AddInstruction(bounds_check5); block->AddInstruction(array_set); - null_check = new (&allocator_) HNullCheck(parameter, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - 
HBoundsCheck* bounds_check4 = new (&allocator_) + null_check = new (GetAllocator()) HNullCheck(parameter, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HBoundsCheck* bounds_check4 = new (GetAllocator()) HBoundsCheck(constant_4, array_length, 0); - array_set = new (&allocator_) HArraySet( - null_check, bounds_check4, constant_1, Primitive::kPrimInt, 0); + array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check4, constant_1, DataType::Type::kInt32, 0); block->AddInstruction(null_check); block->AddInstruction(array_length); block->AddInstruction(bounds_check4); block->AddInstruction(array_set); - block->AddInstruction(new (&allocator_) HGoto()); + block->AddInstruction(new (GetAllocator()) HGoto()); - HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* exit = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(exit); block->AddSuccessor(exit); - exit->AddInstruction(new (&allocator_) HExit()); + exit->AddInstruction(new (GetAllocator()) HExit()); RunBCE(); @@ -364,7 +364,7 @@ static HInstruction* BuildSSAGraph1(HGraph* graph, graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter); HInstruction* constant_initial = graph->GetIntConstant(initial); @@ -388,7 +388,7 @@ static HInstruction* BuildSSAGraph1(HGraph* graph, loop_header->AddSuccessor(loop_body); // false successor loop_body->AddSuccessor(loop_header); - HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); + HPhi* phi = new (allocator) HPhi(allocator, 0, 0, DataType::Type::kInt32); HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); HInstruction* array_length = new (allocator) HArrayLength(null_check, 0); HInstruction* cmp = nullptr; @@ -410,9 +410,9 @@ static HInstruction* BuildSSAGraph1(HGraph* graph, array_length = new (allocator) HArrayLength(null_check, 0); HInstruction* bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0); HInstruction* array_set = new (allocator) HArraySet( - null_check, bounds_check, constant_10, Primitive::kPrimInt, 0); + null_check, bounds_check, constant_10, DataType::Type::kInt32, 0); - HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment); + HInstruction* add = new (allocator) HAdd(DataType::Type::kInt32, phi, constant_increment); loop_body->AddInstruction(null_check); loop_body->AddInstruction(array_length); loop_body->AddInstruction(bounds_check); @@ -428,28 +428,28 @@ static HInstruction* BuildSSAGraph1(HGraph* graph, TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1a) { // for (int i=0; i<array.length; i++) { array[i] = 10; // Can eliminate with gvn. } - HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 0, 1); + HInstruction* bounds_check = BuildSSAGraph1(graph_, GetAllocator(), 0, 1); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1b) { // for (int i=1; i<array.length; i++) { array[i] = 10; // Can eliminate. 
} - HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 1, 1); + HInstruction* bounds_check = BuildSSAGraph1(graph_, GetAllocator(), 1, 1); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1c) { // for (int i=-1; i<array.length; i++) { array[i] = 10; // Can't eliminate. } - HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, -1, 1); + HInstruction* bounds_check = BuildSSAGraph1(graph_, GetAllocator(), -1, 1); RunBCE(); ASSERT_FALSE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1d) { // for (int i=0; i<=array.length; i++) { array[i] = 10; // Can't eliminate. } - HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 0, 1, kCondGT); + HInstruction* bounds_check = BuildSSAGraph1(graph_, GetAllocator(), 0, 1, kCondGT); RunBCE(); ASSERT_FALSE(IsRemoved(bounds_check)); } @@ -457,14 +457,14 @@ TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1d) { TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1e) { // for (int i=0; i<array.length; i += 2) { // array[i] = 10; // Can't eliminate due to overflow concern. } - HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 0, 2); + HInstruction* bounds_check = BuildSSAGraph1(graph_, GetAllocator(), 0, 2); RunBCE(); ASSERT_FALSE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination1f) { // for (int i=1; i<array.length; i += 2) { array[i] = 10; // Can eliminate. } - HInstruction* bounds_check = BuildSSAGraph1(graph_, &allocator_, 1, 2); + HInstruction* bounds_check = BuildSSAGraph1(graph_, GetAllocator(), 1, 2); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } @@ -479,7 +479,7 @@ static HInstruction* BuildSSAGraph2(HGraph *graph, graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter); HInstruction* constant_initial = graph->GetIntConstant(initial); @@ -508,7 +508,7 @@ static HInstruction* BuildSSAGraph2(HGraph *graph, loop_header->AddSuccessor(loop_body); // false successor loop_body->AddSuccessor(loop_header); - HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); + HPhi* phi = new (allocator) HPhi(allocator, 0, 0, DataType::Type::kInt32); HInstruction* cmp = nullptr; if (cond == kCondLE) { cmp = new (allocator) HLessThanOrEqual(phi, constant_initial); @@ -522,13 +522,13 @@ static HInstruction* BuildSSAGraph2(HGraph *graph, loop_header->AddInstruction(if_inst); phi->AddInput(array_length); - HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_minus_1); + HInstruction* add = new (allocator) HAdd(DataType::Type::kInt32, phi, constant_minus_1); null_check = new (allocator) HNullCheck(parameter, 0); array_length = new (allocator) HArrayLength(null_check, 0); HInstruction* bounds_check = new (allocator) HBoundsCheck(add, array_length, 0); HInstruction* array_set = new (allocator) HArraySet( - null_check, bounds_check, constant_10, Primitive::kPrimInt, 0); - HInstruction* add_phi = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment); + null_check, bounds_check, constant_10, DataType::Type::kInt32, 0); + HInstruction* add_phi = new (allocator) HAdd(DataType::Type::kInt32, phi, constant_increment); loop_body->AddInstruction(add); loop_body->AddInstruction(null_check); 
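(The remaining loop-body wiring for BuildSSAGraph2 continues below.) A plain-loop sketch of what this helper models, and why the 2a/2b/2e checks are removable while 2c/2d are not (illustrative code, not part of the patch; len stands for array.length):

    for (int i = len; i > 0; i--) {
      // The guard proves i >= 1 inside the body, so the index i - 1 lies in
      // [0, len - 1] and the HBoundsCheck on it is redundant (tests 2a, 2b, 2e).
      array[i - 1] = 10;
    }
    // If the guard is i > -1 or i >= 0 instead (tests 2c, 2d), i can reach 0,
    // the index i - 1 can be -1, and the bounds check has to stay.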
loop_body->AddInstruction(array_length); @@ -545,35 +545,35 @@ static HInstruction* BuildSSAGraph2(HGraph *graph, TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2a) { // for (int i=array.length; i>0; i--) { array[i-1] = 10; // Can eliminate with gvn. } - HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, 0); + HInstruction* bounds_check = BuildSSAGraph2(graph_, GetAllocator(), 0); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2b) { // for (int i=array.length; i>1; i--) { array[i-1] = 10; // Can eliminate. } - HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, 1); + HInstruction* bounds_check = BuildSSAGraph2(graph_, GetAllocator(), 1); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2c) { // for (int i=array.length; i>-1; i--) { array[i-1] = 10; // Can't eliminate. } - HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, -1); + HInstruction* bounds_check = BuildSSAGraph2(graph_, GetAllocator(), -1); RunBCE(); ASSERT_FALSE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2d) { // for (int i=array.length; i>=0; i--) { array[i-1] = 10; // Can't eliminate. } - HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, 0, -1, kCondLT); + HInstruction* bounds_check = BuildSSAGraph2(graph_, GetAllocator(), 0, -1, kCondLT); RunBCE(); ASSERT_FALSE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination2e) { // for (int i=array.length; i>0; i-=2) { array[i-1] = 10; // Can eliminate. } - HInstruction* bounds_check = BuildSSAGraph2(graph_, &allocator_, 0, -2); + HInstruction* bounds_check = BuildSSAGraph2(graph_, GetAllocator(), 0, -2); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } @@ -616,7 +616,7 @@ static HInstruction* BuildSSAGraph3(HGraph* graph, loop_header->AddSuccessor(loop_body); // false successor loop_body->AddSuccessor(loop_header); - HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); + HPhi* phi = new (allocator) HPhi(allocator, 0, 0, DataType::Type::kInt32); HInstruction* cmp = nullptr; if (cond == kCondGE) { cmp = new (allocator) HGreaterThanOrEqual(phi, constant_10); @@ -634,8 +634,8 @@ static HInstruction* BuildSSAGraph3(HGraph* graph, HArrayLength* array_length = new (allocator) HArrayLength(null_check, 0); HInstruction* bounds_check = new (allocator) HBoundsCheck(phi, array_length, 0); HInstruction* array_set = new (allocator) HArraySet( - null_check, bounds_check, constant_10, Primitive::kPrimInt, 0); - HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_increment); + null_check, bounds_check, constant_10, DataType::Type::kInt32, 0); + HInstruction* add = new (allocator) HAdd(DataType::Type::kInt32, phi, constant_increment); loop_body->AddInstruction(null_check); loop_body->AddInstruction(array_length); loop_body->AddInstruction(bounds_check); @@ -652,7 +652,7 @@ static HInstruction* BuildSSAGraph3(HGraph* graph, TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3a) { // int[] array = new int[10]; // for (int i=0; i<10; i++) { array[i] = 10; // Can eliminate. 
} - HInstruction* bounds_check = BuildSSAGraph3(graph_, &allocator_, 0, 1, kCondGE); + HInstruction* bounds_check = BuildSSAGraph3(graph_, GetAllocator(), 0, 1, kCondGE); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } @@ -660,7 +660,7 @@ TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3a) { TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3b) { // int[] array = new int[10]; // for (int i=1; i<10; i++) { array[i] = 10; // Can eliminate. } - HInstruction* bounds_check = BuildSSAGraph3(graph_, &allocator_, 1, 1, kCondGE); + HInstruction* bounds_check = BuildSSAGraph3(graph_, GetAllocator(), 1, 1, kCondGE); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } @@ -668,7 +668,7 @@ TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3b) { TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3c) { // int[] array = new int[10]; // for (int i=0; i<=10; i++) { array[i] = 10; // Can't eliminate. } - HInstruction* bounds_check = BuildSSAGraph3(graph_, &allocator_, 0, 1, kCondGT); + HInstruction* bounds_check = BuildSSAGraph3(graph_, GetAllocator(), 0, 1, kCondGT); RunBCE(); ASSERT_FALSE(IsRemoved(bounds_check)); } @@ -676,7 +676,7 @@ TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3c) { TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination3d) { // int[] array = new int[10]; // for (int i=1; i<10; i+=8) { array[i] = 10; // Can eliminate. } - HInstruction* bounds_check = BuildSSAGraph3(graph_, &allocator_, 1, 8, kCondGE); + HInstruction* bounds_check = BuildSSAGraph3(graph_, GetAllocator(), 1, 8, kCondGE); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } @@ -690,7 +690,7 @@ static HInstruction* BuildSSAGraph4(HGraph* graph, graph->AddBlock(entry); graph->SetEntryBlock(entry); HInstruction* parameter = new (allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter); HInstruction* constant_initial = graph->GetIntConstant(initial); @@ -715,7 +715,7 @@ static HInstruction* BuildSSAGraph4(HGraph* graph, loop_header->AddSuccessor(loop_body); // false successor loop_body->AddSuccessor(loop_header); - HPhi* phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); + HPhi* phi = new (allocator) HPhi(allocator, 0, 0, DataType::Type::kInt32); HInstruction* null_check = new (allocator) HNullCheck(parameter, 0); HInstruction* array_length = new (allocator) HArrayLength(null_check, 0); HInstruction* cmp = nullptr; @@ -734,13 +734,13 @@ static HInstruction* BuildSSAGraph4(HGraph* graph, null_check = new (allocator) HNullCheck(parameter, 0); array_length = new (allocator) HArrayLength(null_check, 0); - HInstruction* sub = new (allocator) HSub(Primitive::kPrimInt, array_length, phi); + HInstruction* sub = new (allocator) HSub(DataType::Type::kInt32, array_length, phi); HInstruction* add_minus_1 = new (allocator) - HAdd(Primitive::kPrimInt, sub, constant_minus_1); + HAdd(DataType::Type::kInt32, sub, constant_minus_1); HInstruction* bounds_check = new (allocator) HBoundsCheck(add_minus_1, array_length, 0); HInstruction* array_set = new (allocator) HArraySet( - null_check, bounds_check, constant_10, Primitive::kPrimInt, 0); - HInstruction* add = new (allocator) HAdd(Primitive::kPrimInt, phi, constant_1); + null_check, bounds_check, constant_10, DataType::Type::kInt32, 0); + HInstruction* add = new (allocator) HAdd(DataType::Type::kInt32, phi, constant_1); loop_body->AddInstruction(null_check); 
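(The rest of BuildSSAGraph4's loop body is wired up below.) The reverse-indexing case it models, again as an illustrative plain loop rather than patch code; len stands for array.length, and the graph computes the index as (len - i) + (-1):

    for (int i = 0; i < len; i++) {
      // The guard gives 0 <= i <= len - 1, so len - i - 1 also lies in
      // [0, len - 1] and the HBoundsCheck is removable (tests 4a and 4b).
      array[len - i - 1] = 10;
    }
    // With the 4c guard (i <= len), the last iteration has i == len, the index
    // becomes -1, and the check cannot be removed.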
loop_body->AddInstruction(array_length); loop_body->AddInstruction(sub); @@ -758,21 +758,21 @@ static HInstruction* BuildSSAGraph4(HGraph* graph, TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination4a) { // for (int i=0; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate with gvn. } - HInstruction* bounds_check = BuildSSAGraph4(graph_, &allocator_, 0); + HInstruction* bounds_check = BuildSSAGraph4(graph_, GetAllocator(), 0); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination4b) { // for (int i=1; i<array.length; i++) { array[array.length-i-1] = 10; // Can eliminate. } - HInstruction* bounds_check = BuildSSAGraph4(graph_, &allocator_, 1); + HInstruction* bounds_check = BuildSSAGraph4(graph_, GetAllocator(), 1); RunBCE(); ASSERT_TRUE(IsRemoved(bounds_check)); } TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination4c) { // for (int i=0; i<=array.length; i++) { array[array.length-i] = 10; // Can't eliminate. } - HInstruction* bounds_check = BuildSSAGraph4(graph_, &allocator_, 0, kCondGT); + HInstruction* bounds_check = BuildSSAGraph4(graph_, GetAllocator(), 0, kCondGT); RunBCE(); ASSERT_FALSE(IsRemoved(bounds_check)); } @@ -789,34 +789,34 @@ TEST_F(BoundsCheckEliminationTest, LoopArrayBoundsElimination4c) { // } // } TEST_F(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); - HInstruction* parameter = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* parameter = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter); HInstruction* constant_0 = graph_->GetIntConstant(0); HInstruction* constant_minus_1 = graph_->GetIntConstant(-1); HInstruction* constant_1 = graph_->GetIntConstant(1); - HBasicBlock* block = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block); entry->AddSuccessor(block); - block->AddInstruction(new (&allocator_) HGoto()); + block->AddInstruction(new (GetAllocator()) HGoto()); - HBasicBlock* exit = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* exit = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(exit); - exit->AddInstruction(new (&allocator_) HExit()); + exit->AddInstruction(new (GetAllocator()) HExit()); - HBasicBlock* outer_header = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* outer_header = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(outer_header); - HPhi* phi_i = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt); - HNullCheck* null_check = new (&allocator_) HNullCheck(parameter, 0); - HArrayLength* array_length = new (&allocator_) HArrayLength(null_check, 0); - HAdd* add = new (&allocator_) HAdd(Primitive::kPrimInt, array_length, constant_minus_1); - HInstruction* cmp = new (&allocator_) HGreaterThanOrEqual(phi_i, add); - HIf* if_inst = new (&allocator_) HIf(cmp); + HPhi* phi_i = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); + HNullCheck* null_check = new (GetAllocator()) HNullCheck(parameter, 0); + HArrayLength* array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HAdd* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, array_length, constant_minus_1); + 
HInstruction* cmp = new (GetAllocator()) HGreaterThanOrEqual(phi_i, add); + HIf* if_inst = new (GetAllocator()) HIf(cmp); outer_header->AddPhi(phi_i); outer_header->AddInstruction(null_check); outer_header->AddInstruction(array_length); @@ -825,15 +825,15 @@ TEST_F(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { outer_header->AddInstruction(if_inst); phi_i->AddInput(constant_0); - HBasicBlock* inner_header = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* inner_header = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(inner_header); - HPhi* phi_j = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt); - null_check = new (&allocator_) HNullCheck(parameter, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HSub* sub = new (&allocator_) HSub(Primitive::kPrimInt, array_length, phi_i); - add = new (&allocator_) HAdd(Primitive::kPrimInt, sub, constant_minus_1); - cmp = new (&allocator_) HGreaterThanOrEqual(phi_j, add); - if_inst = new (&allocator_) HIf(cmp); + HPhi* phi_j = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); + null_check = new (GetAllocator()) HNullCheck(parameter, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HSub* sub = new (GetAllocator()) HSub(DataType::Type::kInt32, array_length, phi_i); + add = new (GetAllocator()) HAdd(DataType::Type::kInt32, sub, constant_minus_1); + cmp = new (GetAllocator()) HGreaterThanOrEqual(phi_j, add); + if_inst = new (GetAllocator()) HIf(cmp); inner_header->AddPhi(phi_j); inner_header->AddInstruction(null_check); inner_header->AddInstruction(array_length); @@ -843,25 +843,25 @@ TEST_F(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { inner_header->AddInstruction(if_inst); phi_j->AddInput(constant_0); - HBasicBlock* inner_body_compare = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* inner_body_compare = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(inner_body_compare); - null_check = new (&allocator_) HNullCheck(parameter, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HBoundsCheck* bounds_check1 = new (&allocator_) HBoundsCheck(phi_j, array_length, 0); - HArrayGet* array_get_j = new (&allocator_) - HArrayGet(null_check, bounds_check1, Primitive::kPrimInt, 0); + null_check = new (GetAllocator()) HNullCheck(parameter, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HBoundsCheck* bounds_check1 = new (GetAllocator()) HBoundsCheck(phi_j, array_length, 0); + HArrayGet* array_get_j = new (GetAllocator()) + HArrayGet(null_check, bounds_check1, DataType::Type::kInt32, 0); inner_body_compare->AddInstruction(null_check); inner_body_compare->AddInstruction(array_length); inner_body_compare->AddInstruction(bounds_check1); inner_body_compare->AddInstruction(array_get_j); - HInstruction* j_plus_1 = new (&allocator_) HAdd(Primitive::kPrimInt, phi_j, constant_1); - null_check = new (&allocator_) HNullCheck(parameter, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HBoundsCheck* bounds_check2 = new (&allocator_) HBoundsCheck(j_plus_1, array_length, 0); - HArrayGet* array_get_j_plus_1 = new (&allocator_) - HArrayGet(null_check, bounds_check2, Primitive::kPrimInt, 0); - cmp = new (&allocator_) HGreaterThanOrEqual(array_get_j, array_get_j_plus_1); - if_inst = new (&allocator_) HIf(cmp); + HInstruction* j_plus_1 = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi_j, constant_1); + null_check = new (GetAllocator()) HNullCheck(parameter, 0); + array_length 
= new (GetAllocator()) HArrayLength(null_check, 0); + HBoundsCheck* bounds_check2 = new (GetAllocator()) HBoundsCheck(j_plus_1, array_length, 0); + HArrayGet* array_get_j_plus_1 = new (GetAllocator()) + HArrayGet(null_check, bounds_check2, DataType::Type::kInt32, 0); + cmp = new (GetAllocator()) HGreaterThanOrEqual(array_get_j, array_get_j_plus_1); + if_inst = new (GetAllocator()) HIf(cmp); inner_body_compare->AddInstruction(j_plus_1); inner_body_compare->AddInstruction(null_check); inner_body_compare->AddInstruction(array_length); @@ -870,63 +870,63 @@ TEST_F(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { inner_body_compare->AddInstruction(cmp); inner_body_compare->AddInstruction(if_inst); - HBasicBlock* inner_body_swap = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* inner_body_swap = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(inner_body_swap); - j_plus_1 = new (&allocator_) HAdd(Primitive::kPrimInt, phi_j, constant_1); + j_plus_1 = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi_j, constant_1); // temp = array[j+1] - null_check = new (&allocator_) HNullCheck(parameter, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HInstruction* bounds_check3 = new (&allocator_) HBoundsCheck(j_plus_1, array_length, 0); - array_get_j_plus_1 = new (&allocator_) - HArrayGet(null_check, bounds_check3, Primitive::kPrimInt, 0); + null_check = new (GetAllocator()) HNullCheck(parameter, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HInstruction* bounds_check3 = new (GetAllocator()) HBoundsCheck(j_plus_1, array_length, 0); + array_get_j_plus_1 = new (GetAllocator()) + HArrayGet(null_check, bounds_check3, DataType::Type::kInt32, 0); inner_body_swap->AddInstruction(j_plus_1); inner_body_swap->AddInstruction(null_check); inner_body_swap->AddInstruction(array_length); inner_body_swap->AddInstruction(bounds_check3); inner_body_swap->AddInstruction(array_get_j_plus_1); // array[j+1] = array[j] - null_check = new (&allocator_) HNullCheck(parameter, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HInstruction* bounds_check4 = new (&allocator_) HBoundsCheck(phi_j, array_length, 0); - array_get_j = new (&allocator_) - HArrayGet(null_check, bounds_check4, Primitive::kPrimInt, 0); + null_check = new (GetAllocator()) HNullCheck(parameter, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HInstruction* bounds_check4 = new (GetAllocator()) HBoundsCheck(phi_j, array_length, 0); + array_get_j = new (GetAllocator()) + HArrayGet(null_check, bounds_check4, DataType::Type::kInt32, 0); inner_body_swap->AddInstruction(null_check); inner_body_swap->AddInstruction(array_length); inner_body_swap->AddInstruction(bounds_check4); inner_body_swap->AddInstruction(array_get_j); - null_check = new (&allocator_) HNullCheck(parameter, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HInstruction* bounds_check5 = new (&allocator_) HBoundsCheck(j_plus_1, array_length, 0); - HArraySet* array_set_j_plus_1 = new (&allocator_) - HArraySet(null_check, bounds_check5, array_get_j, Primitive::kPrimInt, 0); + null_check = new (GetAllocator()) HNullCheck(parameter, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HInstruction* bounds_check5 = new (GetAllocator()) HBoundsCheck(j_plus_1, array_length, 0); + HArraySet* array_set_j_plus_1 = new (GetAllocator()) + HArraySet(null_check, bounds_check5, array_get_j, DataType::Type::kInt32, 0); 
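(The remaining swap wiring, the j++/i++ blocks, and the final IsRemoved assertions follow below.) A short derivation of why every one of the six HBoundsCheck instructions in this test is removable, using len for the array length and mirroring the loop guards built above:

    // outer guard:  exit when i >= len - 1      =>  inside the body  0 <= i <= len - 2
    // inner guard:  exit when j >= len - i - 1  =>  inside the body  0 <= j <= len - i - 2
    // hence         0 <= j <= len - 2    and    1 <= j + 1 <= len - 1
    // so every access to array[j] and array[j + 1] is in range, which is what the
    // ASSERT_TRUE(IsRemoved(bounds_check1 .. bounds_check6)) calls at the end verify.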
inner_body_swap->AddInstruction(null_check); inner_body_swap->AddInstruction(array_length); inner_body_swap->AddInstruction(bounds_check5); inner_body_swap->AddInstruction(array_set_j_plus_1); // array[j] = temp - null_check = new (&allocator_) HNullCheck(parameter, 0); - array_length = new (&allocator_) HArrayLength(null_check, 0); - HInstruction* bounds_check6 = new (&allocator_) HBoundsCheck(phi_j, array_length, 0); - HArraySet* array_set_j = new (&allocator_) - HArraySet(null_check, bounds_check6, array_get_j_plus_1, Primitive::kPrimInt, 0); + null_check = new (GetAllocator()) HNullCheck(parameter, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HInstruction* bounds_check6 = new (GetAllocator()) HBoundsCheck(phi_j, array_length, 0); + HArraySet* array_set_j = new (GetAllocator()) + HArraySet(null_check, bounds_check6, array_get_j_plus_1, DataType::Type::kInt32, 0); inner_body_swap->AddInstruction(null_check); inner_body_swap->AddInstruction(array_length); inner_body_swap->AddInstruction(bounds_check6); inner_body_swap->AddInstruction(array_set_j); - inner_body_swap->AddInstruction(new (&allocator_) HGoto()); + inner_body_swap->AddInstruction(new (GetAllocator()) HGoto()); - HBasicBlock* inner_body_add = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* inner_body_add = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(inner_body_add); - add = new (&allocator_) HAdd(Primitive::kPrimInt, phi_j, constant_1); + add = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi_j, constant_1); inner_body_add->AddInstruction(add); - inner_body_add->AddInstruction(new (&allocator_) HGoto()); + inner_body_add->AddInstruction(new (GetAllocator()) HGoto()); phi_j->AddInput(add); - HBasicBlock* outer_body_add = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* outer_body_add = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(outer_body_add); - add = new (&allocator_) HAdd(Primitive::kPrimInt, phi_i, constant_1); + add = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi_i, constant_1); outer_body_add->AddInstruction(add); - outer_body_add->AddInstruction(new (&allocator_) HGoto()); + outer_body_add->AddInstruction(new (GetAllocator()) HGoto()); phi_i->AddInput(add); block->AddSuccessor(outer_header); @@ -950,4 +950,155 @@ TEST_F(BoundsCheckEliminationTest, BubbleSortArrayBoundsElimination) { ASSERT_TRUE(IsRemoved(bounds_check6)); } +// int[] array = new int[10]; +// for (int i=0; i<200; i++) { +// array[i%10] = 10; // Can eliminate +// array[i%1] = 10; // Can eliminate +// array[i%200] = 10; // Cannot eliminate +// array[i%-10] = 10; // Can eliminate +// array[i%array.length] = 10; // Can eliminate +// array[param_i%10] = 10; // Can't eliminate, when param_i < 0 +// } +TEST_F(BoundsCheckEliminationTest, ModArrayBoundsElimination) { + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + HInstruction* param_i = new (GetAllocator()) + HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); + entry->AddInstruction(param_i); + + HInstruction* constant_0 = graph_->GetIntConstant(0); + HInstruction* constant_1 = graph_->GetIntConstant(1); + HInstruction* constant_10 = graph_->GetIntConstant(10); + HInstruction* constant_200 = graph_->GetIntConstant(200); + HInstruction* constant_minus_10 = graph_->GetIntConstant(-10); + + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(block); + entry->AddSuccessor(block); + // We pass a 
bogus constant for the class to avoid mocking one. + HInstruction* new_array = new (GetAllocator()) HNewArray(constant_10, constant_10, 0); + block->AddInstruction(new_array); + block->AddInstruction(new (GetAllocator()) HGoto()); + + HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* exit = new (GetAllocator()) HBasicBlock(graph_); + + graph_->AddBlock(loop_header); + graph_->AddBlock(loop_body); + graph_->AddBlock(exit); + block->AddSuccessor(loop_header); + loop_header->AddSuccessor(exit); // true successor + loop_header->AddSuccessor(loop_body); // false successor + loop_body->AddSuccessor(loop_header); + + HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); + HInstruction* cmp = new (GetAllocator()) HGreaterThanOrEqual(phi, constant_200); + HInstruction* if_inst = new (GetAllocator()) HIf(cmp); + loop_header->AddPhi(phi); + loop_header->AddInstruction(cmp); + loop_header->AddInstruction(if_inst); + phi->AddInput(constant_0); + + ////////////////////////////////////////////////////////////////////////////////// + // LOOP BODY: + // array[i % 10] = 10; + HRem* i_mod_10 = new (GetAllocator()) HRem(DataType::Type::kInt32, phi, constant_10, 0); + HBoundsCheck* bounds_check_i_mod_10 = new (GetAllocator()) HBoundsCheck(i_mod_10, constant_10, 0); + HInstruction* array_set = new (GetAllocator()) HArraySet( + new_array, bounds_check_i_mod_10, constant_10, DataType::Type::kInt32, 0); + loop_body->AddInstruction(i_mod_10); + loop_body->AddInstruction(bounds_check_i_mod_10); + loop_body->AddInstruction(array_set); + + // array[i % 1] = 10; + HRem* i_mod_1 = new (GetAllocator()) HRem(DataType::Type::kInt32, phi, constant_1, 0); + HBoundsCheck* bounds_check_i_mod_1 = new (GetAllocator()) HBoundsCheck(i_mod_1, constant_10, 0); + array_set = new (GetAllocator()) HArraySet( + new_array, bounds_check_i_mod_1, constant_10, DataType::Type::kInt32, 0); + loop_body->AddInstruction(i_mod_1); + loop_body->AddInstruction(bounds_check_i_mod_1); + loop_body->AddInstruction(array_set); + + // array[i % 200] = 10; + HRem* i_mod_200 = new (GetAllocator()) HRem(DataType::Type::kInt32, phi, constant_1, 0); + HBoundsCheck* bounds_check_i_mod_200 = new (GetAllocator()) HBoundsCheck( + i_mod_200, constant_10, 0); + array_set = new (GetAllocator()) HArraySet( + new_array, bounds_check_i_mod_200, constant_10, DataType::Type::kInt32, 0); + loop_body->AddInstruction(i_mod_200); + loop_body->AddInstruction(bounds_check_i_mod_200); + loop_body->AddInstruction(array_set); + + // array[i % -10] = 10; + HRem* i_mod_minus_10 = new (GetAllocator()) HRem( + DataType::Type::kInt32, phi, constant_minus_10, 0); + HBoundsCheck* bounds_check_i_mod_minus_10 = new (GetAllocator()) HBoundsCheck( + i_mod_minus_10, constant_10, 0); + array_set = new (GetAllocator()) HArraySet( + new_array, bounds_check_i_mod_minus_10, constant_10, DataType::Type::kInt32, 0); + loop_body->AddInstruction(i_mod_minus_10); + loop_body->AddInstruction(bounds_check_i_mod_minus_10); + loop_body->AddInstruction(array_set); + + // array[i%array.length] = 10; + HNullCheck* null_check = new (GetAllocator()) HNullCheck(new_array, 0); + HArrayLength* array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HRem* i_mod_array_length = new (GetAllocator()) HRem( + DataType::Type::kInt32, phi, array_length, 0); + HBoundsCheck* bounds_check_i_mod_array_len = new (GetAllocator()) HBoundsCheck( + i_mod_array_length, array_length, 0); + 
array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check_i_mod_array_len, constant_10, DataType::Type::kInt32, 0); + loop_body->AddInstruction(null_check); + loop_body->AddInstruction(array_length); + loop_body->AddInstruction(i_mod_array_length); + loop_body->AddInstruction(bounds_check_i_mod_array_len); + loop_body->AddInstruction(array_set); + + // array[param_i % 10] = 10; + HRem* param_i_mod_10 = new (GetAllocator()) HRem(DataType::Type::kInt32, param_i, constant_10, 0); + HBoundsCheck* bounds_check_param_i_mod_10 = new (GetAllocator()) HBoundsCheck( + param_i_mod_10, constant_10, 0); + array_set = new (GetAllocator()) HArraySet( + new_array, bounds_check_param_i_mod_10, constant_10, DataType::Type::kInt32, 0); + loop_body->AddInstruction(param_i_mod_10); + loop_body->AddInstruction(bounds_check_param_i_mod_10); + loop_body->AddInstruction(array_set); + + // array[param_i%array.length] = 10; + null_check = new (GetAllocator()) HNullCheck(new_array, 0); + array_length = new (GetAllocator()) HArrayLength(null_check, 0); + HRem* param_i_mod_array_length = new (GetAllocator()) HRem( + DataType::Type::kInt32, param_i, array_length, 0); + HBoundsCheck* bounds_check_param_i_mod_array_len = new (GetAllocator()) HBoundsCheck( + param_i_mod_array_length, array_length, 0); + array_set = new (GetAllocator()) HArraySet( + null_check, bounds_check_param_i_mod_array_len, constant_10, DataType::Type::kInt32, 0); + loop_body->AddInstruction(null_check); + loop_body->AddInstruction(array_length); + loop_body->AddInstruction(param_i_mod_array_length); + loop_body->AddInstruction(bounds_check_param_i_mod_array_len); + loop_body->AddInstruction(array_set); + + // i++; + HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, constant_1); + loop_body->AddInstruction(add); + loop_body->AddInstruction(new (GetAllocator()) HGoto()); + phi->AddInput(add); + ////////////////////////////////////////////////////////////////////////////////// + + exit->AddInstruction(new (GetAllocator()) HExit()); + + RunBCE(); + + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_10)); + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_1)); + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_200)); + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_minus_10)); + ASSERT_TRUE(IsRemoved(bounds_check_i_mod_array_len)); + ASSERT_FALSE(IsRemoved(bounds_check_param_i_mod_10)); +} + } // namespace art diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index 2927e1f7c0..a1a5692ef6 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -20,22 +20,58 @@ #include "base/arena_bit_vector.h" #include "base/bit_vector-inl.h" #include "base/logging.h" +#include "block_builder.h" +#include "data_type-inl.h" #include "dex/verified_method.h" #include "driver/compiler_options.h" +#include "driver/dex_compilation_unit.h" +#include "instruction_builder.h" #include "mirror/class_loader.h" #include "mirror/dex_cache.h" #include "nodes.h" -#include "primitive.h" +#include "optimizing_compiler_stats.h" +#include "ssa_builder.h" #include "thread.h" #include "utils/dex_cache_arrays_layout-inl.h" namespace art { -void HGraphBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) { - if (compilation_stats_ != nullptr) { - compilation_stats_->RecordStat(compilation_stat); - } -} +HGraphBuilder::HGraphBuilder(HGraph* graph, + const CodeItemDebugInfoAccessor& accessor, + const DexCompilationUnit* dex_compilation_unit, + const DexCompilationUnit* outer_compilation_unit, + CompilerDriver* driver, + 
CodeGenerator* code_generator, + OptimizingCompilerStats* compiler_stats, + ArrayRef<const uint8_t> interpreter_metadata, + VariableSizedHandleScope* handles) + : graph_(graph), + dex_file_(&graph->GetDexFile()), + code_item_accessor_(accessor), + dex_compilation_unit_(dex_compilation_unit), + outer_compilation_unit_(outer_compilation_unit), + compiler_driver_(driver), + code_generator_(code_generator), + compilation_stats_(compiler_stats), + interpreter_metadata_(interpreter_metadata), + handles_(handles), + return_type_(DataType::FromShorty(dex_compilation_unit_->GetShorty()[0])) {} + +HGraphBuilder::HGraphBuilder(HGraph* graph, + const DexCompilationUnit* dex_compilation_unit, + const CodeItemDebugInfoAccessor& accessor, + VariableSizedHandleScope* handles, + DataType::Type return_type) + : graph_(graph), + dex_file_(&graph->GetDexFile()), + code_item_accessor_(accessor), + dex_compilation_unit_(dex_compilation_unit), + outer_compilation_unit_(nullptr), + compiler_driver_(nullptr), + code_generator_(nullptr), + compilation_stats_(nullptr), + handles_(handles), + return_type_(return_type) {} bool HGraphBuilder::SkipCompilation(size_t number_of_branches) { if (compiler_driver_ == nullptr) { @@ -49,21 +85,21 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_branches) { return false; } - if (compiler_options.IsHugeMethod(code_item_.insns_size_in_code_units_)) { + const uint32_t code_units = code_item_accessor_.InsnsSizeInCodeUnits(); + if (compiler_options.IsHugeMethod(code_units)) { VLOG(compiler) << "Skip compilation of huge method " << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) - << ": " << code_item_.insns_size_in_code_units_ << " code units"; - MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod); + << ": " << code_units << " code units"; + MaybeRecordStat(compilation_stats_, MethodCompilationStat::kNotCompiledHugeMethod); return true; } // If it's large and contains no branches, it's likely to be machine generated initialization. - if (compiler_options.IsLargeMethod(code_item_.insns_size_in_code_units_) - && (number_of_branches == 0)) { + if (compiler_options.IsLargeMethod(code_units) && (number_of_branches == 0)) { VLOG(compiler) << "Skip compilation of large method with no branch " << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) - << ": " << code_item_.insns_size_in_code_units_ << " code units"; - MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches); + << ": " << code_units << " code units"; + MaybeRecordStat(compilation_stats_, MethodCompilationStat::kNotCompiledLargeMethodNoBranches); return true; } @@ -71,22 +107,46 @@ bool HGraphBuilder::SkipCompilation(size_t number_of_branches) { } GraphAnalysisResult HGraphBuilder::BuildGraph() { + DCHECK(code_item_accessor_.HasCodeItem()); DCHECK(graph_->GetBlocks().empty()); - graph_->SetNumberOfVRegs(code_item_.registers_size_); - graph_->SetNumberOfInVRegs(code_item_.ins_size_); - graph_->SetMaximumNumberOfOutVRegs(code_item_.outs_size_); - graph_->SetHasTryCatch(code_item_.tries_size_ != 0); + graph_->SetNumberOfVRegs(code_item_accessor_.RegistersSize()); + graph_->SetNumberOfInVRegs(code_item_accessor_.InsSize()); + graph_->SetMaximumNumberOfOutVRegs(code_item_accessor_.OutsSize()); + graph_->SetHasTryCatch(code_item_accessor_.TriesSize() != 0); + + // Use ScopedArenaAllocator for all local allocations. 
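A brief sketch of the allocation split this comment refers to (the lifetime behavior of ScopedArenaAllocator is assumed here rather than shown in this hunk):

    // Long-lived HIR nodes keep coming from the graph's ArenaAllocator:
    HBasicBlock* block = new (graph_->GetAllocator()) HBasicBlock(graph_);
    // Builder-local scratch data now uses a stack-scoped arena tied to the graph's
    // ArenaStack; it is expected to be released when local_allocator goes out of
    // scope at the end of BuildGraph():
    ScopedArenaAllocator local_allocator(graph_->GetArenaStack());
    HBasicBlockBuilder block_builder(graph_, dex_file_, code_item_accessor_, &local_allocator);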
+ ScopedArenaAllocator local_allocator(graph_->GetArenaStack()); + HBasicBlockBuilder block_builder(graph_, dex_file_, code_item_accessor_, &local_allocator); + SsaBuilder ssa_builder(graph_, + dex_compilation_unit_->GetClassLoader(), + dex_compilation_unit_->GetDexCache(), + handles_, + &local_allocator); + HInstructionBuilder instruction_builder(graph_, + &block_builder, + &ssa_builder, + dex_file_, + code_item_accessor_, + return_type_, + dex_compilation_unit_, + outer_compilation_unit_, + compiler_driver_, + code_generator_, + interpreter_metadata_, + compilation_stats_, + handles_, + &local_allocator); // 1) Create basic blocks and link them together. Basic blocks are left // unpopulated with the exception of synthetic blocks, e.g. HTryBoundaries. - if (!block_builder_.Build()) { + if (!block_builder.Build()) { return kAnalysisInvalidBytecode; } // 2) Decide whether to skip this method based on its code size and number // of branches. - if (SkipCompilation(block_builder_.GetNumberOfBranches())) { + if (SkipCompilation(block_builder.GetNumberOfBranches())) { return kAnalysisSkipped; } @@ -97,12 +157,72 @@ GraphAnalysisResult HGraphBuilder::BuildGraph() { } // 4) Populate basic blocks with instructions. - if (!instruction_builder_.Build()) { + if (!instruction_builder.Build()) { return kAnalysisInvalidBytecode; } // 5) Type the graph and eliminate dead/redundant phis. - return ssa_builder_.BuildSsa(); + return ssa_builder.BuildSsa(); +} + +void HGraphBuilder::BuildIntrinsicGraph(ArtMethod* method) { + DCHECK(!code_item_accessor_.HasCodeItem()); + DCHECK(graph_->GetBlocks().empty()); + + // Determine the number of arguments and associated vregs. + uint32_t method_idx = dex_compilation_unit_->GetDexMethodIndex(); + const char* shorty = dex_file_->GetMethodShorty(dex_file_->GetMethodId(method_idx)); + size_t num_args = strlen(shorty + 1); + size_t num_wide_args = std::count(shorty + 1, shorty + 1 + num_args, 'J') + + std::count(shorty + 1, shorty + 1 + num_args, 'D'); + size_t num_arg_vregs = num_args + num_wide_args + (dex_compilation_unit_->IsStatic() ? 0u : 1u); + + // For simplicity, reserve 2 vregs (the maximum) for return value regardless of the return type. + size_t return_vregs = 2u; + graph_->SetNumberOfVRegs(return_vregs + num_arg_vregs); + graph_->SetNumberOfInVRegs(num_arg_vregs); + graph_->SetMaximumNumberOfOutVRegs(num_arg_vregs); + graph_->SetHasTryCatch(false); + + // Use ScopedArenaAllocator for all local allocations. + ScopedArenaAllocator local_allocator(graph_->GetArenaStack()); + HBasicBlockBuilder block_builder(graph_, + dex_file_, + CodeItemDebugInfoAccessor(), + &local_allocator); + SsaBuilder ssa_builder(graph_, + dex_compilation_unit_->GetClassLoader(), + dex_compilation_unit_->GetDexCache(), + handles_, + &local_allocator); + HInstructionBuilder instruction_builder(graph_, + &block_builder, + &ssa_builder, + dex_file_, + CodeItemDebugInfoAccessor(), + return_type_, + dex_compilation_unit_, + outer_compilation_unit_, + compiler_driver_, + code_generator_, + interpreter_metadata_, + compilation_stats_, + handles_, + &local_allocator); + + // 1) Create basic blocks for the intrinsic and link them together. + block_builder.BuildIntrinsic(); + + // 2) Build the trivial dominator tree. + GraphAnalysisResult bdt_result = graph_->BuildDominatorTree(); + DCHECK_EQ(bdt_result, kAnalysisSuccess); + + // 3) Populate basic blocks with instructions for the intrinsic. + instruction_builder.BuildIntrinsic(method); + + // 4) Type the graph (no dead/redundant phis to eliminate). 
+ GraphAnalysisResult build_ssa_result = ssa_builder.BuildSsa(); + DCHECK_EQ(build_ssa_result, kAnalysisSuccess); } } // namespace art diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 3a4c9dbd16..5a1914ce08 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -17,116 +17,66 @@ #ifndef ART_COMPILER_OPTIMIZING_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_BUILDER_H_ -#include "base/arena_containers.h" #include "base/arena_object.h" -#include "block_builder.h" -#include "dex_file.h" -#include "dex_file-inl.h" +#include "base/array_ref.h" +#include "dex/code_item_accessors.h" +#include "dex/dex_file-inl.h" +#include "dex/dex_file.h" #include "driver/compiler_driver.h" -#include "driver/dex_compilation_unit.h" -#include "instruction_builder.h" -#include "optimizing_compiler_stats.h" -#include "primitive.h" #include "nodes.h" -#include "ssa_builder.h" namespace art { +class ArtMethod; class CodeGenerator; +class DexCompilationUnit; +class OptimizingCompilerStats; class HGraphBuilder : public ValueObject { public: HGraphBuilder(HGraph* graph, - DexCompilationUnit* dex_compilation_unit, - const DexCompilationUnit* const outer_compilation_unit, - const DexFile* dex_file, - const DexFile::CodeItem& code_item, + const CodeItemDebugInfoAccessor& accessor, + const DexCompilationUnit* dex_compilation_unit, + const DexCompilationUnit* outer_compilation_unit, CompilerDriver* driver, CodeGenerator* code_generator, OptimizingCompilerStats* compiler_stats, - const uint8_t* interpreter_metadata, - Handle<mirror::DexCache> dex_cache, - VariableSizedHandleScope* handles) - : graph_(graph), - dex_file_(dex_file), - code_item_(code_item), - dex_compilation_unit_(dex_compilation_unit), - compiler_driver_(driver), - compilation_stats_(compiler_stats), - block_builder_(graph, dex_file, code_item), - ssa_builder_(graph, - dex_compilation_unit->GetClassLoader(), - dex_compilation_unit->GetDexCache(), - handles), - instruction_builder_(graph, - &block_builder_, - &ssa_builder_, - dex_file, - code_item_, - Primitive::GetType(dex_compilation_unit_->GetShorty()[0]), - dex_compilation_unit, - outer_compilation_unit, - driver, - code_generator, - interpreter_metadata, - compiler_stats, - dex_cache, - handles) {} + ArrayRef<const uint8_t> interpreter_metadata, + VariableSizedHandleScope* handles); // Only for unit testing. 
HGraphBuilder(HGraph* graph, - const DexFile::CodeItem& code_item, + const DexCompilationUnit* dex_compilation_unit, + const CodeItemDebugInfoAccessor& accessor, VariableSizedHandleScope* handles, - Primitive::Type return_type = Primitive::kPrimInt) - : graph_(graph), - dex_file_(nullptr), - code_item_(code_item), - dex_compilation_unit_(nullptr), - compiler_driver_(nullptr), - compilation_stats_(nullptr), - block_builder_(graph, nullptr, code_item), - ssa_builder_(graph, - handles->NewHandle<mirror::ClassLoader>(nullptr), - handles->NewHandle<mirror::DexCache>(nullptr), - handles), - instruction_builder_(graph, - &block_builder_, - &ssa_builder_, - /* dex_file */ nullptr, - code_item_, - return_type, - /* dex_compilation_unit */ nullptr, - /* outer_compilation_unit */ nullptr, - /* compiler_driver */ nullptr, - /* code_generator */ nullptr, - /* interpreter_metadata */ nullptr, - /* compiler_stats */ nullptr, - handles->NewHandle<mirror::DexCache>(nullptr), - handles) {} + DataType::Type return_type = DataType::Type::kInt32); GraphAnalysisResult BuildGraph(); + void BuildIntrinsicGraph(ArtMethod* method); static constexpr const char* kBuilderPassName = "builder"; private: - void MaybeRecordStat(MethodCompilationStat compilation_stat); bool SkipCompilation(size_t number_of_branches); HGraph* const graph_; const DexFile* const dex_file_; - const DexFile::CodeItem& code_item_; + const CodeItemDebugInfoAccessor code_item_accessor_; // null for intrinsic graph. // The compilation unit of the current method being compiled. Note that // it can be an inlined method. - DexCompilationUnit* const dex_compilation_unit_; + const DexCompilationUnit* const dex_compilation_unit_; - CompilerDriver* const compiler_driver_; + // The compilation unit of the enclosing method being compiled. + const DexCompilationUnit* const outer_compilation_unit_; - OptimizingCompilerStats* compilation_stats_; + CompilerDriver* const compiler_driver_; + CodeGenerator* const code_generator_; - HBasicBlockBuilder block_builder_; - SsaBuilder ssa_builder_; - HInstructionBuilder instruction_builder_; + OptimizingCompilerStats* const compilation_stats_; + const ArrayRef<const uint8_t> interpreter_metadata_; + VariableSizedHandleScope* const handles_; + const DataType::Type return_type_; DISALLOW_COPY_AND_ASSIGN(HGraphBuilder); }; diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc index c806dbfef6..3addaeecd9 100644 --- a/compiler/optimizing/cha_guard_optimization.cc +++ b/compiler/optimizing/cha_guard_optimization.cc @@ -36,7 +36,7 @@ class CHAGuardVisitor : HGraphVisitor { : HGraphVisitor(graph), block_has_cha_guard_(GetGraph()->GetBlocks().size(), 0, - graph->GetArena()->Adapter(kArenaAllocCHA)), + graph->GetAllocator()->Adapter(kArenaAllocCHA)), instruction_iterator_(nullptr) { number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards(); DCHECK_NE(number_of_guards_to_visit_, 0u); @@ -202,8 +202,8 @@ bool CHAGuardVisitor::HoistGuard(HShouldDeoptimizeFlag* flag, HInstruction* suspend = loop_info->GetSuspendCheck(); // Need a new deoptimize instruction that copies the environment // of the suspend instruction for the loop. 
- HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize( - GetGraph()->GetArena(), compare, DeoptimizationKind::kCHA, suspend->GetDexPc()); + HDeoptimize* deoptimize = new (GetGraph()->GetAllocator()) HDeoptimize( + GetGraph()->GetAllocator(), compare, DeoptimizationKind::kCHA, suspend->GetDexPc()); pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction()); deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment( suspend->GetEnvironment(), loop_info->GetHeader()); diff --git a/compiler/optimizing/cha_guard_optimization.h b/compiler/optimizing/cha_guard_optimization.h index ba0cdb81fd..f14e07bd6c 100644 --- a/compiler/optimizing/cha_guard_optimization.h +++ b/compiler/optimizing/cha_guard_optimization.h @@ -26,8 +26,9 @@ namespace art { */ class CHAGuardOptimization : public HOptimization { public: - explicit CHAGuardOptimization(HGraph* graph) - : HOptimization(graph, kCHAGuardOptimizationPassName) {} + explicit CHAGuardOptimization(HGraph* graph, + const char* name = kCHAGuardOptimizationPassName) + : HOptimization(graph, name) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 2872cf7458..6abda9b302 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -42,23 +42,26 @@ #include "base/bit_utils.h" #include "base/bit_utils_iterator.h" -#include "bytecode_utils.h" +#include "base/casts.h" +#include "base/leb128.h" #include "class_linker.h" #include "compiled_method.h" +#include "dex/bytecode_utils.h" +#include "dex/code_item_accessors-inl.h" #include "dex/verified_method.h" #include "driver/compiler_driver.h" #include "graph_visualizer.h" #include "intern_table.h" #include "intrinsics.h" -#include "leb128.h" #include "mirror/array-inl.h" #include "mirror/object_array-inl.h" #include "mirror/object_reference.h" #include "mirror/reference.h" #include "mirror/string.h" #include "parallel_move_resolver.h" -#include "ssa_liveness_analysis.h" #include "scoped_thread_state_change-inl.h" +#include "ssa_liveness_analysis.h" +#include "stack_map_stream.h" #include "thread-current-inl.h" #include "utils/assembler.h" @@ -68,35 +71,35 @@ namespace art { static constexpr bool kEnableDexLayoutOptimizations = false; // Return whether a location is consistent with a type. 
-static bool CheckType(Primitive::Type type, Location location) { +static bool CheckType(DataType::Type type, Location location) { if (location.IsFpuRegister() || (location.IsUnallocated() && (location.GetPolicy() == Location::kRequiresFpuRegister))) { - return (type == Primitive::kPrimFloat) || (type == Primitive::kPrimDouble); + return (type == DataType::Type::kFloat32) || (type == DataType::Type::kFloat64); } else if (location.IsRegister() || (location.IsUnallocated() && (location.GetPolicy() == Location::kRequiresRegister))) { - return Primitive::IsIntegralType(type) || (type == Primitive::kPrimNot); + return DataType::IsIntegralType(type) || (type == DataType::Type::kReference); } else if (location.IsRegisterPair()) { - return type == Primitive::kPrimLong; + return type == DataType::Type::kInt64; } else if (location.IsFpuRegisterPair()) { - return type == Primitive::kPrimDouble; + return type == DataType::Type::kFloat64; } else if (location.IsStackSlot()) { - return (Primitive::IsIntegralType(type) && type != Primitive::kPrimLong) - || (type == Primitive::kPrimFloat) - || (type == Primitive::kPrimNot); + return (DataType::IsIntegralType(type) && type != DataType::Type::kInt64) + || (type == DataType::Type::kFloat32) + || (type == DataType::Type::kReference); } else if (location.IsDoubleStackSlot()) { - return (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble); + return (type == DataType::Type::kInt64) || (type == DataType::Type::kFloat64); } else if (location.IsConstant()) { if (location.GetConstant()->IsIntConstant()) { - return Primitive::IsIntegralType(type) && (type != Primitive::kPrimLong); + return DataType::IsIntegralType(type) && (type != DataType::Type::kInt64); } else if (location.GetConstant()->IsNullConstant()) { - return type == Primitive::kPrimNot; + return type == DataType::Type::kReference; } else if (location.GetConstant()->IsLongConstant()) { - return type == Primitive::kPrimLong; + return type == DataType::Type::kInt64; } else if (location.GetConstant()->IsFloatConstant()) { - return type == Primitive::kPrimFloat; + return type == DataType::Type::kFloat32; } else { return location.GetConstant()->IsDoubleConstant() - && (type == Primitive::kPrimDouble); + && (type == DataType::Type::kFloat64); } } else { return location.IsInvalid() || (location.GetPolicy() == Location::kAny); @@ -130,7 +133,7 @@ static bool CheckTypeConsistency(HInstruction* instruction) { HEnvironment* environment = instruction->GetEnvironment(); for (size_t i = 0; i < instruction->EnvironmentSize(); ++i) { if (environment->GetInstructionAt(i) != nullptr) { - Primitive::Type type = environment->GetInstructionAt(i)->GetType(); + DataType::Type type = environment->GetInstructionAt(i)->GetType(); DCHECK(CheckType(type, environment->GetLocationAt(i))) << type << " " << environment->GetLocationAt(i); } else { @@ -141,13 +144,156 @@ static bool CheckTypeConsistency(HInstruction* instruction) { return true; } -size_t CodeGenerator::GetCacheOffset(uint32_t index) { - return sizeof(GcRoot<mirror::Object>) * index; +class CodeGenerator::CodeGenerationData : public DeletableArenaObject<kArenaAllocCodeGenerator> { + public: + static std::unique_ptr<CodeGenerationData> Create(ArenaStack* arena_stack, + InstructionSet instruction_set) { + ScopedArenaAllocator allocator(arena_stack); + void* memory = allocator.Alloc<CodeGenerationData>(kArenaAllocCodeGenerator); + return std::unique_ptr<CodeGenerationData>( + ::new (memory) CodeGenerationData(std::move(allocator), instruction_set)); + } + + 
ScopedArenaAllocator* GetScopedAllocator() { + return &allocator_; + } + + void AddSlowPath(SlowPathCode* slow_path) { + slow_paths_.emplace_back(std::unique_ptr<SlowPathCode>(slow_path)); + } + + ArrayRef<const std::unique_ptr<SlowPathCode>> GetSlowPaths() const { + return ArrayRef<const std::unique_ptr<SlowPathCode>>(slow_paths_); + } + + StackMapStream* GetStackMapStream() { return &stack_map_stream_; } + + void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string) { + jit_string_roots_.Overwrite(string_reference, + reinterpret_cast64<uint64_t>(string.GetReference())); + } + + uint64_t GetJitStringRootIndex(StringReference string_reference) const { + return jit_string_roots_.Get(string_reference); + } + + size_t GetNumberOfJitStringRoots() const { + return jit_string_roots_.size(); + } + + void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) { + jit_class_roots_.Overwrite(type_reference, reinterpret_cast64<uint64_t>(klass.GetReference())); + } + + uint64_t GetJitClassRootIndex(TypeReference type_reference) const { + return jit_class_roots_.Get(type_reference); + } + + size_t GetNumberOfJitClassRoots() const { + return jit_class_roots_.size(); + } + + size_t GetNumberOfJitRoots() const { + return GetNumberOfJitStringRoots() + GetNumberOfJitClassRoots(); + } + + void EmitJitRoots(Handle<mirror::ObjectArray<mirror::Object>> roots) + REQUIRES_SHARED(Locks::mutator_lock_); + + private: + CodeGenerationData(ScopedArenaAllocator&& allocator, InstructionSet instruction_set) + : allocator_(std::move(allocator)), + stack_map_stream_(&allocator_, instruction_set), + slow_paths_(allocator_.Adapter(kArenaAllocCodeGenerator)), + jit_string_roots_(StringReferenceValueComparator(), + allocator_.Adapter(kArenaAllocCodeGenerator)), + jit_class_roots_(TypeReferenceValueComparator(), + allocator_.Adapter(kArenaAllocCodeGenerator)) { + slow_paths_.reserve(kDefaultSlowPathsCapacity); + } + + static constexpr size_t kDefaultSlowPathsCapacity = 8; + + ScopedArenaAllocator allocator_; + StackMapStream stack_map_stream_; + ScopedArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_; + + // Maps a StringReference (dex_file, string_index) to the index in the literal table. + // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` + // will compute all the indices. + ScopedArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_; + + // Maps a ClassReference (dex_file, type_index) to the index in the literal table. + // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` + // will compute all the indices. + ScopedArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_; +}; + +void CodeGenerator::CodeGenerationData::EmitJitRoots( + Handle<mirror::ObjectArray<mirror::Object>> roots) { + DCHECK_EQ(static_cast<size_t>(roots->GetLength()), GetNumberOfJitRoots()); + ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); + size_t index = 0; + for (auto& entry : jit_string_roots_) { + // Update the `roots` with the string, and replace the address temporarily + // stored to the index in the table. + uint64_t address = entry.second; + roots->Set(index, reinterpret_cast<StackReference<mirror::String>*>(address)->AsMirrorPtr()); + DCHECK(roots->Get(index) != nullptr); + entry.second = index; + // Ensure the string is strongly interned. This is a requirement on how the JIT + // handles strings. 
b/32995596 + class_linker->GetInternTable()->InternStrong( + reinterpret_cast<mirror::String*>(roots->Get(index))); + ++index; + } + for (auto& entry : jit_class_roots_) { + // Update the `roots` with the class, and replace the address temporarily + // stored to the index in the table. + uint64_t address = entry.second; + roots->Set(index, reinterpret_cast<StackReference<mirror::Class>*>(address)->AsMirrorPtr()); + DCHECK(roots->Get(index) != nullptr); + entry.second = index; + ++index; + } +} + +ScopedArenaAllocator* CodeGenerator::GetScopedAllocator() { + DCHECK(code_generation_data_ != nullptr); + return code_generation_data_->GetScopedAllocator(); +} + +StackMapStream* CodeGenerator::GetStackMapStream() { + DCHECK(code_generation_data_ != nullptr); + return code_generation_data_->GetStackMapStream(); +} + +void CodeGenerator::ReserveJitStringRoot(StringReference string_reference, + Handle<mirror::String> string) { + DCHECK(code_generation_data_ != nullptr); + code_generation_data_->ReserveJitStringRoot(string_reference, string); +} + +uint64_t CodeGenerator::GetJitStringRootIndex(StringReference string_reference) { + DCHECK(code_generation_data_ != nullptr); + return code_generation_data_->GetJitStringRootIndex(string_reference); } -size_t CodeGenerator::GetCachePointerOffset(uint32_t index) { - PointerSize pointer_size = InstructionSetPointerSize(GetInstructionSet()); - return static_cast<size_t>(pointer_size) * index; +void CodeGenerator::ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass) { + DCHECK(code_generation_data_ != nullptr); + code_generation_data_->ReserveJitClassRoot(type_reference, klass); +} + +uint64_t CodeGenerator::GetJitClassRootIndex(TypeReference type_reference) { + DCHECK(code_generation_data_ != nullptr); + return code_generation_data_->GetJitClassRootIndex(type_reference); +} + +void CodeGenerator::EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED, + const uint8_t* roots_data ATTRIBUTE_UNUSED) { + DCHECK(code_generation_data_ != nullptr); + DCHECK_EQ(code_generation_data_->GetNumberOfJitStringRoots(), 0u); + DCHECK_EQ(code_generation_data_->GetNumberOfJitClassRoots(), 0u); } uint32_t CodeGenerator::GetArrayLengthOffset(HArrayLength* array_length) { @@ -157,10 +303,10 @@ uint32_t CodeGenerator::GetArrayLengthOffset(HArrayLength* array_length) { } uint32_t CodeGenerator::GetArrayDataOffset(HArrayGet* array_get) { - DCHECK(array_get->GetType() == Primitive::kPrimChar || !array_get->IsStringCharAt()); + DCHECK(array_get->GetType() == DataType::Type::kUint16 || !array_get->IsStringCharAt()); return array_get->IsStringCharAt() ? 
mirror::String::ValueOffset().Uint32Value() - : mirror::Array::DataOffset(Primitive::ComponentSize(array_get->GetType())).Uint32Value(); + : mirror::Array::DataOffset(DataType::Size(array_get->GetType())).Uint32Value(); } bool CodeGenerator::GoesToNextBlock(HBasicBlock* current, HBasicBlock* next) const { @@ -210,9 +356,10 @@ class DisassemblyScope { void CodeGenerator::GenerateSlowPaths() { + DCHECK(code_generation_data_ != nullptr); size_t code_start = 0; - for (const std::unique_ptr<SlowPathCode>& slow_path_unique_ptr : slow_paths_) { - SlowPathCode* slow_path = slow_path_unique_ptr.get(); + for (const std::unique_ptr<SlowPathCode>& slow_path_ptr : code_generation_data_->GetSlowPaths()) { + SlowPathCode* slow_path = slow_path_ptr.get(); current_slow_path_ = slow_path; if (disasm_info_ != nullptr) { code_start = GetAssembler()->CodeSize(); @@ -227,7 +374,14 @@ void CodeGenerator::GenerateSlowPaths() { current_slow_path_ = nullptr; } +void CodeGenerator::InitializeCodeGenerationData() { + DCHECK(code_generation_data_ == nullptr); + code_generation_data_ = CodeGenerationData::Create(graph_->GetArenaStack(), GetInstructionSet()); +} + void CodeGenerator::Compile(CodeAllocator* allocator) { + InitializeCodeGenerationData(); + // The register allocator already called `InitializeCodeGeneration`, // where the frame size has been computed. DCHECK(block_order_ != nullptr); @@ -288,7 +442,8 @@ void CodeGenerator::Finalize(CodeAllocator* allocator) { GetAssembler()->FinalizeInstructions(code); } -void CodeGenerator::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) { +void CodeGenerator::EmitLinkerPatches( + ArenaVector<linker::LinkerPatch>* linker_patches ATTRIBUTE_UNUSED) { // No linker patches by default. } @@ -321,7 +476,7 @@ void CodeGenerator::InitializeCodeGeneration(size_t number_of_spill_slots, void CodeGenerator::CreateCommonInvokeLocationSummary( HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor) { - ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena(); + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly); @@ -373,6 +528,7 @@ void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall( break; case kVirtual: case kInterface: + case kPolymorphic: LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); UNREACHABLE(); } @@ -400,6 +556,9 @@ void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invok case kInterface: entrypoint = kQuickInvokeInterfaceTrampolineWithAccessCheck; break; + case kPolymorphic: + LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType(); + UNREACHABLE(); } InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), nullptr); } @@ -412,14 +571,14 @@ void CodeGenerator::GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke) { void CodeGenerator::CreateUnresolvedFieldLocationSummary( HInstruction* field_access, - Primitive::Type field_type, + DataType::Type field_type, const FieldAccessCallingConvention& calling_convention) { bool is_instance = field_access->IsUnresolvedInstanceFieldGet() || field_access->IsUnresolvedInstanceFieldSet(); bool is_get = field_access->IsUnresolvedInstanceFieldGet() || field_access->IsUnresolvedStaticFieldGet(); - ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetArena(); + ArenaAllocator* allocator = field_access->GetBlock()->GetGraph()->GetAllocator(); LocationSummary* locations = new (allocator) 
LocationSummary(field_access, LocationSummary::kCallOnMainOnly); @@ -434,7 +593,7 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary( // regardless of the the type. Because of that we forced to special case // the access to floating point values. if (is_get) { - if (Primitive::IsFloatingPointType(field_type)) { + if (DataType::IsFloatingPointType(field_type)) { // The return value will be stored in regular registers while register // allocator expects it in a floating point register. // Note We don't need to request additional temps because the return @@ -447,7 +606,7 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary( } } else { size_t set_index = is_instance ? 1 : 0; - if (Primitive::IsFloatingPointType(field_type)) { + if (DataType::IsFloatingPointType(field_type)) { // The set value comes from a float location while the calling convention // expects it in a regular register location. Allocate a temp for it and // make the transfer at codegen. @@ -462,7 +621,7 @@ void CodeGenerator::CreateUnresolvedFieldLocationSummary( void CodeGenerator::GenerateUnresolvedFieldAccess( HInstruction* field_access, - Primitive::Type field_type, + DataType::Type field_type, uint32_t field_index, uint32_t dex_pc, const FieldAccessCallingConvention& calling_convention) { @@ -475,51 +634,52 @@ void CodeGenerator::GenerateUnresolvedFieldAccess( bool is_get = field_access->IsUnresolvedInstanceFieldGet() || field_access->IsUnresolvedStaticFieldGet(); - if (!is_get && Primitive::IsFloatingPointType(field_type)) { + if (!is_get && DataType::IsFloatingPointType(field_type)) { // Copy the float value to be set into the calling convention register. // Note that using directly the temp location is problematic as we don't // support temp register pairs. To avoid boilerplate conversion code, use // the location from the calling convention. MoveLocation(calling_convention.GetSetValueLocation(field_type, is_instance), locations->InAt(is_instance ? 1 : 0), - (Primitive::Is64BitType(field_type) ? Primitive::kPrimLong : Primitive::kPrimInt)); + (DataType::Is64BitType(field_type) ? DataType::Type::kInt64 + : DataType::Type::kInt32)); } QuickEntrypointEnum entrypoint = kQuickSet8Static; // Initialize to anything to avoid warnings. switch (field_type) { - case Primitive::kPrimBoolean: + case DataType::Type::kBool: entrypoint = is_instance ? (is_get ? kQuickGetBooleanInstance : kQuickSet8Instance) : (is_get ? kQuickGetBooleanStatic : kQuickSet8Static); break; - case Primitive::kPrimByte: + case DataType::Type::kInt8: entrypoint = is_instance ? (is_get ? kQuickGetByteInstance : kQuickSet8Instance) : (is_get ? kQuickGetByteStatic : kQuickSet8Static); break; - case Primitive::kPrimShort: + case DataType::Type::kInt16: entrypoint = is_instance ? (is_get ? kQuickGetShortInstance : kQuickSet16Instance) : (is_get ? kQuickGetShortStatic : kQuickSet16Static); break; - case Primitive::kPrimChar: + case DataType::Type::kUint16: entrypoint = is_instance ? (is_get ? kQuickGetCharInstance : kQuickSet16Instance) : (is_get ? kQuickGetCharStatic : kQuickSet16Static); break; - case Primitive::kPrimInt: - case Primitive::kPrimFloat: + case DataType::Type::kInt32: + case DataType::Type::kFloat32: entrypoint = is_instance ? (is_get ? kQuickGet32Instance : kQuickSet32Instance) : (is_get ? kQuickGet32Static : kQuickSet32Static); break; - case Primitive::kPrimNot: + case DataType::Type::kReference: entrypoint = is_instance ? (is_get ? kQuickGetObjInstance : kQuickSetObjInstance) : (is_get ? 
kQuickGetObjStatic : kQuickSetObjStatic); break; - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: entrypoint = is_instance ? (is_get ? kQuickGet64Instance : kQuickSet64Instance) : (is_get ? kQuickGet64Static : kQuickSet64Static); @@ -529,7 +689,7 @@ void CodeGenerator::GenerateUnresolvedFieldAccess( } InvokeRuntime(entrypoint, field_access, dex_pc, nullptr); - if (is_get && Primitive::IsFloatingPointType(field_type)) { + if (is_get && DataType::IsFloatingPointType(field_type)) { MoveLocation(locations->Out(), calling_convention.GetReturnLocation(field_type), field_type); } } @@ -539,7 +699,7 @@ void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls, Location runtime_return_location) { DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall); DCHECK_EQ(cls->InputCount(), 1u); - LocationSummary* locations = new (cls->GetBlock()->GetGraph()->GetArena()) LocationSummary( + LocationSummary* locations = new (cls->GetBlock()->GetGraph()->GetAllocator()) LocationSummary( cls, LocationSummary::kCallOnMainOnly); locations->SetInAt(0, Location::NoLocation()); locations->AddTemp(runtime_type_index_location); @@ -610,72 +770,54 @@ void CodeGenerator::AllocateLocations(HInstruction* instruction) { } } -void CodeGenerator::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const { - if (stats_ != nullptr) { - stats_->RecordStat(compilation_stat, count); - } -} - std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, InstructionSet instruction_set, const InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats) { - ArenaAllocator* arena = graph->GetArena(); + ArenaAllocator* allocator = graph->GetAllocator(); switch (instruction_set) { #ifdef ART_ENABLE_CODEGEN_arm - case kArm: - case kThumb2: { + case InstructionSet::kArm: + case InstructionSet::kThumb2: { return std::unique_ptr<CodeGenerator>( - new (arena) arm::CodeGeneratorARMVIXL(graph, - *isa_features.AsArmInstructionSetFeatures(), - compiler_options, - stats)); + new (allocator) arm::CodeGeneratorARMVIXL( + graph, *isa_features.AsArmInstructionSetFeatures(), compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_arm64 - case kArm64: { + case InstructionSet::kArm64: { return std::unique_ptr<CodeGenerator>( - new (arena) arm64::CodeGeneratorARM64(graph, - *isa_features.AsArm64InstructionSetFeatures(), - compiler_options, - stats)); + new (allocator) arm64::CodeGeneratorARM64( + graph, *isa_features.AsArm64InstructionSetFeatures(), compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_mips - case kMips: { + case InstructionSet::kMips: { return std::unique_ptr<CodeGenerator>( - new (arena) mips::CodeGeneratorMIPS(graph, - *isa_features.AsMipsInstructionSetFeatures(), - compiler_options, - stats)); + new (allocator) mips::CodeGeneratorMIPS( + graph, *isa_features.AsMipsInstructionSetFeatures(), compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_mips64 - case kMips64: { + case InstructionSet::kMips64: { return std::unique_ptr<CodeGenerator>( - new (arena) mips64::CodeGeneratorMIPS64(graph, - *isa_features.AsMips64InstructionSetFeatures(), - compiler_options, - stats)); + new (allocator) mips64::CodeGeneratorMIPS64( + graph, *isa_features.AsMips64InstructionSetFeatures(), compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_x86 - case kX86: { + case InstructionSet::kX86: { return std::unique_ptr<CodeGenerator>( - new (arena) 
x86::CodeGeneratorX86(graph, - *isa_features.AsX86InstructionSetFeatures(), - compiler_options, - stats)); + new (allocator) x86::CodeGeneratorX86( + graph, *isa_features.AsX86InstructionSetFeatures(), compiler_options, stats)); } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 - case kX86_64: { + case InstructionSet::kX86_64: { return std::unique_ptr<CodeGenerator>( - new (arena) x86_64::CodeGeneratorX86_64(graph, - *isa_features.AsX86_64InstructionSetFeatures(), - compiler_options, - stats)); + new (allocator) x86_64::CodeGeneratorX86_64( + graph, *isa_features.AsX86_64InstructionSetFeatures(), compiler_options, stats)); } #endif default: @@ -683,12 +825,54 @@ std::unique_ptr<CodeGenerator> CodeGenerator::Create(HGraph* graph, } } +CodeGenerator::CodeGenerator(HGraph* graph, + size_t number_of_core_registers, + size_t number_of_fpu_registers, + size_t number_of_register_pairs, + uint32_t core_callee_save_mask, + uint32_t fpu_callee_save_mask, + const CompilerOptions& compiler_options, + OptimizingCompilerStats* stats) + : frame_size_(0), + core_spill_mask_(0), + fpu_spill_mask_(0), + first_register_slot_in_slow_path_(0), + allocated_registers_(RegisterSet::Empty()), + blocked_core_registers_(graph->GetAllocator()->AllocArray<bool>(number_of_core_registers, + kArenaAllocCodeGenerator)), + blocked_fpu_registers_(graph->GetAllocator()->AllocArray<bool>(number_of_fpu_registers, + kArenaAllocCodeGenerator)), + number_of_core_registers_(number_of_core_registers), + number_of_fpu_registers_(number_of_fpu_registers), + number_of_register_pairs_(number_of_register_pairs), + core_callee_save_mask_(core_callee_save_mask), + fpu_callee_save_mask_(fpu_callee_save_mask), + block_order_(nullptr), + disasm_info_(nullptr), + stats_(stats), + graph_(graph), + compiler_options_(compiler_options), + current_slow_path_(nullptr), + current_block_index_(0), + is_leaf_(true), + requires_current_method_(false), + code_generation_data_() { +} + +CodeGenerator::~CodeGenerator() {} + void CodeGenerator::ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size) { DCHECK(stack_map_size != nullptr); DCHECK(method_info_size != nullptr); - *stack_map_size = stack_map_stream_.PrepareForFillIn(); - *method_info_size = stack_map_stream_.ComputeMethodInfoSize(); + StackMapStream* stack_map_stream = GetStackMapStream(); + *stack_map_size = stack_map_stream->PrepareForFillIn(); + *method_info_size = stack_map_stream->ComputeMethodInfoSize(); +} + +size_t CodeGenerator::GetNumberOfJitRoots() const { + DCHECK(code_generation_data_ != nullptr); + return code_generation_data_->GetNumberOfJitRoots(); } static void CheckCovers(uint32_t dex_pc, @@ -716,7 +900,7 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, // One can write loops through try/catch, which we do not support for OSR anyway. 
return; } - ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc)); + ArenaVector<HSuspendCheck*> loop_headers(graph.GetAllocator()->Adapter(kArenaAllocMisc)); for (HBasicBlock* block : graph.GetReversePostOrder()) { if (block->IsLoopHeader()) { HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck(); @@ -725,13 +909,12 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, } } } - ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc)); - const uint16_t* code_ptr = code_item.insns_; - const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_; - - size_t dex_pc = 0; - while (code_ptr < code_end) { - const Instruction& instruction = *Instruction::At(code_ptr); + ArenaVector<size_t> covered( + loop_headers.size(), 0, graph.GetAllocator()->Adapter(kArenaAllocMisc)); + for (const DexInstructionPcPair& pair : CodeItemInstructionAccessor(graph.GetDexFile(), + &code_item)) { + const uint32_t dex_pc = pair.DexPc(); + const Instruction& instruction = pair.Inst(); if (instruction.IsBranch()) { uint32_t target = dex_pc + instruction.GetTargetOffset(); CheckCovers(target, graph, code_info, loop_headers, &covered); @@ -747,8 +930,6 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, CheckCovers(target, graph, code_info, loop_headers, &covered); } } - dex_pc += instruction.SizeInCodeUnits(); - code_ptr += instruction.SizeInCodeUnits(); } for (size_t i = 0; i < covered.size(); ++i) { @@ -758,11 +939,12 @@ static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, void CodeGenerator::BuildStackMaps(MemoryRegion stack_map_region, MemoryRegion method_info_region, - const DexFile::CodeItem& code_item) { - stack_map_stream_.FillInCodeInfo(stack_map_region); - stack_map_stream_.FillInMethodInfo(method_info_region); - if (kIsDebugBuild) { - CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), code_item); + const DexFile::CodeItem* code_item_for_osr_check) { + StackMapStream* stack_map_stream = GetStackMapStream(); + stack_map_stream->FillInCodeInfo(stack_map_region); + stack_map_stream->FillInMethodInfo(method_info_region); + if (kIsDebugBuild && code_item_for_osr_check != nullptr) { + CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), *code_item_for_osr_check); } } @@ -785,40 +967,26 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, return; } if (instruction->IsRem()) { - Primitive::Type type = instruction->AsRem()->GetResultType(); - if ((type == Primitive::kPrimFloat) || (type == Primitive::kPrimDouble)) { + DataType::Type type = instruction->AsRem()->GetResultType(); + if ((type == DataType::Type::kFloat32) || (type == DataType::Type::kFloat64)) { return; } } } - uint32_t outer_dex_pc = dex_pc; - uint32_t outer_environment_size = 0; - uint32_t inlining_depth = 0; - if (instruction != nullptr) { - for (HEnvironment* environment = instruction->GetEnvironment(); - environment != nullptr; - environment = environment->GetParent()) { - outer_dex_pc = environment->GetDexPc(); - outer_environment_size = environment->Size(); - if (environment != instruction->GetEnvironment()) { - inlining_depth++; - } - } - } - // Collect PC infos for the mapping table. uint32_t native_pc = GetAssembler()->CodePosition(); + StackMapStream* stack_map_stream = GetStackMapStream(); if (instruction == nullptr) { // For stack overflow checks and native-debug-info entries without dex register // mapping (i.e. 
start of basic block or start of slow path). - stack_map_stream_.BeginStackMapEntry(outer_dex_pc, native_pc, 0, 0, 0, 0); - stack_map_stream_.EndStackMapEntry(); + stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, 0, 0, 0, 0); + stack_map_stream->EndStackMapEntry(); return; } - LocationSummary* locations = instruction->GetLocations(); + LocationSummary* locations = instruction->GetLocations(); uint32_t register_mask = locations->GetRegisterMask(); DCHECK_EQ(register_mask & ~locations->GetLiveRegisters()->GetCoreRegisters(), 0u); if (locations->OnlyCallsOnSlowPath()) { @@ -833,26 +1001,37 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, // The register mask must be a subset of callee-save registers. DCHECK_EQ(register_mask & core_callee_save_mask_, register_mask); } - stack_map_stream_.BeginStackMapEntry(outer_dex_pc, + + uint32_t outer_dex_pc = dex_pc; + uint32_t outer_environment_size = 0u; + uint32_t inlining_depth = 0; + HEnvironment* const environment = instruction->GetEnvironment(); + if (environment != nullptr) { + HEnvironment* outer_environment = environment; + while (outer_environment->GetParent() != nullptr) { + outer_environment = outer_environment->GetParent(); + ++inlining_depth; + } + outer_dex_pc = outer_environment->GetDexPc(); + outer_environment_size = outer_environment->Size(); + } + stack_map_stream->BeginStackMapEntry(outer_dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, inlining_depth); - - HEnvironment* const environment = instruction->GetEnvironment(); EmitEnvironment(environment, slow_path); // Record invoke info, the common case for the trampoline is super and static invokes. Only // record these to reduce oat file size. if (kEnableDexLayoutOptimizations) { - if (environment != nullptr && - instruction->IsInvoke() && - instruction->IsInvokeStaticOrDirect()) { - HInvoke* const invoke = instruction->AsInvoke(); - stack_map_stream_.AddInvoke(invoke->GetInvokeType(), invoke->GetDexMethodIndex()); + if (instruction->IsInvokeStaticOrDirect()) { + HInvoke* const invoke = instruction->AsInvokeStaticOrDirect(); + DCHECK(environment != nullptr); + stack_map_stream->AddInvoke(invoke->GetInvokeType(), invoke->GetDexMethodIndex()); } } - stack_map_stream_.EndStackMapEntry(); + stack_map_stream->EndStackMapEntry(); HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); if (instruction->IsSuspendCheck() && @@ -863,10 +1042,10 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, // We duplicate the stack map as a marker that this stack map can be an OSR entry. // Duplicating it avoids having the runtime recognize and skip an OSR stack map. DCHECK(info->IsIrreducible()); - stack_map_stream_.BeginStackMapEntry( + stack_map_stream->BeginStackMapEntry( dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0); EmitEnvironment(instruction->GetEnvironment(), slow_path); - stack_map_stream_.EndStackMapEntry(); + stack_map_stream->EndStackMapEntry(); if (kIsDebugBuild) { for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { HInstruction* in_environment = environment->GetInstructionAt(i); @@ -886,21 +1065,22 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, } else if (kIsDebugBuild) { // Ensure stack maps are unique, by checking that the native pc in the stack map // last emitted is different than the native pc of the stack map just emitted. 
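// RecordPcInfo above now derives the outer frame's dex_pc, environment size and the
// inlining depth by walking the HEnvironment parent chain. A small self-contained
// sketch of that walk, using a simplified stand-in struct rather than HEnvironment:
struct Env {
  const Env* parent;  // nullptr for the outermost (caller) environment
  unsigned dex_pc;
  unsigned size;
};

struct OuterFrameInfo {
  unsigned dex_pc;
  unsigned size;
  unsigned inlining_depth;
};

inline OuterFrameInfo FindOuterFrame(const Env* env) {
  OuterFrameInfo info = {0u, 0u, 0u};
  if (env == nullptr) {
    return info;  // stack maps without a dex register mapping keep the defaults
  }
  const Env* outer = env;
  while (outer->parent != nullptr) {
    outer = outer->parent;  // one hop per inlined frame
    ++info.inlining_depth;
  }
  info.dex_pc = outer->dex_pc;
  info.size = outer->size;
  return info;
}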
- size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps(); + size_t number_of_stack_maps = stack_map_stream->GetNumberOfStackMaps(); if (number_of_stack_maps > 1) { - DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_code_offset, - stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_code_offset); + DCHECK_NE(stack_map_stream->GetStackMap(number_of_stack_maps - 1).native_pc_code_offset, + stack_map_stream->GetStackMap(number_of_stack_maps - 2).native_pc_code_offset); } } } bool CodeGenerator::HasStackMapAtCurrentPc() { uint32_t pc = GetAssembler()->CodeSize(); - size_t count = stack_map_stream_.GetNumberOfStackMaps(); + StackMapStream* stack_map_stream = GetStackMapStream(); + size_t count = stack_map_stream->GetNumberOfStackMaps(); if (count == 0) { return false; } - CodeOffset native_pc_offset = stack_map_stream_.GetStackMap(count - 1).native_pc_code_offset; + CodeOffset native_pc_offset = stack_map_stream->GetStackMap(count - 1).native_pc_code_offset; return (native_pc_offset.Uint32Value(GetInstructionSet()) == pc); } @@ -917,7 +1097,8 @@ void CodeGenerator::MaybeRecordNativeDebugInfo(HInstruction* instruction, } void CodeGenerator::RecordCatchBlockInfo() { - ArenaAllocator* arena = graph_->GetArena(); + ArenaAllocator* allocator = graph_->GetAllocator(); + StackMapStream* stack_map_stream = GetStackMapStream(); for (HBasicBlock* block : *block_order_) { if (!block->IsCatchBlock()) { @@ -932,9 +1113,9 @@ void CodeGenerator::RecordCatchBlockInfo() { // The stack mask is not used, so we leave it empty. ArenaBitVector* stack_mask = - ArenaBitVector::Create(arena, 0, /* expandable */ true, kArenaAllocCodeGenerator); + ArenaBitVector::Create(allocator, 0, /* expandable */ true, kArenaAllocCodeGenerator); - stack_map_stream_.BeginStackMapEntry(dex_pc, + stack_map_stream->BeginStackMapEntry(dex_pc, native_pc, register_mask, stack_mask, @@ -952,19 +1133,19 @@ void CodeGenerator::RecordCatchBlockInfo() { } if (current_phi == nullptr || current_phi->AsPhi()->GetRegNumber() != vreg) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); } else { - Location location = current_phi->GetLiveInterval()->ToLocation(); + Location location = current_phi->GetLocations()->Out(); switch (location.GetKind()) { case Location::kStackSlot: { - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); break; } case Location::kDoubleStackSlot: { - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); ++vreg; DCHECK_LT(vreg, num_vregs); @@ -979,17 +1160,23 @@ void CodeGenerator::RecordCatchBlockInfo() { } } - stack_map_stream_.EndStackMapEntry(); + stack_map_stream->EndStackMapEntry(); } } +void CodeGenerator::AddSlowPath(SlowPathCode* slow_path) { + DCHECK(code_generation_data_ != nullptr); + code_generation_data_->AddSlowPath(slow_path); +} + void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slow_path) { if (environment == nullptr) return; + StackMapStream* stack_map_stream = GetStackMapStream(); if (environment->GetParent() != nullptr) { // We emit the parent environment first. 
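// The recursion goes parent-first so that the outer frame's dex register entries are
// written before those of each inlined frame. A tiny stand-alone sketch of that
// ordering, with a stand-in Frame type instead of HEnvironment:
struct Frame {
  const Frame* parent;  // nullptr for the outermost frame
  int id;
};

// Appends frame ids outermost-first, mirroring the emission order used below.
inline void EmitOuterFirst(const Frame* frame, int* out, int* count) {
  if (frame == nullptr) {
    return;
  }
  EmitOuterFirst(frame->parent, out, count);  // outer (calling) frame first
  out[(*count)++] = frame->id;                // then this, more deeply inlined, frame
}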
EmitEnvironment(environment->GetParent(), slow_path); - stack_map_stream_.BeginInlineInfoEntry(environment->GetMethod(), + stack_map_stream->BeginInlineInfoEntry(environment->GetMethod(), environment->GetDexPc(), environment->Size(), &graph_->GetDexFile()); @@ -999,7 +1186,7 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { HInstruction* current = environment->GetInstructionAt(i); if (current == nullptr) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); continue; } @@ -1009,43 +1196,43 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo DCHECK_EQ(current, location.GetConstant()); if (current->IsLongConstant()) { int64_t value = current->AsLongConstant()->GetValue(); - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kConstant, Low32Bits(value)); - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kConstant, High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsDoubleConstant()) { int64_t value = bit_cast<int64_t, double>(current->AsDoubleConstant()->GetValue()); - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kConstant, Low32Bits(value)); - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kConstant, High32Bits(value)); ++i; DCHECK_LT(i, environment_size); } else if (current->IsIntConstant()) { int32_t value = current->AsIntConstant()->GetValue(); - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); } else if (current->IsNullConstant()) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, 0); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, 0); } else { DCHECK(current->IsFloatConstant()) << current->DebugName(); int32_t value = bit_cast<int32_t, float>(current->AsFloatConstant()->GetValue()); - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kConstant, value); } break; } case Location::kStackSlot: { - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); break; } case Location::kDoubleStackSlot: { - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kInStack, location.GetStackIndex()); - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kInStack, location.GetHighStackIndex(kVRegSize)); ++i; DCHECK_LT(i, environment_size); @@ -1056,17 +1243,17 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int id = location.reg(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(id); - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); - if (current->GetType() == Primitive::kPrimLong) { - stack_map_stream_.AddDexRegisterEntry( + 
stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + if (current->GetType() == DataType::Type::kInt64) { + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kInStack, offset + kVRegSize); ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id); - if (current->GetType() == Primitive::kPrimLong) { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegisterHigh, id); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, id); + if (current->GetType() == DataType::Type::kInt64) { + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegisterHigh, id); ++i; DCHECK_LT(i, environment_size); } @@ -1078,17 +1265,17 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int id = location.reg(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(id)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(id); - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); - if (current->GetType() == Primitive::kPrimDouble) { - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + if (current->GetType() == DataType::Type::kFloat64) { + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kInStack, offset + kVRegSize); ++i; DCHECK_LT(i, environment_size); } } else { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id); - if (current->GetType() == Primitive::kPrimDouble) { - stack_map_stream_.AddDexRegisterEntry( + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, id); + if (current->GetType() == DataType::Type::kFloat64) { + stack_map_stream->AddDexRegisterEntry( DexRegisterLocation::Kind::kInFpuRegisterHigh, id); ++i; DCHECK_LT(i, environment_size); @@ -1102,16 +1289,16 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int high = location.high(); if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(low)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(low); - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); } else { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, low); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, low); } if (slow_path != nullptr && slow_path->IsFpuRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfFpuRegister(high); - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); ++i; } else { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, high); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInFpuRegister, high); ++i; } DCHECK_LT(i, environment_size); @@ -1123,15 +1310,15 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo int high = location.high(); if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(low)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(low); - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, 
offset); } else { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, low); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, low); } if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(high)) { uint32_t offset = slow_path->GetStackOffsetOfCoreRegister(high); - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInStack, offset); } else { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, high); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kInRegister, high); } ++i; DCHECK_LT(i, environment_size); @@ -1139,7 +1326,7 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo } case Location::kInvalid: { - stack_map_stream_.AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); + stack_map_stream->AddDexRegisterEntry(DexRegisterLocation::Kind::kNone, 0); break; } @@ -1149,7 +1336,7 @@ void CodeGenerator::EmitEnvironment(HEnvironment* environment, SlowPathCode* slo } if (environment->GetParent() != nullptr) { - stack_map_stream_.EndInlineInfoEntry(); + stack_map_stream->EndInlineInfoEntry(); } } @@ -1202,7 +1389,8 @@ LocationSummary* CodeGenerator::CreateThrowingSlowPathLocations(HInstruction* in if (can_throw_into_catch_block) { call_kind = LocationSummary::kCallOnSlowPath; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); if (can_throw_into_catch_block && compiler_options_.GetImplicitNullChecks()) { locations->SetCustomSlowPathCallerSaves(caller_saves); // Default: no caller-save registers. } @@ -1212,40 +1400,39 @@ LocationSummary* CodeGenerator::CreateThrowingSlowPathLocations(HInstruction* in void CodeGenerator::GenerateNullCheck(HNullCheck* instruction) { if (compiler_options_.GetImplicitNullChecks()) { - MaybeRecordStat(kImplicitNullCheckGenerated); + MaybeRecordStat(stats_, MethodCompilationStat::kImplicitNullCheckGenerated); GenerateImplicitNullCheck(instruction); } else { - MaybeRecordStat(kExplicitNullCheckGenerated); + MaybeRecordStat(stats_, MethodCompilationStat::kExplicitNullCheckGenerated); GenerateExplicitNullCheck(instruction); } } -void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const { +void CodeGenerator::ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check, + HParallelMove* spills) const { LocationSummary* locations = suspend_check->GetLocations(); HBasicBlock* block = suspend_check->GetBlock(); DCHECK(block->GetLoopInformation()->GetSuspendCheck() == suspend_check); DCHECK(block->IsLoopHeader()); + DCHECK(block->GetFirstInstruction() == spills); - for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - HInstruction* current = it.Current(); - LiveInterval* interval = current->GetLiveInterval(); - // We only need to clear bits of loop phis containing objects and allocated in register. - // Loop phis allocated on stack already have the object in the stack. 
- if (current->GetType() == Primitive::kPrimNot - && interval->HasRegister() - && interval->HasSpillSlot()) { - locations->ClearStackBit(interval->GetSpillSlot() / kVRegSize); - } + for (size_t i = 0, num_moves = spills->NumMoves(); i != num_moves; ++i) { + Location dest = spills->MoveOperandsAt(i)->GetDestination(); + // All parallel moves in loop headers are spills. + DCHECK(dest.IsStackSlot() || dest.IsDoubleStackSlot() || dest.IsSIMDStackSlot()) << dest; + // Clear the stack bit marking a reference. Do not bother to check if the spill is + // actually a reference spill, clearing bits that are already zero is harmless. + locations->ClearStackBit(dest.GetStackIndex() / kVRegSize); + } } void CodeGenerator::EmitParallelMoves(Location from1, Location to1, - Primitive::Type type1, + DataType::Type type1, Location from2, Location to2, - Primitive::Type type2) { - HParallelMove parallel_move(GetGraph()->GetArena()); + DataType::Type type2) { + HParallelMove parallel_move(GetGraph()->GetAllocator()); parallel_move.AddMove(from1, to1, type1, nullptr); parallel_move.AddMove(from2, to2, type2, nullptr); GetMoveResolver()->EmitNativeCode(&parallel_move); @@ -1408,7 +1595,7 @@ void CodeGenerator::CreateSystemArrayCopyLocationSummary(HInvoke* invoke) { return; } - ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena(); + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); LocationSummary* locations = new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); @@ -1427,31 +1614,7 @@ void CodeGenerator::EmitJitRoots(uint8_t* code, Handle<mirror::ObjectArray<mirror::Object>> roots, const uint8_t* roots_data) { - DCHECK_EQ(static_cast<size_t>(roots->GetLength()), GetNumberOfJitRoots()); - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - size_t index = 0; - for (auto& entry : jit_string_roots_) { - // Update the `roots` with the string, and replace the address temporarily - // stored to the index in the table. - uint64_t address = entry.second; - roots->Set(index, reinterpret_cast<StackReference<mirror::String>*>(address)->AsMirrorPtr()); - DCHECK(roots->Get(index) != nullptr); - entry.second = index; - // Ensure the string is strongly interned. This is a requirement on how the JIT - // handles strings. b/32995596 - class_linker->GetInternTable()->InternStrong( - reinterpret_cast<mirror::String*>(roots->Get(index))); - ++index; - } - for (auto& entry : jit_class_roots_) { - // Update the `roots` with the class, and replace the address temporarily - // stored to the index in the table.
- uint64_t address = entry.second; - roots->Set(index, reinterpret_cast<StackReference<mirror::Class>*>(address)->AsMirrorPtr()); - DCHECK(roots->Get(index) != nullptr); - entry.second = index; - ++index; - } + code_generation_data_->EmitJitRoots(roots); EmitJitRootPatches(code, roots_data); } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 73202b4fd1..f784a1a857 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -24,6 +24,8 @@ #include "base/bit_field.h" #include "base/bit_utils.h" #include "base/enums.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" #include "globals.h" #include "graph_visualizer.h" #include "locations.h" @@ -32,9 +34,7 @@ #include "optimizing_compiler_stats.h" #include "read_barrier_option.h" #include "stack.h" -#include "stack_map_stream.h" -#include "string_reference.h" -#include "type_reference.h" +#include "stack_map.h" #include "utils/label.h" namespace art { @@ -61,9 +61,13 @@ class Assembler; class CodeGenerator; class CompilerDriver; class CompilerOptions; -class LinkerPatch; +class StackMapStream; class ParallelMoveResolver; +namespace linker { +class LinkerPatch; +} // namespace linker + class CodeAllocator { public: CodeAllocator() {} @@ -143,8 +147,8 @@ class SlowPathCode : public DeletableArenaObject<kArenaAllocSlowPaths> { class InvokeDexCallingConventionVisitor { public: - virtual Location GetNextLocation(Primitive::Type type) = 0; - virtual Location GetReturnLocation(Primitive::Type type) const = 0; + virtual Location GetNextLocation(DataType::Type type) = 0; + virtual Location GetReturnLocation(DataType::Type type) const = 0; virtual Location GetMethodLocation() const = 0; protected: @@ -166,9 +170,9 @@ class FieldAccessCallingConvention { public: virtual Location GetObjectLocation() const = 0; virtual Location GetFieldIndexLocation() const = 0; - virtual Location GetReturnLocation(Primitive::Type type) const = 0; - virtual Location GetSetValueLocation(Primitive::Type type, bool is_instance) const = 0; - virtual Location GetFpuLocation(Primitive::Type type) const = 0; + virtual Location GetReturnLocation(DataType::Type type) const = 0; + virtual Location GetSetValueLocation(DataType::Type type, bool is_instance) const = 0; + virtual Location GetFpuLocation(DataType::Type type) const = 0; virtual ~FieldAccessCallingConvention() {} protected: @@ -187,7 +191,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { const InstructionSetFeatures& isa_features, const CompilerOptions& compiler_options, OptimizingCompilerStats* stats = nullptr); - virtual ~CodeGenerator() {} + virtual ~CodeGenerator(); // Get the graph. This is the outermost graph, never the graph of a method being inlined. 
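// This header now forward-declares StackMapStream and linker::LinkerPatch instead of
// including their definitions, which works because they are only named by pointer or
// reference here. A compact sketch of that dependency-trimming technique; all names
// below are invented for the example:
#include <cstddef>

namespace demo {
namespace linker {
class Patch;  // forward declaration: enough for pointer/reference use
}  // namespace linker

class Stream;  // defined in the .cc file that actually needs the full type

class Emitter {
 public:
  // Only pointers to the forward-declared types appear in this interface, so the
  // heavy headers are pulled in by the implementation file alone.
  void EmitPatches(linker::Patch* patches, std::size_t count);
  Stream* GetStream() const { return stream_; }

 private:
  Stream* stream_ = nullptr;
};
}  // namespace demo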
HGraph* GetGraph() const { return graph_; } @@ -205,12 +209,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual void Initialize() = 0; virtual void Finalize(CodeAllocator* allocator); - virtual void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches); + virtual void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches); virtual void GenerateFrameEntry() = 0; virtual void GenerateFrameExit() = 0; virtual void Bind(HBasicBlock* block) = 0; virtual void MoveConstant(Location destination, int32_t value) = 0; - virtual void MoveLocation(Location dst, Location src, Primitive::Type dst_type) = 0; + virtual void MoveLocation(Location dst, Location src, DataType::Type dst_type) = 0; virtual void AddLocationAsTemp(Location location, LocationSummary* locations) = 0; virtual Assembler* GetAssembler() = 0; @@ -254,8 +258,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { const CompilerOptions& GetCompilerOptions() const { return compiler_options_; } - void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const; - // Saves the register in the stack. Returns the size taken on stack. virtual size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) = 0; // Restores the register from the stack. Returns the size taken on stack. @@ -264,7 +266,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { virtual size_t SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; virtual size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) = 0; - virtual bool NeedsTwoRegisters(Primitive::Type type) const = 0; + virtual bool NeedsTwoRegisters(DataType::Type type) const = 0; // Returns whether we should split long moves in parallel moves. virtual bool ShouldSplitLongMoves() const { return false; } @@ -337,18 +339,16 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // TODO: Replace with a catch-entering instruction that records the environment. void RecordCatchBlockInfo(); - // TODO: Avoid creating the `std::unique_ptr` here. - void AddSlowPath(SlowPathCode* slow_path) { - slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path)); - } + // Get the ScopedArenaAllocator used for codegen memory allocation. + ScopedArenaAllocator* GetScopedAllocator(); + + void AddSlowPath(SlowPathCode* slow_path); void BuildStackMaps(MemoryRegion stack_map_region, MemoryRegion method_info_region, - const DexFile::CodeItem& code_item); + const DexFile::CodeItem* code_item_for_osr_check); void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size); - size_t GetNumberOfJitRoots() const { - return jit_string_roots_.size() + jit_class_roots_.size(); - } + size_t GetNumberOfJitRoots() const; // Fills the `literals` array with literals collected during code generation. // Also emits literal patches. @@ -379,7 +379,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // for the suspend check at the back edge (instead of where the suspend check // is, which is the loop entry). At this point, the spill slots for the phis // have not been written to. 
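// A minimal sketch of the revised contract documented above, with simplified types:
// the caller passes the loop header's spill moves, and the bit for each spill
// destination is cleared in the stack mask so the GC does not scan slots that have
// not been written yet. Assumes fewer than 64 slots purely to keep the sketch short.
#include <cstdint>

struct SpillMove { int stack_index; };  // destination slot offset, in bytes

inline void ClearSpillBits(const SpillMove* moves, int num_moves,
                           uint64_t* stack_mask, int bytes_per_slot) {
  for (int i = 0; i < num_moves; ++i) {
    int bit = moves[i].stack_index / bytes_per_slot;
    *stack_mask &= ~(uint64_t{1} << bit);  // harmless if the bit was already clear
  }
}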
- void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check) const; + void ClearSpillSlotsFromLoopPhisInStackMap(HSuspendCheck* suspend_check, + HParallelMove* spills) const; bool* GetBlockedCoreRegisters() const { return blocked_core_registers_; } bool* GetBlockedFloatingPointRegisters() const { return blocked_fpu_registers_; } @@ -387,13 +388,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { bool IsBlockedCoreRegister(size_t i) { return blocked_core_registers_[i]; } bool IsBlockedFloatingPointRegister(size_t i) { return blocked_fpu_registers_[i]; } - // Helper that returns the pointer offset of an index in an object array. - // Note: this method assumes we always have the same pointer size, regardless - // of the architecture. - static size_t GetCacheOffset(uint32_t index); - // Pointer variant for ArtMethod and ArtField arrays. - size_t GetCachePointerOffset(uint32_t index); - // Helper that returns the offset of the array's length field. // Note: Besides the normal arrays, we also use the HArrayLength for // accessing the String's `count` field in String intrinsics. @@ -406,15 +400,59 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void EmitParallelMoves(Location from1, Location to1, - Primitive::Type type1, + DataType::Type type1, Location from2, Location to2, - Primitive::Type type2); + DataType::Type type2); + + static bool InstanceOfNeedsReadBarrier(HInstanceOf* instance_of) { + // Used only for kExactCheck, kAbstractClassCheck, kClassHierarchyCheck and kArrayObjectCheck. + DCHECK(instance_of->GetTypeCheckKind() == TypeCheckKind::kExactCheck || + instance_of->GetTypeCheckKind() == TypeCheckKind::kAbstractClassCheck || + instance_of->GetTypeCheckKind() == TypeCheckKind::kClassHierarchyCheck || + instance_of->GetTypeCheckKind() == TypeCheckKind::kArrayObjectCheck) + << instance_of->GetTypeCheckKind(); + // If the target class is in the boot image, it's non-moveable and it doesn't matter + // if we compare it with a from-space or to-space reference, the result is the same. + // It's OK to traverse a class hierarchy jumping between from-space and to-space. + return kEmitCompilerReadBarrier && !instance_of->GetTargetClass()->IsInBootImage(); + } + + static ReadBarrierOption ReadBarrierOptionForInstanceOf(HInstanceOf* instance_of) { + return InstanceOfNeedsReadBarrier(instance_of) ? kWithReadBarrier : kWithoutReadBarrier; + } + + static bool IsTypeCheckSlowPathFatal(HCheckCast* check_cast) { + switch (check_cast->GetTypeCheckKind()) { + case TypeCheckKind::kExactCheck: + case TypeCheckKind::kAbstractClassCheck: + case TypeCheckKind::kClassHierarchyCheck: + case TypeCheckKind::kArrayObjectCheck: + case TypeCheckKind::kInterfaceCheck: { + bool needs_read_barrier = + kEmitCompilerReadBarrier && !check_cast->GetTargetClass()->IsInBootImage(); + // We do not emit read barriers for HCheckCast, so we can get false negatives + // and the slow path shall re-check and simply return if the cast is actually OK. + return !needs_read_barrier; + } + case TypeCheckKind::kArrayCheck: + case TypeCheckKind::kUnresolvedCheck: + return false; + } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); + } - static bool StoreNeedsWriteBarrier(Primitive::Type type, HInstruction* value) { + static LocationSummary::CallKind GetCheckCastCallKind(HCheckCast* check_cast) { + return (IsTypeCheckSlowPathFatal(check_cast) && !check_cast->CanThrowIntoCatchBlock()) + ? LocationSummary::kNoCall // In fact, call on a fatal (non-returning) slow path. 
+ : LocationSummary::kCallOnSlowPath; + } + + static bool StoreNeedsWriteBarrier(DataType::Type type, HInstruction* value) { // Check that null value is not represented as an integer constant. - DCHECK(type != Primitive::kPrimNot || !value->IsIntConstant()); - return type == Primitive::kPrimNot && !value->IsNullConstant(); + DCHECK(type != DataType::Type::kReference || !value->IsIntConstant()); + return type == DataType::Type::kReference && !value->IsNullConstant(); } @@ -446,6 +484,16 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return GetFrameSize() == (CallPushesPC() ? GetWordSize() : 0); } + static int8_t GetInt8ValueOf(HConstant* constant) { + DCHECK(constant->IsIntConstant()); + return constant->AsIntConstant()->GetValue(); + } + + static int16_t GetInt16ValueOf(HConstant* constant) { + DCHECK(constant->IsIntConstant()); + return constant->AsIntConstant()->GetValue(); + } + static int32_t GetInt32ValueOf(HConstant* constant) { if (constant->IsIntConstant()) { return constant->AsIntConstant()->GetValue(); @@ -493,12 +541,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { void CreateUnresolvedFieldLocationSummary( HInstruction* field_access, - Primitive::Type field_type, + DataType::Type field_type, const FieldAccessCallingConvention& calling_convention); void GenerateUnresolvedFieldAccess( HInstruction* field_access, - Primitive::Type field_type, + DataType::Type field_type, uint32_t field_index, uint32_t dex_pc, const FieldAccessCallingConvention& calling_convention); @@ -562,7 +610,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0; // Copy the result of a call into the given target. - virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0; + virtual void MoveFromReturnRegister(Location trg, DataType::Type type) = 0; virtual void GenerateNop() = 0; @@ -570,14 +618,18 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { protected: // Patch info used for recording locations of required linker patches and their targets, - // i.e. target method, string, type or code identified by their dex file and index. + // i.e. target method, string, type or code identified by their dex file and index, + // or .data.bimg.rel.ro entries identified by the boot image offset. template <typename LabelType> struct PatchInfo { - PatchInfo(const DexFile& target_dex_file, uint32_t target_index) - : dex_file(target_dex_file), index(target_index) { } - - const DexFile& dex_file; - uint32_t index; + PatchInfo(const DexFile* dex_file, uint32_t off_or_idx) + : target_dex_file(dex_file), offset_or_index(off_or_idx), label() { } + + // Target dex file or null for .data.bmig.rel.ro patches. + const DexFile* target_dex_file; + // Either the boot image offset (to write to .data.bmig.rel.ro) or string/type/method index. + uint32_t offset_or_index; + // Label for the instruction to patch. 
LabelType label; }; @@ -588,38 +640,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask, const CompilerOptions& compiler_options, - OptimizingCompilerStats* stats) - : frame_size_(0), - core_spill_mask_(0), - fpu_spill_mask_(0), - first_register_slot_in_slow_path_(0), - allocated_registers_(RegisterSet::Empty()), - blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers, - kArenaAllocCodeGenerator)), - blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers, - kArenaAllocCodeGenerator)), - number_of_core_registers_(number_of_core_registers), - number_of_fpu_registers_(number_of_fpu_registers), - number_of_register_pairs_(number_of_register_pairs), - core_callee_save_mask_(core_callee_save_mask), - fpu_callee_save_mask_(fpu_callee_save_mask), - stack_map_stream_(graph->GetArena(), graph->GetInstructionSet()), - block_order_(nullptr), - jit_string_roots_(StringReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_class_roots_(TypeReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - disasm_info_(nullptr), - stats_(stats), - graph_(graph), - compiler_options_(compiler_options), - slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - current_slow_path_(nullptr), - current_block_index_(0), - is_leaf_(true), - requires_current_method_(false) { - slow_paths_.reserve(8); - } + OptimizingCompilerStats* stats); virtual HGraphVisitor* GetLocationBuilder() = 0; virtual HGraphVisitor* GetInstructionVisitor() = 0; @@ -646,7 +667,7 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { bool CallPushesPC() const { InstructionSet instruction_set = GetInstructionSet(); - return instruction_set == kX86 || instruction_set == kX86_64; + return instruction_set == InstructionSet::kX86 || instruction_set == InstructionSet::kX86_64; } // Arm64 has its own type for a label, so we need to templatize these methods @@ -657,8 +678,8 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // We use raw array allocations instead of ArenaVector<> because Labels are // non-constructible and non-movable and as such cannot be held in a vector. size_t size = GetGraph()->GetBlocks().size(); - LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size, - kArenaAllocCodeGenerator); + LabelType* labels = + GetGraph()->GetAllocator()->AllocArray<LabelType>(size, kArenaAllocCodeGenerator); for (size_t i = 0; i != size; ++i) { new(labels + i) LabelType(); } @@ -675,12 +696,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { return current_slow_path_; } + StackMapStream* GetStackMapStream(); + + void ReserveJitStringRoot(StringReference string_reference, Handle<mirror::String> string); + uint64_t GetJitStringRootIndex(StringReference string_reference); + void ReserveJitClassRoot(TypeReference type_reference, Handle<mirror::Class> klass); + uint64_t GetJitClassRootIndex(TypeReference type_reference); + // Emit the patches assocatied with JIT roots. Only applies to JIT compiled code. - virtual void EmitJitRootPatches(uint8_t* code ATTRIBUTE_UNUSED, - const uint8_t* roots_data ATTRIBUTE_UNUSED) { - DCHECK_EQ(jit_string_roots_.size(), 0u); - DCHECK_EQ(jit_class_roots_.size(), 0u); - } + virtual void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data); // Frame size required for this method. 
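// The templatized label helper above allocates a raw array and placement-constructs
// each element because assembler labels are neither copyable nor movable and so
// cannot live in a growable vector. A stand-alone sketch of that pattern with
// invented types:
#include <cstddef>
#include <new>

class FixedLabel {  // stand-in for a non-movable assembler label
 public:
  FixedLabel() = default;
  FixedLabel(const FixedLabel&) = delete;
  FixedLabel& operator=(const FixedLabel&) = delete;
};

// `alloc` stands in for an arena allocation hook returning suitably aligned memory.
inline FixedLabel* AllocateLabels(void* (*alloc)(std::size_t), std::size_t count) {
  void* memory = alloc(count * sizeof(FixedLabel));
  FixedLabel* labels = static_cast<FixedLabel*>(memory);
  for (std::size_t i = 0; i != count; ++i) {
    ::new (labels + i) FixedLabel();  // construct each element in place
  }
  return labels;
}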
uint32_t frame_size_; @@ -702,24 +726,15 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { const uint32_t core_callee_save_mask_; const uint32_t fpu_callee_save_mask_; - StackMapStream stack_map_stream_; - // The order to use for code generation. const ArenaVector<HBasicBlock*>* block_order_; - // Maps a StringReference (dex_file, string_index) to the index in the literal table. - // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` - // will compute all the indices. - ArenaSafeMap<StringReference, uint64_t, StringReferenceValueComparator> jit_string_roots_; - - // Maps a ClassReference (dex_file, type_index) to the index in the literal table. - // Entries are intially added with a pointer in the handle zone, and `EmitJitRoots` - // will compute all the indices. - ArenaSafeMap<TypeReference, uint64_t, TypeReferenceValueComparator> jit_class_roots_; - DisassemblyInformation* disasm_info_; private: + class CodeGenerationData; + + void InitializeCodeGenerationData(); size_t GetStackOffsetOfSavedRegister(size_t index); void GenerateSlowPaths(); void BlockIfInRegister(Location location, bool is_out = false) const; @@ -730,8 +745,6 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { HGraph* const graph_; const CompilerOptions& compiler_options_; - ArenaVector<std::unique_ptr<SlowPathCode>> slow_paths_; - // The current slow-path that we're generating code for. SlowPathCode* current_slow_path_; @@ -747,6 +760,12 @@ class CodeGenerator : public DeletableArenaObject<kArenaAllocCodeGenerator> { // needs the environment including a valid stack frame. bool requires_current_method_; + // The CodeGenerationData contains a ScopedArenaAllocator intended for reusing the + // ArenaStack memory allocated in previous passes instead of adding to the memory + // held by the ArenaAllocator. This ScopedArenaAllocator is created in + // CodeGenerator::Compile() and remains alive until the CodeGenerator is destroyed. + std::unique_ptr<CodeGenerationData> code_generation_data_; + friend class OptimizingCFITest; DISALLOW_COPY_AND_ASSIGN(CodeGenerator); @@ -812,7 +831,8 @@ class SlowPathGenerator { SlowPathGenerator(HGraph* graph, CodeGenerator* codegen) : graph_(graph), codegen_(codegen), - slow_path_map_(std::less<uint32_t>(), graph->GetArena()->Adapter(kArenaAllocSlowPaths)) {} + slow_path_map_(std::less<uint32_t>(), + graph->GetAllocator()->Adapter(kArenaAllocSlowPaths)) {} // Creates and adds a new slow-path, if needed, or returns existing one otherwise. // Templating the method (rather than the whole class) on the slow-path type enables @@ -846,10 +866,12 @@ class SlowPathGenerator { } } else { // First time this dex-pc is seen. - iter = slow_path_map_.Put(dex_pc, {{}, {graph_->GetArena()->Adapter(kArenaAllocSlowPaths)}}); + iter = slow_path_map_.Put(dex_pc, + {{}, {graph_->GetAllocator()->Adapter(kArenaAllocSlowPaths)}}); } // Cannot share: create and add new slow-path for this particular dex-pc. 
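// The allocation just below switches slow paths from the graph's long-lived arena to
// the code generator's scoped allocator, and AddSlowPath hands ownership to the
// per-compilation CodeGenerationData. A simplified sketch of that ownership scheme;
// the stand-ins here use the heap, whereas the real code places objects in the
// scoped arena:
#include <memory>
#include <vector>

struct SlowPathStub {};  // stand-in for SlowPathCode

class PassOwner {  // stand-in for the codegen-owned CodeGenerationData
 public:
  // Registered paths are destroyed with the owner, i.e. when code generation ends.
  void AddSlowPath(SlowPathStub* path) { slow_paths_.emplace_back(path); }
 private:
  std::vector<std::unique_ptr<SlowPathStub>> slow_paths_;
};

inline SlowPathStub* NewSlowPath(PassOwner* owner) {
  SlowPathStub* path = new SlowPathStub();  // real code: placement-new in the arena
  owner->AddSlowPath(path);                 // the owner now controls the lifetime
  return path;
}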
- SlowPathCodeType* slow_path = new (graph_->GetArena()) SlowPathCodeType(instruction); + SlowPathCodeType* slow_path = + new (codegen_->GetScopedAllocator()) SlowPathCodeType(instruction); iter->second.emplace_back(std::make_pair(instruction, slow_path)); codegen_->AddSlowPath(slow_path); return slow_path; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 7e5b1a0fd1..60f8f98757 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -21,17 +21,20 @@ #include "art_method.h" #include "base/bit_utils.h" #include "base/bit_utils_iterator.h" +#include "class_table.h" #include "code_generator_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_arm64.h" #include "linker/arm64/relative_patcher_arm64.h" +#include "linker/linker_patch.h" +#include "lock_word.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" -#include "lock_word.h" #include "offsets.h" #include "thread.h" #include "utils/arm64/assembler_arm64.h" @@ -141,24 +144,24 @@ inline Condition ARM64FPCondition(IfCondition cond, bool gt_bias) { } } -Location ARM64ReturnLocation(Primitive::Type return_type) { +Location ARM64ReturnLocation(DataType::Type return_type) { // Note that in practice, `LocationFrom(x0)` and `LocationFrom(w0)` create the // same Location object, and so do `LocationFrom(d0)` and `LocationFrom(s0)`, // but we use the exact registers for clarity. - if (return_type == Primitive::kPrimFloat) { + if (return_type == DataType::Type::kFloat32) { return LocationFrom(s0); - } else if (return_type == Primitive::kPrimDouble) { + } else if (return_type == DataType::Type::kFloat64) { return LocationFrom(d0); - } else if (return_type == Primitive::kPrimLong) { + } else if (return_type == DataType::Type::kInt64) { return LocationFrom(x0); - } else if (return_type == Primitive::kPrimVoid) { + } else if (return_type == DataType::Type::kVoid) { return Location::NoLocation(); } else { return LocationFrom(w0); } } -Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type return_type) { +Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type return_type) { return ARM64ReturnLocation(return_type); } @@ -262,9 +265,12 @@ class BoundsCheckSlowPathARM64 : public SlowPathCodeARM64 { // We're moving two locations to locations that could overlap, so we need a parallel // move resolver. InvokeRuntimeCallingConvention calling_convention; - codegen->EmitParallelMoves( - locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimInt, - locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimInt); + codegen->EmitParallelMoves(locations->InAt(0), + LocationFrom(calling_convention.GetRegisterAt(0)), + DataType::Type::kInt32, + locations->InAt(1), + LocationFrom(calling_convention.GetRegisterAt(1)), + DataType::Type::kInt32); QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? 
kQuickThrowStringBounds : kQuickThrowArrayBounds; @@ -305,40 +311,23 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { LoadClassSlowPathARM64(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, - bool do_clinit, - vixl::aarch64::Register bss_entry_temp = vixl::aarch64::Register(), - vixl::aarch64::Label* bss_entry_adrp_label = nullptr) + bool do_clinit) : SlowPathCodeARM64(at), cls_(cls), dex_pc_(dex_pc), - do_clinit_(do_clinit), - bss_entry_temp_(bss_entry_temp), - bss_entry_adrp_label_(bss_entry_adrp_label) { + do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); - constexpr bool call_saves_everything_except_r0_ip0 = (!kUseReadBarrier || kUseBakerReadBarrier); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - // For HLoadClass/kBssEntry/kSaveEverything, the page address of the entry is in a temp - // register, make sure it's not clobbered by the call or by saving/restoring registers. - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - bool is_load_class_bss_entry = - (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry); - if (is_load_class_bss_entry) { - DCHECK(bss_entry_temp_.IsValid()); - DCHECK(!bss_entry_temp_.Is(calling_convention.GetRegisterAt(0))); - DCHECK( - !UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(bss_entry_temp_)); - } - __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; dex::TypeIndex type_index = cls_->GetTypeIndex(); __ Mov(calling_convention.GetRegisterAt(0).W(), type_index.index_); QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage @@ -353,30 +342,10 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { // Move the class to the desired location. if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - Primitive::Type type = instruction_->GetType(); + DataType::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(out, calling_convention.GetReturnLocation(type), type); } RestoreLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry. - if (is_load_class_bss_entry) { - DCHECK(out.IsValid()); - const DexFile& dex_file = cls_->GetDexFile(); - if (call_saves_everything_except_r0_ip0) { - // The class entry page address was preserved in bss_entry_temp_ thanks to kSaveEverything. - } else { - // For non-Baker read barrier, we need to re-calculate the address of the class entry page. - bss_entry_adrp_label_ = arm64_codegen->NewBssEntryTypePatch(dex_file, type_index); - arm64_codegen->EmitAdrpPlaceholder(bss_entry_adrp_label_, bss_entry_temp_); - } - vixl::aarch64::Label* strp_label = - arm64_codegen->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label_); - { - SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler()); - __ Bind(strp_label); - __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot), - MemOperand(bss_entry_temp_, /* offset placeholder */ 0)); - } - } __ B(GetExitLabel()); } @@ -392,70 +361,38 @@ class LoadClassSlowPathARM64 : public SlowPathCodeARM64 { // Whether to initialize the class. 
const bool do_clinit_; - // For HLoadClass/kBssEntry, the temp register and the label of the ADRP where it was loaded. - vixl::aarch64::Register bss_entry_temp_; - vixl::aarch64::Label* bss_entry_adrp_label_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARM64); }; class LoadStringSlowPathARM64 : public SlowPathCodeARM64 { public: - LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label) - : SlowPathCodeARM64(instruction), - temp_(temp), - adrp_label_(adrp_label) {} + explicit LoadStringSlowPathARM64(HLoadString* instruction) + : SlowPathCodeARM64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); - InvokeRuntimeCallingConvention calling_convention; - // Make sure `temp_` is not clobbered by the call or by saving/restoring registers. - DCHECK(temp_.IsValid()); - DCHECK(!temp_.Is(calling_convention.GetRegisterAt(0))); - DCHECK(!UseScratchRegisterScope(arm64_codegen->GetVIXLAssembler()).IsAvailable(temp_)); - __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); + InvokeRuntimeCallingConvention calling_convention; const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); __ Mov(calling_convention.GetRegisterAt(0).W(), string_index.index_); arm64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - Primitive::Type type = instruction_->GetType(); + DataType::Type type = instruction_->GetType(); arm64_codegen->MoveLocation(locations->Out(), calling_convention.GetReturnLocation(type), type); RestoreLiveRegisters(codegen, locations); - // Store the resolved String to the BSS entry. - const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile(); - if (!kUseReadBarrier || kUseBakerReadBarrier) { - // The string entry page address was preserved in temp_ thanks to kSaveEverything. - } else { - // For non-Baker read barrier, we need to re-calculate the address of the string entry page. 
- adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index); - arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_); - } - vixl::aarch64::Label* strp_label = - arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_); - { - SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler()); - __ Bind(strp_label); - __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot), - MemOperand(temp_, /* offset placeholder */ 0)); - } - __ B(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; } private: - const Register temp_; - vixl::aarch64::Label* adrp_label_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64); }; @@ -541,7 +478,7 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -550,14 +487,14 @@ class TypeCheckSlowPathARM64 : public SlowPathCodeARM64 { InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves(locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot); + DataType::Type::kReference); if (instruction_->IsInstanceOf()) { arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); - Primitive::Type ret_type = instruction_->GetType(); + DataType::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { @@ -614,21 +551,21 @@ class ArraySetSlowPathARM64 : public SlowPathCodeARM64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove( locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove( locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); parallel_move.AddMove( locations->InAt(2), LocationFrom(calling_convention.GetRegisterAt(2)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); @@ -1197,7 +1134,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); - Primitive::Type type = Primitive::kPrimNot; + DataType::Type type = DataType::Type::kReference; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); DCHECK(instruction_->IsInstanceFieldGet() || @@ -1226,7 +1163,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics. if (instruction_->IsArrayGet()) { // Compute the actual memory offset and store it in `index`.
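The slow paths above hand overlapping moves to the parallel move resolver because an argument register being written may still hold a value that another of the moves needs to read. The reference semantics are "read every source before writing any destination"; the self-contained model below implements exactly that with a snapshot, while the real MoveResolver reaches the same result by ordering independent moves first and breaking cycles with a scratch register:

#include <cassert>
#include <vector>

// One pending move: the value in regs[src] must end up in regs[dst].
struct Move { int dst; int src; };

// Parallel-move semantics: all sources are read before any destination is
// written. Emitting "r0 <- r1; r1 <- r0" as two ordinary moves would lose a
// value, which is what the resolver protects against.
void ApplyParallelMove(std::vector<int>& regs, const std::vector<Move>& moves) {
  std::vector<int> snapshot = regs;   // Read every source up front.
  for (const Move& m : moves) {
    regs[m.dst] = snapshot[m.src];    // Then perform every write.
  }
}

int main() {
  std::vector<int> regs = {10, 20, 30};
  // Swap r0 and r1 while also copying r1 into r2: the moves overlap.
  ApplyParallelMove(regs, {{0, 1}, {1, 0}, {2, 1}});
  assert(regs[0] == 20 && regs[1] == 10 && regs[2] == 20);
  return 0;
}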
- Register index_reg = RegisterFrom(index_, Primitive::kPrimInt); + Register index_reg = RegisterFrom(index_, DataType::Type::kInt32); DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_.reg())); if (codegen->IsCoreCalleeSaveRegister(index_.reg())) { // We are about to change the value of `index_reg` (see the @@ -1265,7 +1202,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { // factor (2) cannot overflow in practice, as the runtime is // unable to allocate object arrays with a size larger than // 2^26 - 1 (that is, 2^28 - 4 bytes). - __ Lsl(index_reg, index_reg, Primitive::ComponentSizeShift(type)); + __ Lsl(index_reg, index_reg, DataType::SizeShift(type)); static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); @@ -1288,7 +1225,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { // We're moving two or three locations to locations that could // overlap, so we need a parallel move resolver. InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove(ref_, LocationFrom(calling_convention.GetRegisterAt(0)), type, @@ -1300,7 +1237,7 @@ class ReadBarrierForHeapReferenceSlowPathARM64 : public SlowPathCodeARM64 { if (index.IsValid()) { parallel_move.AddMove(index, LocationFrom(calling_convention.GetRegisterAt(2)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); } else { @@ -1362,7 +1299,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Primitive::Type type = Primitive::kPrimNot; + DataType::Type type = DataType::Type::kReference; DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg())); DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString()) @@ -1384,7 +1321,7 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { // type); // // which would emit a 32-bit move, as `type` is a (32-bit wide) - // reference type (`Primitive::kPrimNot`). + // reference type (`DataType::Type::kReference`).
__ Mov(calling_convention.GetRegisterAt(0), XRegisterFrom(out_)); arm64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, @@ -1408,26 +1345,26 @@ class ReadBarrierForRootSlowPathARM64 : public SlowPathCodeARM64 { #undef __ -Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) { Location next_location; - if (type == Primitive::kPrimVoid) { + if (type == DataType::Type::kVoid) { LOG(FATAL) << "Unreachable type " << type; } - if (Primitive::IsFloatingPointType(type) && + if (DataType::IsFloatingPointType(type) && (float_index_ < calling_convention.GetNumberOfFpuRegisters())) { next_location = LocationFrom(calling_convention.GetFpuRegisterAt(float_index_++)); - } else if (!Primitive::IsFloatingPointType(type) && + } else if (!DataType::IsFloatingPointType(type) && (gp_index_ < calling_convention.GetNumberOfRegisters())) { next_location = LocationFrom(calling_convention.GetRegisterAt(gp_index_++)); } else { size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); - next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) - : Location::StackSlot(stack_offset); + next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) + : Location::StackSlot(stack_offset); } // Space on the stack is reserved for all arguments. - stack_index_ += Primitive::Is64BitType(type) ? 2 : 1; + stack_index_ += DataType::Is64BitType(type) ? 2 : 1; return next_location; } @@ -1447,27 +1384,28 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, callee_saved_fp_registers.GetList(), compiler_options, stats), - block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this), - assembler_(graph->GetArena()), + move_resolver_(graph->GetAllocator(), this), + assembler_(graph->GetAllocator()), isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + 
string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Save the link register (containing the return address) to mimic Quick. AddAllocatedRegister(LocationFrom(lr)); } @@ -1543,19 +1481,28 @@ void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { void ParallelMoveResolverARM64::EmitMove(size_t index) { MoveOperands* move = moves_[index]; - codegen_->MoveLocation(move->GetDestination(), move->GetSource(), Primitive::kPrimVoid); + codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid); } void CodeGeneratorARM64::GenerateFrameEntry() { MacroAssembler* masm = GetVIXLAssembler(); __ Bind(&frame_entry_label_); - bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod(); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(masm); + Register temp = temps.AcquireX(); + __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp, temp, 1); + __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + } + + bool do_overflow_check = + FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod(); if (do_overflow_check) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireX(); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64))); + __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kArm64))); { // Ensure that between load and RecordPcInfo there are no pools emitted. ExactAssemblyScope eas(GetVIXLAssembler(), @@ -1595,6 +1542,8 @@ void CodeGeneratorARM64::GenerateFrameEntry() { __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag())); } } + + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::GenerateFrameExit() { @@ -1632,7 +1581,7 @@ void CodeGeneratorARM64::Bind(HBasicBlock* block) { void CodeGeneratorARM64::MoveConstant(Location location, int32_t value) { DCHECK(location.IsRegister()); - __ Mov(RegisterFrom(location, Primitive::kPrimInt), value); + __ Mov(RegisterFrom(location, DataType::Type::kInt32), value); } void CodeGeneratorARM64::AddLocationAsTemp(Location location, LocationSummary* locations) { @@ -1739,15 +1688,15 @@ void CodeGeneratorARM64::MoveConstant(CPURegister destination, HConstant* consta } -static bool CoherentConstantAndType(Location constant, Primitive::Type type) { +static bool CoherentConstantAndType(Location constant, DataType::Type type) { DCHECK(constant.IsConstant()); HConstant* cst = constant.GetConstant(); - return (cst->IsIntConstant() && type == Primitive::kPrimInt) || + return (cst->IsIntConstant() && type == DataType::Type::kInt32) || // Null is mapped to a core W register, which we associate with kPrimInt. 
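When hotness counting is enabled, the frame entry above bumps a 16-bit counter held in the ArtMethod with an Ldrh/Add/Strh sequence, so the stored value simply wraps once it passes 0xffff. A conceptual C++ equivalent, with ToyMethod standing in for the real ArtMethod layout and HotnessCountOffset():

#include <cstdint>

// Stand-in for the method object; the real counter lives at
// ArtMethod::HotnessCountOffset() and is addressed via kArtMethodRegister.
struct ToyMethod {
  uint16_t hotness_count = 0;
};

// Equivalent of the emitted Ldrh (zero-extending load), Add, Strh (store of
// the low 16 bits): increment with wrap-around, no saturation.
inline void CountMethodEntry(ToyMethod* method) {
  method->hotness_count = static_cast<uint16_t>(method->hotness_count + 1u);
}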
- (cst->IsNullConstant() && type == Primitive::kPrimInt) || - (cst->IsLongConstant() && type == Primitive::kPrimLong) || - (cst->IsFloatConstant() && type == Primitive::kPrimFloat) || - (cst->IsDoubleConstant() && type == Primitive::kPrimDouble); + (cst->IsNullConstant() && type == DataType::Type::kInt32) || + (cst->IsLongConstant() && type == DataType::Type::kInt64) || + (cst->IsFloatConstant() && type == DataType::Type::kFloat32) || + (cst->IsDoubleConstant() && type == DataType::Type::kFloat64); } // Allocate a scratch register from the VIXL pool, querying first @@ -1765,7 +1714,7 @@ static CPURegister AcquireFPOrCoreCPURegisterOfSize(vixl::aarch64::MacroAssemble void CodeGeneratorARM64::MoveLocation(Location destination, Location source, - Primitive::Type dst_type) { + DataType::Type dst_type) { if (source.Equals(destination)) { return; } @@ -1774,7 +1723,7 @@ void CodeGeneratorARM64::MoveLocation(Location destination, // locations. When moving from and to a register, the argument type can be // used to generate 32bit instead of 64bit moves. In debug mode we also // checks the coherency of the locations and the type. - bool unspecified_type = (dst_type == Primitive::kPrimVoid); + bool unspecified_type = (dst_type == DataType::Type::kVoid); if (destination.IsRegister() || destination.IsFpuRegister()) { if (unspecified_type) { @@ -1784,17 +1733,17 @@ void CodeGeneratorARM64::MoveLocation(Location destination, || src_cst->IsFloatConstant() || src_cst->IsNullConstant()))) { // For stack slots and 32bit constants, a 64bit type is appropriate. - dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; + dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32; } else { // If the source is a double stack slot or a 64bit constant, a 64bit // type is appropriate. Else the source is a register, and since the // type has not been specified, we chose a 64bit type to force a 64bit // move. - dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble; + dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64; } } - DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) || - (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type))); + DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) || + (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type))); CPURegister dst = CPURegisterFrom(destination, dst_type); if (source.IsStackSlot() || source.IsDoubleStackSlot()) { DCHECK(dst.Is64Bits() == source.IsDoubleStackSlot()); @@ -1809,17 +1758,17 @@ void CodeGeneratorARM64::MoveLocation(Location destination, __ Mov(Register(dst), RegisterFrom(source, dst_type)); } else { DCHECK(destination.IsFpuRegister()); - Primitive::Type source_type = Primitive::Is64BitType(dst_type) - ? Primitive::kPrimLong - : Primitive::kPrimInt; + DataType::Type source_type = DataType::Is64BitType(dst_type) + ? DataType::Type::kInt64 + : DataType::Type::kInt32; __ Fmov(FPRegisterFrom(destination, dst_type), RegisterFrom(source, source_type)); } } else { DCHECK(source.IsFpuRegister()); if (destination.IsRegister()) { - Primitive::Type source_type = Primitive::Is64BitType(dst_type) - ? Primitive::kPrimDouble - : Primitive::kPrimFloat; + DataType::Type source_type = DataType::Is64BitType(dst_type) + ? 
DataType::Type::kFloat64 + : DataType::Type::kFloat32; __ Fmov(RegisterFrom(destination, dst_type), FPRegisterFrom(source, source_type)); } else { DCHECK(destination.IsFpuRegister()); @@ -1853,13 +1802,14 @@ void CodeGeneratorARM64::MoveLocation(Location destination, if (source.IsRegister() || source.IsFpuRegister()) { if (unspecified_type) { if (source.IsRegister()) { - dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong; + dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64; } else { - dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble; + dst_type = + destination.IsStackSlot() ? DataType::Type::kFloat32 : DataType::Type::kFloat64; } } - DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) && - (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type))); + DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) && + (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type))); __ Str(CPURegisterFrom(source, dst_type), StackOperandFrom(destination)); } else if (source.IsConstant()) { DCHECK(unspecified_type || CoherentConstantAndType(source, dst_type)) @@ -1914,31 +1864,34 @@ void CodeGeneratorARM64::MoveLocation(Location destination, } } -void CodeGeneratorARM64::Load(Primitive::Type type, +void CodeGeneratorARM64::Load(DataType::Type type, CPURegister dst, const MemOperand& src) { switch (type) { - case Primitive::kPrimBoolean: + case DataType::Type::kBool: + case DataType::Type::kUint8: __ Ldrb(Register(dst), src); break; - case Primitive::kPrimByte: + case DataType::Type::kInt8: __ Ldrsb(Register(dst), src); break; - case Primitive::kPrimShort: - __ Ldrsh(Register(dst), src); - break; - case Primitive::kPrimChar: + case DataType::Type::kUint16: __ Ldrh(Register(dst), src); break; - case Primitive::kPrimInt: - case Primitive::kPrimNot: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type)); + case DataType::Type::kInt16: + __ Ldrsh(Register(dst), src); + break; + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); __ Ldr(dst, src); break; - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } } @@ -1950,7 +1903,7 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, MacroAssembler* masm = GetVIXLAssembler(); UseScratchRegisterScope temps(masm); Register temp_base = temps.AcquireX(); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); DCHECK(!src.IsPreIndex()); DCHECK(!src.IsPostIndex()); @@ -1961,7 +1914,9 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. 
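The Fmov instructions used by MoveLocation above copy the raw bit pattern between a core register and an FP register of the same width; they perform no numeric conversion, which is why the code pairs the destination with a source type of matching size (W with S, X with D). The same operation in portable C++ is a memcpy-based bit cast (std::bit_cast in C++20):

#include <cstdint>
#include <cstring>

// 32-bit core -> FP move: reinterpret the bits, do not convert the value.
inline float BitsToFloat(uint32_t bits) {
  static_assert(sizeof(float) == sizeof(uint32_t), "widths must match, like W <-> S");
  float value;
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}

// 64-bit FP -> core move, the X <-> D pairing.
inline uint64_t DoubleToBits(double value) {
  static_assert(sizeof(double) == sizeof(uint64_t), "widths must match, like X <-> D");
  uint64_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  return bits;
}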
MemOperand base = MemOperand(temp_base); switch (type) { - case Primitive::kPrimBoolean: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ ldarb(Register(dst), base); @@ -1969,18 +1924,12 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, MaybeRecordImplicitNullCheck(instruction); } } - break; - case Primitive::kPrimByte: - { - ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); - __ ldarb(Register(dst), base); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } + if (type == DataType::Type::kInt8) { + __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte); } - __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); break; - case Primitive::kPrimChar: + case DataType::Type::kUint16: + case DataType::Type::kInt16: { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ ldarh(Register(dst), base); @@ -1988,21 +1937,14 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, MaybeRecordImplicitNullCheck(instruction); } } - break; - case Primitive::kPrimShort: - { - ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); - __ ldarh(Register(dst), base); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } + if (type == DataType::Type::kInt16) { + __ Sbfx(Register(dst), Register(dst), 0, DataType::Size(type) * kBitsPerByte); } - __ Sbfx(Register(dst), Register(dst), 0, Primitive::ComponentSize(type) * kBitsPerByte); break; - case Primitive::kPrimInt: - case Primitive::kPrimNot: - case Primitive::kPrimLong: - DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type)); + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kInt64: + DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ ldar(Register(dst), base); @@ -2011,10 +1953,10 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, } } break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { DCHECK(dst.IsFPRegister()); - DCHECK_EQ(dst.Is64Bits(), Primitive::Is64BitType(type)); + DCHECK_EQ(dst.Is64Bits(), DataType::Is64BitType(type)); Register temp = dst.Is64Bits() ? 
temps.AcquireX() : temps.AcquireW(); { @@ -2027,39 +1969,44 @@ void CodeGeneratorARM64::LoadAcquire(HInstruction* instruction, __ Fmov(FPRegister(dst), temp); break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } } } -void CodeGeneratorARM64::Store(Primitive::Type type, +void CodeGeneratorARM64::Store(DataType::Type type, CPURegister src, const MemOperand& dst) { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: __ Strb(Register(src), dst); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: __ Strh(Register(src), dst); break; - case Primitive::kPrimInt: - case Primitive::kPrimNot: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type)); + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); __ Str(src, dst); break; - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } } void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, - Primitive::Type type, + DataType::Type type, CPURegister src, const MemOperand& dst, bool needs_null_check) { @@ -2076,8 +2023,9 @@ void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, MemOperand base = MemOperand(temp_base); // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ stlrb(Register(src), base); @@ -2086,8 +2034,8 @@ void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, } } break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ stlrh(Register(src), base); @@ -2096,10 +2044,10 @@ void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, } } break; - case Primitive::kPrimInt: - case Primitive::kPrimNot: - case Primitive::kPrimLong: - DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type)); + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kInt64: + DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); { ExactAssemblyScope eas(masm, kInstructionSize, CodeBufferCheckScope::kExactSize); __ stlr(Register(src), base); @@ -2108,9 +2056,9 @@ void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, } } break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - DCHECK_EQ(src.Is64Bits(), Primitive::Is64BitType(type)); + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + DCHECK_EQ(src.Is64Bits(), DataType::Is64BitType(type)); Register temp_src; if (src.IsZero()) { // The zero register is used to avoid synthesizing zero constants. 
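The acquire loads above use ldarb/ldarh, which zero-extend, so the signed narrow types kInt8 and kInt16 add an explicit Sbfx to sign-extend the result afterwards; the ordinary loads need no such fix-up because Ldrsb/Ldrsh exist. The same fix-up written in C++, assuming the usual two's-complement narrowing:

#include <cstdint>

// After a zero-extending byte load, reinterpret the low 8 bits as signed,
// i.e. the effect of Sbfx dst, dst, #0, #8.
inline int32_t SignExtendByte(uint32_t zero_extended) {
  return static_cast<int8_t>(zero_extended & 0xffu);
}

// After a zero-extending halfword load, the effect of Sbfx dst, dst, #0, #16.
inline int32_t SignExtendHalf(uint32_t zero_extended) {
  return static_cast<int16_t>(zero_extended & 0xffffu);
}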
@@ -2129,7 +2077,9 @@ void CodeGeneratorARM64::StoreRelease(HInstruction* instruction, } break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; } } @@ -2163,14 +2113,18 @@ void InstructionCodeGeneratorARM64::GenerateClassInitializationCheck(SlowPathCod Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireW(); - size_t status_offset = mirror::Class::StatusOffset().SizeValue(); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); // Even if the initialized flag is set, we need to ensure consistent memory ordering. // TODO(vixl): Let the MacroAssembler handle MemOperand. - __ Add(temp, class_reg, status_offset); - __ Ldar(temp, HeapOperand(temp)); - __ Cmp(temp, mirror::Class::kStatusInitialized); - __ B(lt, slow_path->GetEntryLabel()); + __ Add(temp, class_reg, status_byte_offset); + __ Ldarb(temp, HeapOperand(temp)); + __ Cmp(temp, shifted_initialized_value); + __ B(lo, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); } @@ -2202,12 +2156,12 @@ void InstructionCodeGeneratorARM64::GenerateSuspendCheck(HSuspendCheck* instruct SuspendCheckSlowPathARM64* slow_path = down_cast<SuspendCheckSlowPathARM64*>(instruction->GetSlowPath()); if (slow_path == nullptr) { - slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARM64(instruction, successor); + slow_path = + new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARM64(instruction, successor); instruction->SetSlowPath(slow_path); codegen_->AddSlowPath(slow_path); if (successor != nullptr) { DCHECK(successor->IsLoopHeader()); - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); } } else { DCHECK_EQ(slow_path->GetSuccessor(), successor); @@ -2233,47 +2187,20 @@ InstructionCodeGeneratorARM64::InstructionCodeGeneratorARM64(HGraph* graph, assembler_(codegen->GetAssembler()), codegen_(codegen) {} -#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M) \ - /* No unimplemented IR. */ - -#define UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name) name##UnimplementedInstructionBreakCode - -enum UnimplementedInstructionBreakCode { - // Using a base helps identify when we hit such breakpoints. 
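The initialization check above no longer loads the whole status word: it computes the offset of the byte that holds the status bits, loads just that byte with Ldarb, and compares it against ClassStatus::kInitialized shifted into the same bit position, taking the slow path on an unsigned lower-than. A standalone model of that arithmetic, treating the layout values as parameters rather than the real mirror::Class layout:

#include <cstddef>
#include <cstdint>

constexpr size_t kBitsPerByteLocal = 8;

struct ClassStatusCheck {
  size_t status_field_offset;   // Byte offset of the 32-bit status word.
  size_t status_lsb_position;   // Bit where the status enum starts in that word.

  // Offset of the single byte that contains the status bits.
  size_t StatusByteOffset() const {
    return status_field_offset + status_lsb_position / kBitsPerByteLocal;
  }
  // A status value shifted into its position within that byte.
  uint32_t ShiftedStatus(uint32_t status) const {
    return status << (status_lsb_position % kBitsPerByteLocal);
  }
  // Mirrors "Ldarb; Cmp; B.lo slow_path": the class counts as initialized
  // when the status byte is not below the shifted kInitialized value, which
  // relies on the status enum being ordered.
  bool IsInitialized(uint8_t status_byte, uint32_t initialized_status) const {
    return status_byte >= ShiftedStatus(initialized_status);
  }
};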
- UnimplementedInstructionBreakCodeBaseCode = 0x900, -#define ENUM_UNIMPLEMENTED_INSTRUCTION(name) UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name), - FOR_EACH_UNIMPLEMENTED_INSTRUCTION(ENUM_UNIMPLEMENTED_INSTRUCTION) -#undef ENUM_UNIMPLEMENTED_INSTRUCTION -}; - -#define DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS(name) \ - void InstructionCodeGeneratorARM64::Visit##name(H##name* instr ATTRIBUTE_UNUSED) { \ - __ Brk(UNIMPLEMENTED_INSTRUCTION_BREAK_CODE(name)); \ - } \ - void LocationsBuilderARM64::Visit##name(H##name* instr) { \ - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); \ - locations->SetOut(Location::Any()); \ - } - FOR_EACH_UNIMPLEMENTED_INSTRUCTION(DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS) -#undef DEFINE_UNIMPLEMENTED_INSTRUCTION_VISITORS - -#undef UNIMPLEMENTED_INSTRUCTION_BREAK_CODE -#undef FOR_EACH_UNIMPLEMENTED_INSTRUCTION - void LocationsBuilderARM64::HandleBinaryOp(HBinaryOperation* instr) { DCHECK_EQ(instr->InputCount(), 2U); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); - Primitive::Type type = instr->GetResultType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); + DataType::Type type = instr->GetResultType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ARM64EncodableConstantOrRegister(instr->InputAt(1), instr)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -2289,12 +2216,12 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, - object_field_get_with_read_barrier ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + object_field_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
// We need a temporary register for the read barrier marking slow @@ -2312,7 +2239,7 @@ void LocationsBuilderARM64::HandleFieldGet(HInstruction* instruction, } } locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { // The output overlaps for an object field get when read barriers @@ -2331,13 +2258,15 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, Location base_loc = locations->InAt(0); Location out = locations->Out(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - Primitive::Type field_type = field_info.GetFieldType(); + DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); + DataType::Type load_type = instruction->GetType(); MemOperand field = HeapOperand(InputRegisterAt(instruction, 0), field_info.GetFieldOffset()); - if (field_type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && + load_type == DataType::Type::kReference) { // Object FieldGet with Baker's read barrier case. // /* HeapReference<Object> */ out = *(base + offset) - Register base = RegisterFrom(base_loc, Primitive::kPrimNot); + Register base = RegisterFrom(base_loc, DataType::Type::kReference); Location maybe_temp = (locations->GetTempCount() != 0) ? locations->GetTemp(0) : Location::NoLocation(); // Note that potential implicit null checks are handled in this @@ -2361,10 +2290,10 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, } else { // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted. EmissionCheckScope guard(GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes); - codegen_->Load(field_type, OutputCPURegister(instruction), field); + codegen_->Load(load_type, OutputCPURegister(instruction), field); codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (field_type == Primitive::kPrimNot) { + if (load_type == DataType::Type::kReference) { // If read barriers are enabled, emit read barriers other than // Baker's using a slow path (and also unpoison the loaded // reference, if heap poisoning is enabled). 
@@ -2375,11 +2304,11 @@ void InstructionCodeGeneratorARM64::HandleFieldGet(HInstruction* instruction, void LocationsBuilderARM64::HandleFieldSet(HInstruction* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (IsConstantZeroBitPattern(instruction->InputAt(1))) { locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); - } else if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { + } else if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { locations->SetInAt(1, Location::RequiresFpuRegister()); } else { locations->SetInAt(1, Location::RequiresRegister()); @@ -2395,14 +2324,14 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 1); CPURegister source = value; Offset offset = field_info.GetFieldOffset(); - Primitive::Type field_type = field_info.GetFieldType(); + DataType::Type field_type = field_info.GetFieldType(); { // We use a block to end the scratch scope before the write barrier, thus // freeing the temporary registers so they can be used in `MarkGCCard`. UseScratchRegisterScope temps(GetVIXLAssembler()); - if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { DCHECK(value.IsW()); Register temp = temps.AcquireW(); __ Mov(temp, value.W()); @@ -2427,11 +2356,11 @@ void InstructionCodeGeneratorARM64::HandleFieldSet(HInstruction* instruction, } void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { - Primitive::Type type = instr->GetType(); + DataType::Type type = instr->GetType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { Register dst = OutputRegister(instr); Register lhs = InputRegisterAt(instr, 0); Operand rhs = InputOperandAt(instr, 1); @@ -2460,8 +2389,8 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { } break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { FPRegister dst = OutputFPRegister(instr); FPRegister lhs = InputFPRegisterAt(instr, 0); FPRegister rhs = InputFPRegisterAt(instr, 1); @@ -2482,11 +2411,11 @@ void InstructionCodeGeneratorARM64::HandleBinaryOp(HBinaryOperation* instr) { void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) { DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); - Primitive::Type type = instr->GetResultType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); + DataType::Type type = instr->GetResultType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -2500,16 +2429,16 @@ void LocationsBuilderARM64::HandleShift(HBinaryOperation* instr) { void InstructionCodeGeneratorARM64::HandleShift(HBinaryOperation* instr) { 
DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr()); - Primitive::Type type = instr->GetType(); + DataType::Type type = instr->GetType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { Register dst = OutputRegister(instr); Register lhs = InputRegisterAt(instr, 0); Operand rhs = InputOperandAt(instr, 1); if (rhs.IsImmediate()) { uint32_t shift_value = rhs.GetImmediate() & - (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance); + (type == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance); if (instr->IsShl()) { __ Lsl(dst, lhs, shift_value); } else if (instr->IsShr()) { @@ -2552,8 +2481,8 @@ void InstructionCodeGeneratorARM64::VisitAnd(HAnd* instruction) { } void LocationsBuilderARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instr) { - DCHECK(Primitive::IsIntegralType(instr->GetType())) << instr->GetType(); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); + DCHECK(DataType::IsIntegralType(instr->GetType())) << instr->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); locations->SetInAt(0, Location::RequiresRegister()); // There is no immediate variant of negated bitwise instructions in AArch64. locations->SetInAt(1, Location::RequiresRegister()); @@ -2582,10 +2511,10 @@ void InstructionCodeGeneratorARM64::VisitBitwiseNegatedRight(HBitwiseNegatedRigh void LocationsBuilderARM64::VisitDataProcWithShifterOp( HDataProcWithShifterOp* instruction) { - DCHECK(instruction->GetType() == Primitive::kPrimInt || - instruction->GetType() == Primitive::kPrimLong); + DCHECK(instruction->GetType() == DataType::Type::kInt32 || + instruction->GetType() == DataType::Type::kInt64); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); if (instruction->GetInstrKind() == HInstruction::kNeg) { locations->SetInAt(0, Location::ConstantLocation(instruction->InputAt(0)->AsConstant())); } else { @@ -2597,9 +2526,9 @@ void LocationsBuilderARM64::VisitDataProcWithShifterOp( void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp( HDataProcWithShifterOp* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); HInstruction::InstructionKind kind = instruction->GetInstrKind(); - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); Register out = OutputRegister(instruction); Register left; if (kind != HInstruction::kNeg) { @@ -2656,7 +2585,7 @@ void InstructionCodeGeneratorARM64::VisitDataProcWithShifterOp( void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -2670,7 +2599,7 @@ void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddres void 
LocationsBuilderARM64::VisitIntermediateAddressIndex(HIntermediateAddressIndex* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); HIntConstant* shift = instruction->GetShift()->AsIntConstant(); @@ -2702,7 +2631,7 @@ void InstructionCodeGeneratorARM64::VisitIntermediateAddressIndex( void LocationsBuilderARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall); HInstruction* accumulator = instr->InputAt(HMultiplyAccumulate::kInputAccumulatorIndex); if (instr->GetOpKind() == HInstruction::kSub && accumulator->IsConstant() && @@ -2725,7 +2654,7 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* // Avoid emitting code that could trigger Cortex A53's erratum 835769. // This fixup should be carried out for all multiply-accumulate instructions: // madd, msub, smaddl, smsubl, umaddl and umsubl. - if (instr->GetType() == Primitive::kPrimLong && + if (instr->GetType() == DataType::Type::kInt64 && codegen_->GetInstructionSetFeatures().NeedFixCortexA53_835769()) { MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen_)->GetVIXLAssembler(); vixl::aarch64::Instruction* prev = @@ -2754,12 +2683,12 @@ void InstructionCodeGeneratorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, - object_array_get_with_read_barrier ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. // We need a temporary register for the read barrier marking slow @@ -2772,7 +2701,7 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { // constant index loads we need a temporary only if the offset is too big. 
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); - offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + offset += index << DataType::SizeShift(DataType::Type::kReference); if (offset >= kReferenceLoadMinFarOffset) { locations->AddTemp(FixedTempLocation()); } @@ -2782,7 +2711,7 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { // The output overlaps in the case of an object array get with @@ -2795,7 +2724,7 @@ void LocationsBuilderARM64::VisitArrayGet(HArrayGet* instruction) { } void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); Register obj = InputRegisterAt(instruction, 0); LocationSummary* locations = instruction->GetLocations(); Location index = locations->InAt(1); @@ -2808,18 +2737,18 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { // The read barrier instrumentation of object ArrayGet instructions // does not support the HIntermediateAddress instruction. - DCHECK(!((type == Primitive::kPrimNot) && + DCHECK(!((type == DataType::Type::kReference) && instruction->GetArray()->IsIntermediateAddress() && kEmitCompilerReadBarrier)); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Object ArrayGet with Baker's read barrier case. // Note that a potential implicit null check is handled in the // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call. DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { // Array load with a constant index can be treated as a field load. - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); + offset += Int64ConstantFrom(index) << DataType::SizeShift(type); Location maybe_temp = (locations->GetTempCount() != 0) ? 
locations->GetTemp(0) : Location::NoLocation(); codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -2871,7 +2800,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { HeapOperand(obj, offset + (Int64ConstantFrom(index) << 1))); __ Bind(&done); } else { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(type); + offset += Int64ConstantFrom(index) << DataType::SizeShift(type); source = HeapOperand(obj, offset); } } else { @@ -2901,7 +2830,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { HeapOperand(temp, XRegisterFrom(index), LSL, 1)); __ Bind(&done); } else { - source = HeapOperand(temp, XRegisterFrom(index), LSL, Primitive::ComponentSizeShift(type)); + source = HeapOperand(temp, XRegisterFrom(index), LSL, DataType::SizeShift(type)); } } if (!maybe_compressed_char_at) { @@ -2911,7 +2840,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { codegen_->MaybeRecordImplicitNullCheck(instruction); } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); @@ -2926,7 +2855,7 @@ void InstructionCodeGeneratorARM64::VisitArrayGet(HArrayGet* instruction) { } void LocationsBuilderARM64::VisitArrayLength(HArrayLength* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -2947,10 +2876,10 @@ void InstructionCodeGeneratorARM64::VisitArrayLength(HArrayLength* instruction) } void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, may_need_runtime_call_for_type_check ? 
LocationSummary::kCallOnSlowPath : @@ -2959,7 +2888,7 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (IsConstantZeroBitPattern(instruction->InputAt(2))) { locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant())); - } else if (Primitive::IsFloatingPointType(value_type)) { + } else if (DataType::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RequiresRegister()); @@ -2967,7 +2896,7 @@ void LocationsBuilderARM64::VisitArraySet(HArraySet* instruction) { } void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); LocationSummary* locations = instruction->GetLocations(); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = @@ -2977,14 +2906,14 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { CPURegister value = InputCPURegisterOrZeroRegAt(instruction, 2); CPURegister source = value; Location index = locations->InAt(1); - size_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); + size_t offset = mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value(); MemOperand destination = HeapOperand(array); MacroAssembler* masm = GetVIXLAssembler(); if (!needs_write_barrier) { DCHECK(!may_need_runtime_call_for_type_check); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); + offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); destination = HeapOperand(array, offset); } else { UseScratchRegisterScope temps(masm); @@ -3004,7 +2933,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { destination = HeapOperand(temp, XRegisterFrom(index), LSL, - Primitive::ComponentSizeShift(value_type)); + DataType::SizeShift(value_type)); } { // Ensure that between store and MaybeRecordImplicitNullCheck there are no pools emitted. 
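Both the array loads and the stores above form the element address as base + data_offset + (index << size_shift), folding the shift into an immediate offset when the index is constant and into the LSL addressing mode of HeapOperand otherwise. A plain restatement of the constant-index case, using an illustrative header size rather than the real mirror::Array::DataOffset() value:

#include <cstddef>

// Hypothetical array header size; the real value comes from
// mirror::Array::DataOffset(component_size).
constexpr size_t kIllustrativeDataOffset = 16;

// Byte offset of element `index` when each element is (1 << size_shift)
// bytes wide, e.g. size_shift == 2 for 32-bit elements.
inline size_t ConstantIndexElementOffset(size_t index, size_t size_shift) {
  return kIllustrativeDataOffset + (index << size_shift);
}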
@@ -3022,13 +2951,13 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { UseScratchRegisterScope temps(masm); Register temp = temps.AcquireSameSizeAs(array); if (index.IsConstant()) { - offset += Int64ConstantFrom(index) << Primitive::ComponentSizeShift(value_type); + offset += Int64ConstantFrom(index) << DataType::SizeShift(value_type); destination = HeapOperand(array, offset); } else { destination = HeapOperand(temp, XRegisterFrom(index), LSL, - Primitive::ComponentSizeShift(value_type)); + DataType::SizeShift(value_type)); } uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); @@ -3036,7 +2965,7 @@ void InstructionCodeGeneratorARM64::VisitArraySet(HArraySet* instruction) { uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); if (may_need_runtime_call_for_type_check) { - slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARM64(instruction); + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { vixl::aarch64::Label non_zero; @@ -3151,7 +3080,7 @@ void LocationsBuilderARM64::VisitBoundsCheck(HBoundsCheck* instruction) { void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) { BoundsCheckSlowPathARM64* slow_path = - new (GetGraph()->GetArena()) BoundsCheckSlowPathARM64(instruction); + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); __ Cmp(InputRegisterAt(instruction, 0), InputOperandAt(instruction, 1)); __ B(slow_path->GetEntryLabel(), hs); @@ -3159,7 +3088,7 @@ void InstructionCodeGeneratorARM64::VisitBoundsCheck(HBoundsCheck* instruction) void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, Location::RequiresRegister()); if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); @@ -3168,7 +3097,7 @@ void LocationsBuilderARM64::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorARM64::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. 
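VisitBoundsCheck above emits a single Cmp followed by a branch on the unsigned condition hs. A short sketch of why one unsigned comparison suffices, assuming 32-bit index and length as in the generated code:

#include <cstdint>

// Reinterpreting the signed index as unsigned turns a negative index into a huge value,
// so one unsigned "higher or same" test rejects both negative and out-of-range indices.
bool BoundsCheckFailsSketch(int32_t index, int32_t length) {
  return static_cast<uint32_t>(index) >= static_cast<uint32_t>(length);  // B.hs to the slow path
}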
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( + SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64( check->GetLoadClass(), check, check->GetDexPc(), true); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); @@ -3207,22 +3136,23 @@ void InstructionCodeGeneratorARM64::GenerateFcmp(HInstruction* instruction) { void LocationsBuilderARM64::VisitCompare(HCompare* compare) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); - Primitive::Type in_type = compare->InputAt(0)->GetType(); + new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); + DataType::Type in_type = compare->InputAt(0)->GetType(); switch (in_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ARM64EncodableConstantOrRegister(compare->InputAt(1), compare)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, IsFloatingPointZeroConstant(compare->InputAt(1)) @@ -3237,18 +3167,19 @@ void LocationsBuilderARM64::VisitCompare(HCompare* compare) { } void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { - Primitive::Type in_type = compare->InputAt(0)->GetType(); + DataType::Type in_type = compare->InputAt(0)->GetType(); // 0 if: left == right // 1 if: left > right // -1 if: left < right switch (in_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: { Register result = OutputRegister(compare); Register left = InputRegisterAt(compare, 0); Operand right = InputOperandAt(compare, 1); @@ -3257,8 +3188,8 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { __ Cneg(result, result, lt); // result == -1 if LT or unchanged otherwise break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { Register result = OutputRegister(compare); GenerateFcmp(compare); __ Cset(result, ne); @@ -3271,9 +3202,9 @@ void InstructionCodeGeneratorARM64::VisitCompare(HCompare* compare) { } void LocationsBuilderARM64::HandleCondition(HCondition* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); - if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) { + if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { locations->SetInAt(0, 
Location::RequiresFpuRegister()); locations->SetInAt(1, IsFloatingPointZeroConstant(instruction->InputAt(1)) @@ -3299,7 +3230,7 @@ void InstructionCodeGeneratorARM64::HandleCondition(HCondition* instruction) { Register res = RegisterFrom(locations->Out(), instruction->GetType()); IfCondition if_cond = instruction->GetCondition(); - if (Primitive::IsFloatingPointType(instruction->InputAt(0)->GetType())) { + if (DataType::IsFloatingPointType(instruction->InputAt(0)->GetType())) { GenerateFcmp(instruction); __ Cset(res, ARM64FPCondition(if_cond, instruction->IsGtBias())); } else { @@ -3378,7 +3309,7 @@ void InstructionCodeGeneratorARM64::DivRemByPowerOfTwo(HBinaryOperation* instruc __ Neg(out, Operand(out, ASR, ctz_imm)); } } else { - int bits = instruction->GetResultType() == Primitive::kPrimInt ? 32 : 64; + int bits = instruction->GetResultType() == DataType::Type::kInt32 ? 32 : 64; __ Asr(temp, dividend, bits - 1); __ Lsr(temp, temp, bits - ctz_imm); __ Add(out, dividend, temp); @@ -3398,19 +3329,20 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati Register dividend = InputRegisterAt(instruction, 0); int64_t imm = Int64FromConstant(second.GetConstant()); - Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); int64_t magic; int shift; - CalculateMagicAndShiftForDivRem(imm, type == Primitive::kPrimLong /* is_long */, &magic, &shift); + CalculateMagicAndShiftForDivRem( + imm, type == DataType::Type::kInt64 /* is_long */, &magic, &shift); UseScratchRegisterScope temps(GetVIXLAssembler()); Register temp = temps.AcquireSameSizeAs(out); // temp = get_high(dividend * magic) __ Mov(temp, magic); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { __ Smulh(temp, dividend, temp); } else { __ Smull(temp.X(), dividend, temp); @@ -3428,9 +3360,9 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati } if (instruction->IsDiv()) { - __ Sub(out, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31)); + __ Sub(out, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31)); } else { - __ Sub(temp, temp, Operand(temp, ASR, type == Primitive::kPrimLong ? 63 : 31)); + __ Sub(temp, temp, Operand(temp, ASR, type == DataType::Type::kInt64 ? 63 : 31)); // TODO: Strength reduction for msub. 
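The Asr/Lsr/Add sequence in DivRemByPowerOfTwo above builds a branch-free bias that is 2^ctz - 1 for negative dividends and 0 otherwise; adding it before the final shift (or mask, on the remainder path) is what makes a power-of-two division round toward zero as Java requires. A sketch of the 32-bit division case, not the patch's own code, assuming arithmetic right shifts as on AArch64:

#include <cstdint>

int32_t DivByPowerOfTwoSketch(int32_t dividend, int ctz /* 1..31 */) {
  int32_t sign = dividend >> 31;                              // Asr temp, dividend, 31
  int32_t bias = static_cast<int32_t>(
      static_cast<uint32_t>(sign) >> (32 - ctz));             // Lsr temp, temp, 32 - ctz
  return (dividend + bias) >> ctz;                            // Add, then the final shift
}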
Register temp_imm = temps.AcquireSameSizeAs(out); __ Mov(temp_imm, imm); @@ -3440,8 +3372,8 @@ void InstructionCodeGeneratorARM64::GenerateDivRemWithAnyConstant(HBinaryOperati void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); LocationSummary* locations = instruction->GetLocations(); Register out = OutputRegister(instruction); @@ -3476,17 +3408,17 @@ void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* ins void LocationsBuilderARM64::VisitDiv(HDiv* div) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall); switch (div->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -3498,15 +3430,15 @@ void LocationsBuilderARM64::VisitDiv(HDiv* div) { } void InstructionCodeGeneratorARM64::VisitDiv(HDiv* div) { - Primitive::Type type = div->GetResultType(); + DataType::Type type = div->GetResultType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: GenerateDivRemIntegral(div); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Fdiv(OutputFPRegister(div), InputFPRegisterAt(div, 0), InputFPRegisterAt(div, 1)); break; @@ -3522,13 +3454,13 @@ void LocationsBuilderARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction) { SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) DivZeroCheckSlowPathARM64(instruction); + new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARM64(instruction); codegen_->AddSlowPath(slow_path); Location value = instruction->GetLocations()->InAt(0); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); - if (!Primitive::IsIntegralType(type)) { + if (!DataType::IsIntegralType(type)) { LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; return; } @@ -3548,7 +3480,7 @@ void InstructionCodeGeneratorARM64::VisitDivZeroCheck(HDivZeroCheck* instruction void LocationsBuilderARM64::VisitDoubleConstant(HDoubleConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3566,7 +3498,7 @@ void InstructionCodeGeneratorARM64::VisitExit(HExit* exit 
ATTRIBUTE_UNUSED) { void LocationsBuilderARM64::VisitFloatConstant(HFloatConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3575,18 +3507,31 @@ void InstructionCodeGeneratorARM64::VisitFloatConstant(HFloatConstant* constant } void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp1 = temps.AcquireX(); + Register temp2 = temps.AcquireX(); + __ Ldr(temp1, MemOperand(sp, 0)); + __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp2, temp2, 1); + __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } if (!codegen_->GoesToNextBlock(block, successor)) { __ B(codegen_->GetLabelOf(successor)); @@ -3658,8 +3603,8 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct // the comparison and its condition as the branch condition. 
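When CountHotnessInCompiledCode is enabled, the back-edge path in HandleGoto above reloads the current ArtMethod* from SP+0 and bumps its 16-bit hotness counter with an Ldrh/Add/Strh sequence. A C-level sketch of the same update; kHotnessOffsetSketch is a made-up stand-in for ArtMethod::HotnessCountOffset():

#include <cstddef>
#include <cstdint>

constexpr size_t kHotnessOffsetSketch = 16;  // illustrative value only

void BumpHotnessSketch(uint8_t* art_method) {
  auto* counter = reinterpret_cast<uint16_t*>(art_method + kHotnessOffsetSketch);  // Ldrh
  *counter = static_cast<uint16_t>(*counter + 1);                                  // Add + Strh
}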
HCondition* condition = cond->AsCondition(); - Primitive::Type type = condition->InputAt(0)->GetType(); - if (Primitive::IsFloatingPointType(type)) { + DataType::Type type = condition->InputAt(0)->GetType(); + if (DataType::IsFloatingPointType(type)) { GenerateFcmp(condition); if (true_target == nullptr) { IfCondition opposite_condition = condition->GetOppositeCondition(); @@ -3719,7 +3664,7 @@ void InstructionCodeGeneratorARM64::GenerateTestAndBranch(HInstruction* instruct } void LocationsBuilderARM64::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3740,7 +3685,7 @@ void InstructionCodeGeneratorARM64::VisitIf(HIf* if_instr) { } void LocationsBuilderARM64::VisitDeoptimize(HDeoptimize* deoptimize) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); InvokeRuntimeCallingConvention calling_convention; RegisterSet caller_saves = RegisterSet::Empty(); @@ -3761,7 +3706,7 @@ void InstructionCodeGeneratorARM64::VisitDeoptimize(HDeoptimize* deoptimize) { } void LocationsBuilderARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(flag, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -3773,7 +3718,7 @@ void InstructionCodeGeneratorARM64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeF static inline bool IsConditionOnFloatingPointValues(HInstruction* condition) { return condition->IsCondition() && - Primitive::IsFloatingPointType(condition->InputAt(0)->GetType()); + DataType::IsFloatingPointType(condition->InputAt(0)->GetType()); } static inline Condition GetConditionForSelect(HCondition* condition) { @@ -3783,8 +3728,8 @@ static inline Condition GetConditionForSelect(HCondition* condition) { } void LocationsBuilderARM64::VisitSelect(HSelect* select) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); - if (Primitive::IsFloatingPointType(select->GetType())) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); + if (DataType::IsFloatingPointType(select->GetType())) { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -3838,7 +3783,7 @@ void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) { csel_cond = GetConditionForSelect(cond->AsCondition()); } - if (Primitive::IsFloatingPointType(select->GetType())) { + if (DataType::IsFloatingPointType(select->GetType())) { __ Fcsel(OutputFPRegister(select), InputFPRegisterAt(select, 1), InputFPRegisterAt(select, 0), @@ -3852,7 +3797,7 @@ void InstructionCodeGeneratorARM64::VisitSelect(HSelect* select) { } void LocationsBuilderARM64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetArena()) LocationSummary(info); + new (GetGraph()->GetAllocator()) LocationSummary(info); } void InstructionCodeGeneratorARM64::VisitNativeDebugInfo(HNativeDebugInfo*) { @@ -3909,11 +3854,12 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* 
instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -3921,7 +3867,8 @@ void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) { break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); if (baker_read_barrier_slow_path) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -3961,13 +3908,15 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Cmp(out, cls); __ Cset(out, eq); if (zero.IsLinked()) { @@ -3977,13 +3926,15 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. vixl::aarch64::Label loop, success; @@ -3993,7 +3944,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Cmp(out, cls); @@ -4006,13 +3957,15 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. vixl::aarch64::Label loop, success; __ Bind(&loop); @@ -4023,7 +3976,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Cbnz(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. 
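The kAbstractClassCheck and kClassHierarchyCheck cases above both lower to a loop that repeatedly reloads the super class and compares it against the target, with a null super class doubling as the false result. A sketch of that walk; KlassSketch stands in for mirror::Class and its super-class pointer:

struct KlassSketch { const KlassSketch* super_class; };

bool IsSubclassOfSketch(const KlassSketch* klass, const KlassSketch* target) {
  for (const KlassSketch* k = klass; k != nullptr; k = k->super_class) {  // reload via super_offset
    if (k == target) {
      return true;                                                        // Cmp out, cls
    }
  }
  return false;  // out == null falls through to `done` with the zero result
}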
__ B(&done); @@ -4036,13 +3989,15 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. vixl::aarch64::Label exact_check; __ Cmp(out, cls); @@ -4053,7 +4008,7 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Cbz(out, &done); __ Ldrh(out, HeapOperand(out, primitive_offset)); @@ -4076,8 +4031,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { kWithoutReadBarrier); __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -4108,8 +4063,8 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { // call to the runtime not using a type checking slow path). // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); if (zero.IsLinked()) { @@ -4134,27 +4089,10 @@ void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) { } void LocationsBuilderARM64::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. - break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64. 
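The kArrayObjectCheck case above passes on an exact class match and otherwise requires an array whose component type is non-primitive (the Ldrh of primitive_offset, where zero denotes a reference type). A sketch with illustrative stand-ins for the mirror::Class fields read via component_offset and primitive_offset:

#include <cstdint>

struct ComponentKlassSketch { uint16_t primitive_type; };            // 0 means "not primitive"
struct ArrayKlassSketch { const ComponentKlassSketch* component; };  // null when not an array

bool IsObjectArraySketch(const ArrayKlassSketch* klass, const ArrayKlassSketch* object_array_class) {
  if (klass == object_array_class) {
    return true;                                                     // exact check
  }
  const ComponentKlassSketch* component = klass->component;          // load via component_offset
  return component != nullptr && component->primitive_type == 0;    // Cbz + Ldrh primitive_offset
}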
@@ -4183,21 +4121,10 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { const uint32_t object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - bool is_type_check_slow_path_fatal = false; - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeARM64* type_check_slow_path = - new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction, - is_type_check_slow_path_fatal); + new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARM64( + instruction, is_type_check_slow_path_fatal); codegen_->AddSlowPath(type_check_slow_path); vixl::aarch64::Label done; @@ -4365,7 +4292,7 @@ void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) { } void LocationsBuilderARM64::VisitIntConstant(HIntConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); locations->SetOut(Location::ConstantLocation(constant)); } @@ -4374,7 +4301,7 @@ void InstructionCodeGeneratorARM64::VisitIntConstant(HIntConstant* constant ATTR } void LocationsBuilderARM64::VisitNullConstant(HNullConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); locations->SetOut(Location::ConstantLocation(constant)); } @@ -4391,6 +4318,7 @@ void LocationsBuilderARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { void InstructionCodeGeneratorARM64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::HandleInvoke(HInvoke* invoke) { @@ -4459,10 +4387,12 @@ void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invok DCHECK(!codegen_->IsLeafMethod()); codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); } + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_); + IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -4475,7 +4405,7 @@ void LocationsBuilderARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* inv // art::PrepareForRegisterAllocation. 
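VisitCheckCast now defers to CodeGenerator::GetCheckCastCallKind and CodeGenerator::IsTypeCheckSlowPathFatal instead of the inline switch and flag computation deleted above. A sketch of the decision those removed lines encoded; the enum mirrors the kinds named in the diff and the function name is illustrative:

enum class TypeCheckKindSketch { kExactCheck, kAbstractClassCheck, kClassHierarchyCheck,
                                 kArrayObjectCheck, kArrayCheck, kUnresolvedCheck, kInterfaceCheck };

bool IsTypeCheckSlowPathFatalSketch(TypeCheckKindSketch kind,
                                    bool can_throw_into_catch_block,
                                    bool emit_read_barriers) {
  if (emit_read_barriers) {
    // Never fatal with read barriers: skipped barriers can cause false negatives that must be
    // retried through the runtime entrypoint rather than throwing immediately.
    return false;
  }
  bool simple_kind = kind == TypeCheckKindSketch::kExactCheck ||
                     kind == TypeCheckKindSketch::kAbstractClassCheck ||
                     kind == TypeCheckKindSketch::kClassHierarchyCheck ||
                     kind == TypeCheckKindSketch::kArrayObjectCheck;
  return simple_kind && !can_throw_into_catch_block;
}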
DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); - IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_); + IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetAllocator(), codegen_); if (intrinsic.TryDispatch(invoke)) { return; } @@ -4517,11 +4447,11 @@ void CodeGeneratorARM64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); // Add ADRP with its PC-relative method patch. - vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + vixl::aarch64::Label* adrp_label = NewBootImageMethodPatch(invoke->GetTargetMethod()); EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp)); // Add ADD with its PC-relative method patch. vixl::aarch64::Label* add_label = - NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label); + NewBootImageMethodPatch(invoke->GetTargetMethod(), adrp_label); EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp)); break; } @@ -4626,46 +4556,50 @@ void LocationsBuilderARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { void InstructionCodeGeneratorARM64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { codegen_->GenerateInvokePolymorphicCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch( +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageMethodPatch( MethodReference target_method, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.dex_method_index, - adrp_label, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, adrp_label, &boot_image_method_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch( MethodReference target_method, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.dex_method_index, - adrp_label, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, adrp_label, &method_bss_entry_patches_); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch( +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &boot_image_type_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewBssEntryTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, vixl::aarch64::Label* adrp_label) { - return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_); +} + +vixl::aarch64::Label* CodeGeneratorARM64::NewBootImageStringPatch( + const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label) { + return NewPcRelativePatch( + &dex_file, string_index.index_, adrp_label, &boot_image_string_patches_); } -vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch( +vixl::aarch64::Label* CodeGeneratorARM64::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index, vixl::aarch64::Label* adrp_label) { - return - NewPcRelativePatch(dex_file, string_index.index_, adrp_label, 
&pc_relative_string_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, adrp_label, &string_bss_entry_patches_); } vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) { @@ -4674,7 +4608,7 @@ vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t cust } vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativePatch( - const DexFile& dex_file, + const DexFile* dex_file, uint32_t offset_or_index, vixl::aarch64::Label* adrp_label, ArenaDeque<PcRelativePatchInfo>* patches) { @@ -4694,8 +4628,7 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddres vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral( const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { - jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), - reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); @@ -4703,8 +4636,7 @@ vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLitera vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitClassLiteral( const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { - jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index), - reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); return jit_class_patches_.GetOrCreate( TypeReference(&dex_file, type_index), [this]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(/* placeholder */ 0u); }); @@ -4737,48 +4669,52 @@ void CodeGeneratorARM64::EmitLdrOffsetPlaceholder(vixl::aarch64::Label* fixup_la __ ldr(out, MemOperand(base, /* offset placeholder */ 0)); } -template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorARM64::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches) { + ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { linker_patches->push_back(Factory(info.label.GetLocation(), - &info.target_dex_file, + info.target_dex_file, info.pc_insn_label->GetLocation(), info.offset_or_index)); } } -void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { +void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_method_patches_.size() + + boot_image_method_patches_.size() + method_bss_entry_patches_.size() + - pc_relative_type_patches_.size() + + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size() + + boot_image_string_patches_.size() + + string_bss_entry_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, - linker_patches); - 
EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( + boot_image_method_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, - linker_patches); - } - EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, - linker_patches); + DCHECK(boot_image_method_patches_.empty()); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( + boot_image_string_patches_, linker_patches); + } + EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( + method_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( + type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( + string_bss_entry_patches_, linker_patches); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { - linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), - info.custom_data)); + linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch( + info.label.GetLocation(), info.custom_data)); } DCHECK_EQ(size, linker_patches->size()); } @@ -4801,27 +4737,37 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, codegen_)) { + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); return; } - // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there - // are no pools emitted. - EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); - LocationSummary* locations = invoke->GetLocations(); - codegen_->GenerateStaticOrDirectCall( - invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); + { + // Ensure that between the BLR (emitted by GenerateStaticOrDirectCall) and RecordPcInfo there + // are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); + LocationSummary* locations = invoke->GetLocations(); + codegen_->GenerateStaticOrDirectCall( + invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); + } + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) { if (TryGenerateIntrinsicCode(invoke, codegen_)) { + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); return; } - // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there - // are no pools emitted. 
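Every boot-image patch collected above is a pair of placeholder instructions, an ADRP plus an ADD (or an LDR for the intern-table, class-table and .bss entries), whose immediates the linker fills in from the LinkerPatch records. A sketch of the address such a patched pair computes at run time; the arguments stand in for the linker-supplied deltas:

#include <cstdint>

uint64_t AdrpAddTargetSketch(uint64_t pc, int64_t page_delta_in_pages, uint32_t low_12_bits) {
  // ADRP: page of the target, relative to the 4 KiB page holding the current PC.
  uint64_t page = (pc & ~UINT64_C(0xfff)) + static_cast<uint64_t>(page_delta_in_pages) * 4096u;
  // ADD (or the offset of the patched LDR): the low 12 bits of the target.
  return page + low_12_bits;
}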
- EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); - codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); - DCHECK(!codegen_->IsLeafMethod()); + { + // Ensure that between the BLR (emitted by GenerateVirtualCall) and RecordPcInfo there + // are no pools emitted. + EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes); + codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); + DCHECK(!codegen_->IsLeafMethod()); + } + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( @@ -4833,6 +4779,7 @@ HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -4863,7 +4810,7 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -4875,13 +4822,12 @@ void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) { if (cls->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. - locations->AddTemp(FixedTempLocation()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), - RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot), - Primitive::kPrimNot).GetCode()); + RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), + DataType::Type::kReference).GetCode()); locations->SetCustomSlowPathCallerSaves(caller_saves); } else { // For non-Baker read barrier we have a temp-clobbering call. @@ -4895,14 +4841,13 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); return; } DCHECK(!cls->NeedsAccessCheck()); Location out_loc = cls->GetLocations()->Out(); Register out = OutputRegister(cls); - Register bss_entry_temp; - vixl::aarch64::Label* bss_entry_adrp_label = nullptr; const ReadBarrierOption read_barrier_option = cls->IsInBootImage() ? kWithoutReadBarrier @@ -4927,11 +4872,11 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA // Add ADRP with its PC-relative type patch. 
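The MaybeGenerateMarkingRegisterCheck calls added after the intrinsic and invoke paths above are only meaningful with Baker read barriers; roughly, in debug configurations they emit code verifying that the dedicated marking register still mirrors the thread's is-GC-marking flag. A conceptual sketch, not ART's actual code:

#include <cassert>
#include <cstdint>

struct ThreadSketch { bool is_gc_marking; };

void CheckMarkingRegisterSketch(uint32_t marking_register_value, const ThreadSketch& self) {
  assert(marking_register_value == (self.is_gc_marking ? 1u : 0u));
}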
const DexFile& dex_file = cls->GetDexFile(); dex::TypeIndex type_index = cls->GetTypeIndex(); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeTypePatch(dex_file, type_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative type patch. vixl::aarch64::Label* add_label = - codegen_->NewPcRelativeTypePatch(dex_file, type_index, adrp_label); + codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); break; } @@ -4943,20 +4888,39 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); break; } + case HLoadClass::LoadKind::kBootImageClassTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + // Add ADRP with its PC-relative type patch. + const DexFile& dex_file = cls->GetDexFile(); + dex::TypeIndex type_index = cls->GetTypeIndex(); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageTypePatch(dex_file, type_index); + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); + // Add LDR with its PC-relative type patch. + vixl::aarch64::Label* ldr_label = + codegen_->NewBootImageTypePatch(dex_file, type_index, adrp_label); + codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); + // Extract the reference from the slot data, i.e. clear the hash bits. + int32_t masked_hash = ClassTable::TableSlot::MaskHash( + ComputeModifiedUtf8Hash(dex_file.StringByTypeIdx(type_index))); + if (masked_hash != 0) { + __ Sub(out.W(), out.W(), Operand(masked_hash)); + } + break; + } case HLoadClass::LoadKind::kBssEntry: { // Add ADRP with its PC-relative Class .bss entry patch. const DexFile& dex_file = cls->GetDexFile(); dex::TypeIndex type_index = cls->GetTypeIndex(); - bss_entry_temp = XRegisterFrom(cls->GetLocations()->GetTemp(0)); - bss_entry_adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); - codegen_->EmitAdrpPlaceholder(bss_entry_adrp_label, bss_entry_temp); + vixl::aarch64::Register temp = XRegisterFrom(out_loc); + vixl::aarch64::Label* adrp_label = codegen_->NewBssEntryTypePatch(dex_file, type_index); + codegen_->EmitAdrpPlaceholder(adrp_label, temp); // Add LDR with its PC-relative Class patch. 
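The new kBootImageClassTable case above loads a ClassTable::TableSlot, whose low bits carry a few hash bits alongside the 32-bit class reference; because the descriptor hash is known at compile time, a single Sub of the masked hash recovers the clean reference. A one-line sketch of that extraction:

#include <cstdint>

uint32_t ExtractClassReferenceSketch(uint32_t slot_data, uint32_t masked_hash) {
  return slot_data - masked_hash;  // __ Sub(out.W(), out.W(), Operand(masked_hash))
}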
vixl::aarch64::Label* ldr_label = - codegen_->NewBssEntryTypePatch(dex_file, type_index, bss_entry_adrp_label); + codegen_->NewBssEntryTypePatch(dex_file, type_index, adrp_label); // /* GcRoot<mirror::Class> */ out = *(base_address + offset) /* PC-relative */ GenerateGcRootFieldLoad(cls, out_loc, - bss_entry_temp, + temp, /* offset placeholder */ 0u, ldr_label, read_barrier_option); @@ -4984,8 +4948,8 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA bool do_clinit = cls->MustGenerateClinitCheck(); if (generate_null_check || do_clinit) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARM64( - cls, cls, cls->GetDexPc(), do_clinit, bss_entry_temp, bss_entry_adrp_label); + SlowPathCodeARM64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathARM64( + cls, cls, cls->GetDexPc(), do_clinit); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Cbz(out, slow_path->GetEntryLabel()); @@ -4995,6 +4959,7 @@ void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SA } else { __ Bind(slow_path->GetExitLabel()); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } } @@ -5004,7 +4969,7 @@ static MemOperand GetExceptionTlsAddress() { void LocationsBuilderARM64::VisitLoadException(HLoadException* load) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -5013,7 +4978,7 @@ void InstructionCodeGeneratorARM64::VisitLoadException(HLoadException* instructi } void LocationsBuilderARM64::VisitClearException(HClearException* clear) { - new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } void InstructionCodeGeneratorARM64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { @@ -5024,6 +4989,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5039,7 +5005,7 @@ HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind( void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; locations->SetOut(calling_convention.GetReturnLocation(load->GetType())); @@ -5048,13 +5014,12 @@ void LocationsBuilderARM64::VisitLoadString(HLoadString* load) { if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. 
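At the end of VisitLoadClass above, one shared LoadClassSlowPathARM64 now covers both failure modes: a .bss entry that is still null (generate_null_check) and a class that may still need initialization (MustGenerateClinitCheck). A sketch of that decision with illustrative names:

enum class ClassLoadResultSketch { kFast, kSlowPath };

ClassLoadResultSketch ClassifyLoadClassSketch(bool class_is_null,
                                              bool class_is_initialized,
                                              bool generate_null_check,
                                              bool do_clinit) {
  if (generate_null_check && class_is_null) {
    return ClassLoadResultSketch::kSlowPath;   // __ Cbz(out, slow_path->GetEntryLabel())
  }
  if (do_clinit && !class_is_initialized) {
    return ClassLoadResultSketch::kSlowPath;   // GenerateClassInitializationCheck(...)
  }
  return ClassLoadResultSketch::kFast;
}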
- locations->AddTemp(FixedTempLocation()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), - RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot), - Primitive::kPrimNot).GetCode()); + RegisterFrom(calling_convention.GetReturnLocation(DataType::Type::kReference), + DataType::Type::kReference).GetCode()); locations->SetCustomSlowPathCallerSaves(caller_saves); } else { // For non-Baker read barrier we have a temp-clobbering call. @@ -5071,36 +5036,49 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD switch (load->GetLoadKind()) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { + DCHECK(codegen_->GetCompilerOptions().IsBootImage()); // Add ADRP with its PC-relative String patch. const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); - DCHECK(codegen_->GetCompilerOptions().IsBootImage()); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); // Add ADD with its PC-relative String patch. vixl::aarch64::Label* add_label = - codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); codegen_->EmitAddPlaceholder(add_label, out.X(), out.X()); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address)); - return; // No dex cache slow path. + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + // Add ADRP with its PC-relative String patch. + const DexFile& dex_file = load->GetDexFile(); + const dex::StringIndex string_index = load->GetStringIndex(); + vixl::aarch64::Label* adrp_label = codegen_->NewBootImageStringPatch(dex_file, string_index); + codegen_->EmitAdrpPlaceholder(adrp_label, out.X()); + // Add LDR with its PC-relative String patch. + vixl::aarch64::Label* ldr_label = + codegen_->NewBootImageStringPatch(dex_file, string_index, adrp_label); + codegen_->EmitLdrOffsetPlaceholder(ldr_label, out.W(), out.X()); + return; } case HLoadString::LoadKind::kBssEntry: { // Add ADRP with its PC-relative String .bss entry patch. const DexFile& dex_file = load->GetDexFile(); const dex::StringIndex string_index = load->GetStringIndex(); DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - Register temp = XRegisterFrom(load->GetLocations()->GetTemp(0)); - vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index); + Register temp = XRegisterFrom(out_loc); + vixl::aarch64::Label* adrp_label = codegen_->NewStringBssEntryPatch(dex_file, string_index); codegen_->EmitAdrpPlaceholder(adrp_label, temp); - // Add LDR with its PC-relative String patch. + // Add LDR with its .bss entry String patch. 
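For HLoadString kBssEntry, the code being introduced here loads the GC root through a PC-relative ADRP/LDR pair and, a few lines further down, falls back to a slow path that resolves the string when the .bss slot is still null. A sketch of that shape; ResolveStringSlowPathSketch stands in for LoadStringSlowPathARM64 and the pResolveString entrypoint behind it:

#include <cstdint>

void* ResolveStringSlowPathSketch(uint32_t string_index);  // illustrative declaration only

void* LoadStringBssEntrySketch(void* const* bss_slot, uint32_t string_index) {
  void* str = *bss_slot;                               // ADRP + LDR from the .bss slot
  if (str == nullptr) {                                // __ Cbz(out.X(), slow path entry)
    str = ResolveStringSlowPathSketch(string_index);   // resolve, then publish into the slot
  }
  return str;
}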
vixl::aarch64::Label* ldr_label = - codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label); + codegen_->NewStringBssEntryPatch(dex_file, string_index, adrp_label); // /* GcRoot<mirror::String> */ out = *(base_address + offset) /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, @@ -5109,10 +5087,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD ldr_label, kCompilerReadBarrierOption); SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label); + new (codegen_->GetScopedAllocator()) LoadStringSlowPathARM64(load); codegen_->AddSlowPath(slow_path); __ Cbz(out.X(), slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); return; } case HLoadString::LoadKind::kJitTableAddress: { @@ -5137,10 +5116,11 @@ void InstructionCodeGeneratorARM64::VisitLoadString(HLoadString* load) NO_THREAD __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitLongConstant(HLongConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); locations->SetOut(Location::ConstantLocation(constant)); } @@ -5149,8 +5129,8 @@ void InstructionCodeGeneratorARM64::VisitLongConstant(HLongConstant* constant AT } void LocationsBuilderARM64::VisitMonitorOperation(HMonitorOperation* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); } @@ -5164,21 +5144,22 @@ void InstructionCodeGeneratorARM64::VisitMonitorOperation(HMonitorOperation* ins } else { CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitMul(HMul* mul) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); switch (mul->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -5191,13 +5172,13 @@ void LocationsBuilderARM64::VisitMul(HMul* mul) { void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) { switch (mul->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case 
DataType::Type::kInt64: __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1)); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Fmul(OutputFPRegister(mul), InputFPRegisterAt(mul, 0), InputFPRegisterAt(mul, 1)); break; @@ -5208,16 +5189,16 @@ void InstructionCodeGeneratorARM64::VisitMul(HMul* mul) { void LocationsBuilderARM64::VisitNeg(HNeg* neg) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); switch (neg->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, ARM64EncodableConstantOrRegister(neg->InputAt(0), neg)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -5229,13 +5210,13 @@ void LocationsBuilderARM64::VisitNeg(HNeg* neg) { void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) { switch (neg->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: __ Neg(OutputRegister(neg), InputOperandAt(neg, 0)); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Fneg(OutputFPRegister(neg), InputFPRegisterAt(neg, 0)); break; @@ -5245,8 +5226,8 @@ void InstructionCodeGeneratorARM64::VisitNeg(HNeg* neg) { } void LocationsBuilderARM64::VisitNewArray(HNewArray* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetOut(LocationFrom(x0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); @@ -5260,18 +5241,19 @@ void InstructionCodeGeneratorARM64::VisitNewArray(HNewArray* instruction) { CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass()); codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; if (instruction->IsStringAlloc()) { locations->AddTemp(LocationFrom(kArtMethodRegister)); } else { locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); } - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* 
instruction) { @@ -5296,18 +5278,19 @@ void InstructionCodeGeneratorARM64::VisitNewInstance(HNewInstance* instruction) codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitNot(HNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) { switch (instruction->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: __ Mvn(OutputRegister(instruction), InputOperandAt(instruction, 0)); break; @@ -5317,7 +5300,7 @@ void InstructionCodeGeneratorARM64::VisitNot(HNot* instruction) { } void LocationsBuilderARM64::VisitBooleanNot(HBooleanNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -5345,7 +5328,7 @@ void CodeGeneratorARM64::GenerateImplicitNullCheck(HNullCheck* instruction) { } void CodeGeneratorARM64::GenerateExplicitNullCheck(HNullCheck* instruction) { - SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathARM64(instruction); + SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) NullCheckSlowPathARM64(instruction); AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); @@ -5371,11 +5354,18 @@ void LocationsBuilderARM64::VisitParallelMove(HParallelMove* instruction ATTRIBU } void InstructionCodeGeneratorARM64::VisitParallelMove(HParallelMove* instruction) { + if (instruction->GetNext()->IsSuspendCheck() && + instruction->GetBlock()->GetLoopInformation() != nullptr) { + HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); + // The back edge will generate the suspend check. 
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); + } + codegen_->GetMoveResolver()->EmitNativeCode(instruction); } void LocationsBuilderARM64::VisitParameterValue(HParameterValue* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); if (location.IsStackSlot()) { location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); @@ -5392,7 +5382,7 @@ void InstructionCodeGeneratorARM64::VisitParameterValue( void LocationsBuilderARM64::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetOut(LocationFrom(kArtMethodRegister)); } @@ -5402,7 +5392,7 @@ void InstructionCodeGeneratorARM64::VisitCurrentMethod( } void LocationsBuilderARM64::VisitPhi(HPhi* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { locations->SetInAt(i, Location::Any()); } @@ -5414,22 +5404,22 @@ void InstructionCodeGeneratorARM64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) } void LocationsBuilderARM64::VisitRem(HRem* rem) { - Primitive::Type type = rem->GetResultType(); + DataType::Type type = rem->GetResultType(); LocationSummary::CallKind call_kind = - Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly + DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); @@ -5444,20 +5434,21 @@ void LocationsBuilderARM64::VisitRem(HRem* rem) { } void InstructionCodeGeneratorARM64::VisitRem(HRem* rem) { - Primitive::Type type = rem->GetResultType(); + DataType::Type type = rem->GetResultType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? kQuickFmodf : kQuickFmod; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + QuickEntrypointEnum entrypoint = + (type == DataType::Type::kFloat32) ? 
kQuickFmodf : kQuickFmod; codegen_->InvokeRuntime(entrypoint, rem, rem->GetDexPc()); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { CheckEntrypointTypes<kQuickFmodf, float, float, float>(); } else { CheckEntrypointTypes<kQuickFmod, double, double, double>(); @@ -5489,8 +5480,8 @@ void InstructionCodeGeneratorARM64::VisitMemoryBarrier(HMemoryBarrier* memory_ba } void LocationsBuilderARM64::VisitReturn(HReturn* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - Primitive::Type return_type = instruction->InputAt(0)->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type return_type = instruction->InputAt(0)->GetType(); locations->SetInAt(0, ARM64ReturnLocation(return_type)); } @@ -5623,8 +5614,8 @@ void InstructionCodeGeneratorARM64::VisitUnresolvedStaticFieldSet( } void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnSlowPath); // In suspend check slow path, usually there are no caller-save registers at all. // If SIMD instructions are present, however, we force spilling all live SIMD // registers in full width (since the runtime only saves/restores lower part). @@ -5644,11 +5635,12 @@ void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction return; } GenerateSuspendCheck(instruction, nullptr); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void LocationsBuilderARM64::VisitThrow(HThrow* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); } @@ -5660,22 +5652,23 @@ void InstructionCodeGeneratorARM64::VisitThrow(HThrow* instruction) { void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); - Primitive::Type input_type = conversion->GetInputType(); - Primitive::Type result_type = conversion->GetResultType(); - DCHECK_NE(input_type, result_type); - if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || - (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { + new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall); + DataType::Type input_type = conversion->GetInputType(); + DataType::Type result_type = conversion->GetResultType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; + if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || + (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - if (Primitive::IsFloatingPointType(input_type)) { + if (DataType::IsFloatingPointType(input_type)) { locations->SetInAt(0, Location::RequiresFpuRegister()); } else 
{ locations->SetInAt(0, Location::RequiresRegister()); } - if (Primitive::IsFloatingPointType(result_type)) { + if (DataType::IsFloatingPointType(result_type)) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -5683,18 +5676,19 @@ void LocationsBuilderARM64::VisitTypeConversion(HTypeConversion* conversion) { } void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* conversion) { - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); - DCHECK_NE(input_type, result_type); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; - if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { - int result_size = Primitive::ComponentSize(result_type); - int input_size = Primitive::ComponentSize(input_type); + if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { + int result_size = DataType::Size(result_type); + int input_size = DataType::Size(input_type); int min_size = std::min(result_size, input_size); Register output = OutputRegister(conversion); Register source = InputRegisterAt(conversion, 0); - if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) { // 'int' values are used directly as W registers, discarding the top // bits, so we don't need to sign-extend and can just perform a move. // We do not pass the `kDiscardForSameWReg` argument to force clearing the @@ -5703,21 +5697,19 @@ void InstructionCodeGeneratorARM64::VisitTypeConversion(HTypeConversion* convers // 32bit input value as a 64bit value assuming that the top 32 bits are // zero. __ Mov(output.W(), source.W()); - } else if (result_type == Primitive::kPrimChar || - (input_type == Primitive::kPrimChar && input_size < result_size)) { - __ Ubfx(output, - output.IsX() ? source.X() : source.W(), - 0, Primitive::ComponentSize(Primitive::kPrimChar) * kBitsPerByte); + } else if (DataType::IsUnsignedType(result_type) || + (DataType::IsUnsignedType(input_type) && input_size < result_size)) { + __ Ubfx(output, output.IsX() ? source.X() : source.W(), 0, result_size * kBitsPerByte); } else { __ Sbfx(output, output.IsX() ? 
source.X() : source.W(), 0, min_size * kBitsPerByte); } - } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { + } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { __ Scvtf(OutputFPRegister(conversion), InputRegisterAt(conversion, 0)); - } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { - CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); + } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { + CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); __ Fcvtzs(OutputRegister(conversion), InputFPRegisterAt(conversion, 0)); - } else if (Primitive::IsFloatingPointType(result_type) && - Primitive::IsFloatingPointType(input_type)) { + } else if (DataType::IsFloatingPointType(result_type) && + DataType::IsFloatingPointType(input_type)) { __ Fcvt(OutputFPRegister(conversion), InputFPRegisterAt(conversion, 0)); } else { LOG(FATAL) << "Unexpected or unimplemented type conversion from " << input_type @@ -5754,7 +5746,7 @@ void InstructionCodeGeneratorARM64::VisitBoundType(HBoundType* instruction ATTRI // Simple implementation of packed switch - generate cascaded compare/jumps. void LocationsBuilderARM64::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); } @@ -5844,7 +5836,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister( uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option) { - Primitive::Type type = Primitive::kPrimNot; + DataType::Type type = DataType::Type::kReference; Register out_reg = RegisterFrom(out, type); if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); @@ -5884,7 +5876,7 @@ void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters( uint32_t offset, Location maybe_temp, ReadBarrierOption read_barrier_option) { - Primitive::Type type = Primitive::kPrimNot; + DataType::Type type = DataType::Type::kReference; Register out_reg = RegisterFrom(out, type); Register obj_reg = RegisterFrom(obj, type); if (read_barrier_option == kWithReadBarrier) { @@ -5921,7 +5913,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( vixl::aarch64::Label* fixup_label, ReadBarrierOption read_barrier_option) { DCHECK(fixup_label == nullptr || offset == 0u); - Register root_reg = RegisterFrom(root, Primitive::kPrimNot); + Register root_reg = RegisterFrom(root, DataType::Type::kReference); if (read_barrier_option == kWithReadBarrier) { DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { @@ -5978,7 +5970,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. The entrypoint will // be loaded by the slow path code. 
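
For readers following the root-load sequence above: with Baker read barriers, the generated code loads the GC root and then branches to the marking slow path only while the dedicated marking register is non-zero. The standalone C++ sketch below models that fast-path/slow-path split; `is_gc_marking`, `MarkObject`, and `LoadGcRoot` are illustrative names, not ART's API, and the real slow path loads the mark entrypoint rather than calling a C++ function.

#include <atomic>
#include <cstdint>

// Illustrative stand-ins for runtime state; not ART's real types or API.
struct Object { uint32_t fields[4]; };

// Models the "marking register": non-zero while the concurrent GC is marking.
std::atomic<int> is_gc_marking{0};

// Models the mark entrypoint reached through the slow path.
Object* MarkObject(Object* obj) {
  // A real collector would shade or forward the object here.
  return obj;
}

// Fast path: plain load of the root. Slow path: only taken while marking,
// mirroring the cbnz-on-marking-register pattern emitted by the ARM64 backend.
Object* LoadGcRoot(Object** root_slot) {
  Object* root = *root_slot;                                 // root = *(obj + offset)
  if (is_gc_marking.load(std::memory_order_relaxed) != 0) {  // cbnz mr, slow_path
    root = MarkObject(root);                                 // entrypoint loaded by the slow path
  }
  return root;
}

int main() {
  Object obj{};
  Object* slot = &obj;
  return LoadGcRoot(&slot) == &obj ? 0 : 1;
}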
SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root); + new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARM64(instruction, root); codegen_->AddSlowPath(slow_path); // /* GcRoot<mirror::Object> */ root = *(obj + offset) @@ -6021,6 +6013,7 @@ void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad( // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction, @@ -6074,22 +6067,25 @@ void CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* ins obj.GetCode()); vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); - EmissionCheckScope guard(GetVIXLAssembler(), - (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Field LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); - __ ldr(ref_reg, MemOperand(base.X(), offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); + { + EmissionCheckScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + __ Bind(cbnz_label); + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + static_assert(BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET == (kPoisonHeapReferences ? 
-8 : -4), + "Field LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); + __ ldr(ref_reg, MemOperand(base.X(), offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); } - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ Bind(&return_address); + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); return; } @@ -6121,7 +6117,7 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); - size_t scale_factor = Primitive::ComponentSizeShift(Primitive::kPrimNot); + size_t scale_factor = DataType::SizeShift(DataType::Type::kReference); if (kBakerReadBarrierLinkTimeThunksEnableForArrays && !Runtime::Current()->UseJitCompilation()) { @@ -6146,8 +6142,8 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins // gray_return_address: DCHECK(index.IsValid()); - Register index_reg = RegisterFrom(index, Primitive::kPrimInt); - Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + Register index_reg = RegisterFrom(index, DataType::Type::kInt32); + Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); UseScratchRegisterScope temps(GetVIXLAssembler()); DCHECK(temps.IsAvailable(ip0)); @@ -6158,19 +6154,22 @@ void CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* ins vixl::aarch64::Label* cbnz_label = NewBakerReadBarrierPatch(custom_data); __ Add(temp.X(), obj.X(), Operand(data_offset)); - EmissionCheckScope guard(GetVIXLAssembler(), - (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); - vixl::aarch64::Label return_address; - __ adr(lr, &return_address); - __ Bind(cbnz_label); - __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. - static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), - "Array LDR must be 1 instruction (4B) before the return address label; " - " 2 instructions (8B) for heap poisoning."); - __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); - DCHECK(!needs_null_check); // The thunk cannot handle the null check. - GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); - __ Bind(&return_address); + { + EmissionCheckScope guard(GetVIXLAssembler(), + (kPoisonHeapReferences ? 4u : 3u) * vixl::aarch64::kInstructionSize); + vixl::aarch64::Label return_address; + __ adr(lr, &return_address); + __ Bind(cbnz_label); + __ cbnz(mr, static_cast<int64_t>(0)); // Placeholder, patched at link-time. + static_assert(BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET == (kPoisonHeapReferences ? -8 : -4), + "Array LDR must be 1 instruction (4B) before the return address label; " + " 2 instructions (8B) for heap poisoning."); + __ ldr(ref_reg, MemOperand(temp.X(), index_reg.X(), LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. 
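
The braces added around the thunk entry sequences above matter because the emission scope enforces a fixed instruction budget: the adr/cbnz/ldr (plus the optional unpoison) must fall inside it, while the new marking-register check is emitted after the scope closes. Below is a rough standalone analogue of such a size-checking guard; `Assembler`, `EmissionCheckScope`, and the encodings are simplified stand-ins for the VIXL classes, assuming fixed 4-byte A64 instructions.

#include <cassert>
#include <cstddef>
#include <vector>

// Minimal stand-ins; not VIXL's real classes.
struct Assembler {
  std::vector<unsigned> buffer;
  void Emit(unsigned insn) { buffer.push_back(insn); }
  size_t SizeInBytes() const { return buffer.size() * 4u; }  // fixed 4-byte A64 encoding
};

// Rough analogue of vixl's EmissionCheckScope: asserts that no more than
// `max_bytes` of code were emitted between construction and destruction.
class EmissionCheckScope {
 public:
  EmissionCheckScope(Assembler* masm, size_t max_bytes)
      : masm_(masm), start_(masm->SizeInBytes()), max_bytes_(max_bytes) {}
  ~EmissionCheckScope() { assert(masm_->SizeInBytes() - start_ <= max_bytes_); }
 private:
  Assembler* masm_;
  size_t start_;
  size_t max_bytes_;
};

int main() {
  Assembler masm;
  constexpr bool kPoisonHeapReferences = false;
  {
    // The fixed-size thunk entry sequence must stay inside the scope ...
    EmissionCheckScope guard(&masm, (kPoisonHeapReferences ? 4u : 3u) * 4u);
    masm.Emit(0x10000000);  // adr lr, return_address
    masm.Emit(0xB5000000);  // cbnz mr, <thunk>   (placeholder, patched at link time)
    masm.Emit(0xB9400000);  // ldr ref, [base, #offset]
  }
  // ... while extra debug-only code (e.g. a marking register check) is emitted
  // after the scope closes, so it does not count against the size budget.
  masm.Emit(0xD4200000);  // brk #code (illustrative)
  return 0;
}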
+ GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); + __ Bind(&return_address); + } + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__, /* temp_loc */ LocationFrom(ip1)); return; } @@ -6230,7 +6229,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // Slow path marking the object `ref` when the GC is marking. The // entrypoint will be loaded by the slow path code. SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARM64( + new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARM64( instruction, ref, obj, @@ -6247,6 +6246,7 @@ void CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* GenerateRawReferenceLoad( instruction, ref, obj, offset, index, scale_factor, needs_null_check, use_load_acquire); __ Bind(slow_path->GetExitLabel()); + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, @@ -6287,7 +6287,7 @@ void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* // Slow path updating the object reference at address `obj + field_offset` // when the GC is marking. The entrypoint will be loaded by the slow path code. SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( + new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARM64( instruction, ref, obj, @@ -6303,6 +6303,7 @@ void CodeGeneratorARM64::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* // Fast path: the GC is not marking: nothing to do (the field is // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); + MaybeGenerateMarkingRegisterCheck(/* code */ __LINE__); } void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, @@ -6314,7 +6315,7 @@ void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, bool needs_null_check, bool use_load_acquire) { DCHECK(obj.IsW()); - Primitive::Type type = Primitive::kPrimNot; + DataType::Type type = DataType::Type::kReference; Register ref_reg = RegisterFrom(ref, type); // If needed, vixl::EmissionCheckScope guards are used to ensure @@ -6381,6 +6382,19 @@ void CodeGeneratorARM64::GenerateRawReferenceLoad(HInstruction* instruction, GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); } +void CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { + // The following condition is a compile-time one, so it does not have a run-time cost. + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { + // The following condition is a run-time one; it is executed after the + // previous compile-time test, to avoid penalizing non-debug builds. + if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + Register temp = temp_loc.IsValid() ? WRegisterFrom(temp_loc) : temps.AcquireW(); + GetAssembler()->GenerateMarkingRegisterCheck(temp, code); + } + } +} + void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, Location out, Location ref, @@ -6400,7 +6414,7 @@ void CodeGeneratorARM64::GenerateReadBarrierSlow(HInstruction* instruction, // not used by the artReadBarrierSlow entry point. // // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 
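
The new MaybeGenerateMarkingRegisterCheck helper introduced above layers a compile-time test (Baker read barriers enabled and a debug build) in front of a run-time compiler option, so release configurations emit nothing. A minimal sketch of that gating structure follows; `CompilerOptions` here is a stand-in with only the one flag, and the printf stands in for actually emitting the check code.

#include <cstdio>

// Illustrative build-time flags; in ART these are real constexpr globals.
constexpr bool kEmitCompilerReadBarrier = true;
constexpr bool kUseBakerReadBarrier = true;
#ifdef NDEBUG
constexpr bool kIsDebugBuild = false;
#else
constexpr bool kIsDebugBuild = true;
#endif

// Stand-in for CompilerOptions::EmitRunTimeChecksInDebugMode().
struct CompilerOptions {
  bool emit_run_time_checks_in_debug_mode = false;
  bool EmitRunTimeChecksInDebugMode() const { return emit_run_time_checks_in_debug_mode; }
};

// Mirrors the gating of MaybeGenerateMarkingRegisterCheck: the outer test folds
// away at compile time, so non-debug or non-Baker configurations pay nothing;
// only then is the per-compilation run-time option consulted.
void MaybeGenerateMarkingRegisterCheck(const CompilerOptions& options, int code) {
  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) {
    if (options.EmitRunTimeChecksInDebugMode()) {
      std::printf("emit MR check, brk code=%d\n", code);  // stands in for GenerateMarkingRegisterCheck(temp, code)
    }
  }
}

int main() {
  CompilerOptions options;
  options.emit_run_time_checks_in_debug_mode = true;
  MaybeGenerateMarkingRegisterCheck(options, __LINE__);
  return 0;
}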
- SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) + SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) ReadBarrierForHeapReferenceSlowPathARM64(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); @@ -6436,7 +6450,7 @@ void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instructio // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCodeARM64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARM64(instruction, out, root); + new (GetScopedAllocator()) ReadBarrierForRootSlowPathARM64(instruction, out, root); AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); @@ -6445,7 +6459,7 @@ void CodeGeneratorARM64::GenerateReadBarrierForRootSlow(HInstruction* instructio void LocationsBuilderARM64::VisitClassTableGet(HClassTableGet* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } @@ -6482,17 +6496,13 @@ void CodeGeneratorARM64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_ for (const auto& entry : jit_string_patches_) { const StringReference& string_reference = entry.first; vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; - const auto it = jit_string_roots_.find(string_reference); - DCHECK(it != jit_string_roots_.end()); - uint64_t index_in_table = it->second; + uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { const TypeReference& type_reference = entry.first; vixl::aarch64::Literal<uint32_t>* table_entry_literal = entry.second; - const auto it = jit_class_roots_.find(type_reference); - DCHECK(it != jit_class_roots_.end()); - uint64_t index_in_table = it->second; + uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 584eead81b..0654046de5 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -20,12 +20,12 @@ #include "arch/arm64/quick_method_frame_info_arm64.h" #include "code_generator.h" #include "common_arm64.h" -#include "dex_file_types.h" +#include "dex/dex_file_types.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" -#include "string_reference.h" -#include "type_reference.h" #include "utils/arm64/assembler_arm64.h" // TODO(VIXL): Make VIXL compile with -Wshadow. 
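
The EmitJitRootPatches change above replaces an open-coded map lookup with the GetJitStringRootIndex/GetJitClassRootIndex helpers, which assert that every patched literal has a matching entry in the JIT root table. A simplified sketch of that lookup, assuming a std::pair key in place of ART's StringReference, is given below.

#include <cassert>
#include <cstdint>
#include <map>
#include <utility>

// Illustrative stand-in for dex file / string index identity; not ART's types.
using StringReference = std::pair<const void*, uint32_t>;  // (dex file, string index)

class JitRootTable {
 public:
  // Record that `ref` occupies slot `index` in the JIT GC-root table.
  void Add(const StringReference& ref, uint64_t index) { roots_[ref] = index; }

  // Analogue of GetJitStringRootIndex(): every patch emitted for `ref` must
  // have a matching root table entry, so a missing key is a compiler bug.
  uint64_t GetRootIndex(const StringReference& ref) const {
    auto it = roots_.find(ref);
    assert(it != roots_.end());
    return it->second;
  }

 private:
  std::map<StringReference, uint64_t> roots_;
};

int main() {
  const int fake_dex_file = 0;  // placeholder identity for a dex file
  JitRootTable table;
  StringReference ref(&fake_dex_file, 42u);
  table.Add(ref, 3u);
  // Patching a literal would use this index to address roots_data[3].
  return table.GetRootIndex(ref) == 3u ? 0 : 1;
}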
@@ -100,7 +100,7 @@ const vixl::aarch64::CPURegList callee_saved_fp_registers(vixl::aarch64::CPURegi vixl::aarch64::kDRegSize, vixl::aarch64::d8.GetCode(), vixl::aarch64::d15.GetCode()); -Location ARM64ReturnLocation(Primitive::Type return_type); +Location ARM64ReturnLocation(DataType::Type return_type); class SlowPathCodeARM64 : public SlowPathCode { public: @@ -171,7 +171,7 @@ class InvokeRuntimeCallingConvention : public CallingConvention<vixl::aarch64::R kRuntimeParameterFpuRegistersLength, kArm64PointerSize) {} - Location GetReturnLocation(Primitive::Type return_type); + Location GetReturnLocation(DataType::Type return_type); private: DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); @@ -187,7 +187,7 @@ class InvokeDexCallingConvention : public CallingConvention<vixl::aarch64::Regis kParameterFPRegistersLength, kArm64PointerSize) {} - Location GetReturnLocation(Primitive::Type return_type) const { + Location GetReturnLocation(DataType::Type return_type) const { return ARM64ReturnLocation(return_type); } @@ -201,8 +201,8 @@ class InvokeDexCallingConventionVisitorARM64 : public InvokeDexCallingConvention InvokeDexCallingConventionVisitorARM64() {} virtual ~InvokeDexCallingConventionVisitorARM64() {} - Location GetNextLocation(Primitive::Type type) OVERRIDE; - Location GetReturnLocation(Primitive::Type return_type) const OVERRIDE { + Location GetNextLocation(DataType::Type type) OVERRIDE; + Location GetReturnLocation(DataType::Type return_type) const OVERRIDE { return calling_convention.GetReturnLocation(return_type); } Location GetMethodLocation() const OVERRIDE; @@ -223,16 +223,16 @@ class FieldAccessCallingConventionARM64 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const OVERRIDE { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return helpers::LocationFrom(vixl::aarch64::x0); } - Location GetSetValueLocation(Primitive::Type type ATTRIBUTE_UNUSED, + Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE { return is_instance ? helpers::LocationFrom(vixl::aarch64::x2) : helpers::LocationFrom(vixl::aarch64::x1); } - Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return helpers::LocationFrom(vixl::aarch64::d0); } @@ -489,7 +489,7 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t GetPreferredSlotsAlignment() const OVERRIDE { return vixl::aarch64::kXRegSizeInBytes; } JumpTableARM64* CreateJumpTable(HPackedSwitch* switch_instr) { - jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARM64(switch_instr)); + jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARM64(switch_instr)); return jump_tables_.back().get(); } @@ -498,13 +498,13 @@ class CodeGeneratorARM64 : public CodeGenerator { // Code generation helpers. 
void MoveConstant(vixl::aarch64::CPURegister destination, HConstant* constant); void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; - void Load(Primitive::Type type, + void Load(DataType::Type type, vixl::aarch64::CPURegister dst, const vixl::aarch64::MemOperand& src); - void Store(Primitive::Type type, + void Store(DataType::Type type, vixl::aarch64::CPURegister src, const vixl::aarch64::MemOperand& dst); void LoadAcquire(HInstruction* instruction, @@ -512,7 +512,7 @@ class CodeGeneratorARM64 : public CodeGenerator { const vixl::aarch64::MemOperand& src, bool needs_null_check); void StoreRelease(HInstruction* instruction, - Primitive::Type type, + DataType::Type type, vixl::aarch64::CPURegister src, const vixl::aarch64::MemOperand& dst, bool needs_null_check); @@ -531,7 +531,7 @@ class CodeGeneratorARM64 : public CodeGenerator { ParallelMoveResolverARM64* GetMoveResolver() OVERRIDE { return &move_resolver_; } - bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; } @@ -557,7 +557,7 @@ class CodeGeneratorARM64 : public CodeGenerator { HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { + DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE { UNIMPLEMENTED(FATAL); } @@ -565,8 +565,8 @@ class CodeGeneratorARM64 : public CodeGenerator { // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method, - vixl::aarch64::Label* adrp_label = nullptr); + vixl::aarch64::Label* NewBootImageMethodPatch(MethodReference target_method, + vixl::aarch64::Label* adrp_label = nullptr); // Add a new .bss entry method patch for an instruction and return // the label to be bound before the instruction. The instruction will be @@ -579,9 +579,9 @@ class CodeGeneratorARM64 : public CodeGenerator { // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). - vixl::aarch64::Label* NewPcRelativeTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, - vixl::aarch64::Label* adrp_label = nullptr); + vixl::aarch64::Label* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + vixl::aarch64::Label* adrp_label = nullptr); // Add a new .bss entry type patch for an instruction and return the label // to be bound before the instruction. The instruction will be either the @@ -595,9 +595,17 @@ class CodeGeneratorARM64 : public CodeGenerator { // to be bound before the instruction. The instruction will be either the // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing // to the associated ADRP patch label). 
- vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - vixl::aarch64::Label* adrp_label = nullptr); + vixl::aarch64::Label* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label = nullptr); + + // Add a new .bss entry string patch for an instruction and return the label + // to be bound before the instruction. The instruction will be either the + // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing + // to the associated ADRP patch label). + vixl::aarch64::Label* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + vixl::aarch64::Label* adrp_label = nullptr); // Add a new baker read barrier patch and return the label to be bound // before the CBNZ instruction. @@ -619,7 +627,7 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Register out, vixl::aarch64::Register base); - void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; @@ -687,6 +695,22 @@ class CodeGeneratorARM64 : public CodeGenerator { bool needs_null_check, bool use_load_acquire); + // Emit code checking the status of the Marking Register, and + // aborting the program if MR does not match the value stored in the + // art::Thread object. Code is only emitted in debug mode and if + // CompilerOptions::EmitRunTimeChecksInDebugMode returns true. + // + // Argument `code` is used to identify the different occurrences of + // MaybeGenerateMarkingRegisterCheck in the code generator, and is + // passed to the BRK instruction. + // + // If `temp_loc` is a valid location, it is expected to be a + // register and will be used as a temporary to generate code; + // otherwise, a temporary will be fetched from the core register + // scratch pool. + virtual void MaybeGenerateMarkingRegisterCheck(int code, + Location temp_loc = Location::NoLocation()); + // Generate a read barrier for a heap reference within `instruction` // using a slow path. // @@ -753,17 +777,12 @@ class CodeGeneratorARM64 : public CodeGenerator { vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value); vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value); - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings/types. The only difference is the interpretation of the - // offset_or_index. - struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) - : target_dex_file(dex_file), offset_or_index(off_or_idx), label(), pc_insn_label() { } + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + struct PcRelativePatchInfo : PatchInfo<vixl::aarch64::Label> { + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx) + : PatchInfo<vixl::aarch64::Label>(dex_file, off_or_idx), pc_insn_label() { } - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type index. 
- uint32_t offset_or_index; - vixl::aarch64::Label label; vixl::aarch64::Label* pc_insn_label; }; @@ -774,16 +793,16 @@ class CodeGeneratorARM64 : public CodeGenerator { uint32_t custom_data; }; - vixl::aarch64::Label* NewPcRelativePatch(const DexFile& dex_file, + vixl::aarch64::Label* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, vixl::aarch64::Label* adrp_label, ArenaDeque<PcRelativePatchInfo>* patches); void EmitJumpTables(); - template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches); + ArenaVector<linker::LinkerPatch>* linker_patches); // Labels for each block that will be compiled. // We use a deque so that the `vixl::aarch64::Label` objects do not move in memory. @@ -802,15 +821,17 @@ class CodeGeneratorARM64 : public CodeGenerator { // Deduplication map for 64-bit literals, used for non-patchable method address or method code. Uint64ToLiteralMap uint64_literals_; // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Baker read barrier patch info. ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index b9d4700511..2452139d42 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -21,13 +21,16 @@ #include "art_method.h" #include "base/bit_utils.h" #include "base/bit_utils_iterator.h" +#include "class_table.h" #include "code_generator_utils.h" #include "common_arm.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "heap_poisoning.h" #include "intrinsics_arm_vixl.h" #include "linker/arm/relative_patcher_thumb2.h" +#include "linker/linker_patch.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "thread.h" @@ -94,6 +97,9 @@ constexpr bool kBakerReadBarrierLinkTimeThunksEnableForGcRoots = true; // The reserved entrypoint register for link-time generated thunks. const vixl32::Register kBakerCcEntrypointRegister = r4; +// Using a base helps identify when we hit Marking Register check breakpoints. +constexpr int kMarkingRegisterCheckBreakCodeBaseCode = 0x10; + #ifdef __ #error "ARM Codegen VIXL macro-assembler macro already defined." 
#endif @@ -276,6 +282,58 @@ static size_t RestoreContiguousSRegisterList(size_t first, return stack_offset; } +static LoadOperandType GetLoadOperandType(DataType::Type type) { + switch (type) { + case DataType::Type::kReference: + return kLoadWord; + case DataType::Type::kBool: + case DataType::Type::kUint8: + return kLoadUnsignedByte; + case DataType::Type::kInt8: + return kLoadSignedByte; + case DataType::Type::kUint16: + return kLoadUnsignedHalfword; + case DataType::Type::kInt16: + return kLoadSignedHalfword; + case DataType::Type::kInt32: + return kLoadWord; + case DataType::Type::kInt64: + return kLoadWordPair; + case DataType::Type::kFloat32: + return kLoadSWord; + case DataType::Type::kFloat64: + return kLoadDWord; + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + +static StoreOperandType GetStoreOperandType(DataType::Type type) { + switch (type) { + case DataType::Type::kReference: + return kStoreWord; + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + return kStoreByte; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + return kStoreHalfword; + case DataType::Type::kInt32: + return kStoreWord; + case DataType::Type::kInt64: + return kStoreWordPair; + case DataType::Type::kFloat32: + return kStoreSWord; + case DataType::Type::kFloat64: + return kStoreDWord; + default: + LOG(FATAL) << "Unreachable type " << type; + UNREACHABLE(); + } +} + void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) { size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath(); size_t orig_offset = stack_offset; @@ -444,10 +502,10 @@ class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { codegen->EmitParallelMoves( locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), - Primitive::kPrimInt, + DataType::Type::kInt32, locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt); + DataType::Type::kInt32); QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? kQuickThrowStringBounds : kQuickThrowArrayBounds; @@ -474,29 +532,12 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); - constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier); CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConventionARMVIXL calling_convention; - // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - bool is_load_class_bss_entry = - (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry); - vixl32::Register entry_address; - if (is_load_class_bss_entry && call_saves_everything_except_r0) { - vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); - // In the unlucky case that the `temp` is R0, we preserve the address in `out` across - // the kSaveEverything call. - bool temp_is_r0 = temp.Is(calling_convention.GetRegisterAt(0)); - entry_address = temp_is_r0 ? 
RegisterFrom(out) : temp; - DCHECK(!entry_address.Is(calling_convention.GetRegisterAt(0))); - if (temp_is_r0) { - __ Mov(entry_address, temp); - } - } dex::TypeIndex type_index = cls_->GetTypeIndex(); __ Mov(calling_convention.GetRegisterAt(0), type_index.index_); QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage @@ -508,22 +549,6 @@ class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL { CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); } - // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry. - if (is_load_class_bss_entry) { - if (call_saves_everything_except_r0) { - // The class entry address was preserved in `entry_address` thanks to kSaveEverything. - __ Str(r0, MemOperand(entry_address)); - } else { - // For non-Baker read barrier, we need to re-calculate the address of the string entry. - UseScratchRegisterScope temps( - down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - arm_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); - arm_codegen->EmitMovwMovtPlaceholder(labels, temp); - __ Str(r0, MemOperand(temp)); - } - } // Move the class to the desired location. if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); @@ -558,48 +583,17 @@ class LoadStringSlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - HLoadString* load = instruction_->AsLoadString(); - const dex::StringIndex string_index = load->GetStringIndex(); - vixl32::Register out = OutputRegister(load); - constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier); + const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConventionARMVIXL calling_convention; - // In the unlucky case that the `temp` is R0, we preserve the address in `out` across - // the kSaveEverything call. - vixl32::Register entry_address; - if (call_saves_everything_except_r0) { - vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); - bool temp_is_r0 = (temp.Is(calling_convention.GetRegisterAt(0))); - entry_address = temp_is_r0 ? out : temp; - DCHECK(!entry_address.Is(calling_convention.GetRegisterAt(0))); - if (temp_is_r0) { - __ Mov(entry_address, temp); - } - } - __ Mov(calling_convention.GetRegisterAt(0), string_index.index_); arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - // Store the resolved String to the .bss entry. - if (call_saves_everything_except_r0) { - // The string entry address was preserved in `entry_address` thanks to kSaveEverything. - __ Str(r0, MemOperand(entry_address)); - } else { - // For non-Baker read barrier, we need to re-calculate the address of the string entry. 
- UseScratchRegisterScope temps( - down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); - arm_codegen->EmitMovwMovtPlaceholder(labels, temp); - __ Str(r0, MemOperand(temp)); - } - arm_codegen->Move32(locations->Out(), LocationFrom(r0)); RestoreLiveRegisters(codegen, locations); @@ -625,7 +619,7 @@ class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -635,10 +629,10 @@ class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL { codegen->EmitParallelMoves(locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot); + DataType::Type::kReference); if (instruction_->IsInstanceOf()) { arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, @@ -705,21 +699,21 @@ class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConventionARMVIXL calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove( locations->InAt(0), LocationFrom(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove( locations->InAt(1), LocationFrom(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); parallel_move.AddMove( locations->InAt(2), LocationFrom(calling_convention.GetRegisterAt(2)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); @@ -1134,7 +1128,7 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL // // Note that this field could also hold a different object, if // another thread had concurrently changed it. In that case, the - // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set + // LDREX/CMP/BNE sequence of instructions in the compare-and-set // (CAS) operation below would abort the CAS, leaving the field // as-is. 
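
The comment above describes the compare-and-set performed when updating a field through the read-barrier slow path: the new reference is stored only if the field still holds the value originally read, and a concurrent change aborts the CAS. The sketch below is a C++ analogue of that LDREX/CMP/BNE/STREX retry loop using std::atomic, not the code the backend emits; `HeapRef` and `CasFieldRelaxed` are illustrative names.

#include <atomic>
#include <cstdint>

// Illustrative 32-bit compressed heap reference; not ART's mirror types.
using HeapRef = uint32_t;

// Replace `expected` with `desired` only if the field still holds `expected`.
// A spurious store-exclusive failure (compare_exchange_weak) simply retries,
// while any real change by another thread aborts the CAS and leaves the field as-is.
bool CasFieldRelaxed(std::atomic<HeapRef>* field, HeapRef expected, HeapRef desired) {
  HeapRef old_value = expected;
  while (!field->compare_exchange_weak(old_value, desired, std::memory_order_relaxed)) {
    if (old_value != expected) {
      return false;  // comparison failed: another thread stored a different reference
    }
    old_value = expected;  // spurious failure: retry, mirroring "strex failed -> loop"
  }
  return true;
}

int main() {
  std::atomic<HeapRef> field{0x1000u};
  bool updated = CasFieldRelaxed(&field, /* expected */ 0x1000u, /* desired */ 0x2000u);
  return (updated && field.load() == 0x2000u) ? 0 : 1;
}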
__ Cmp(temp1_, ref_reg); @@ -1174,28 +1168,16 @@ class LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL // tmp = [r_ptr] - expected; // } while (tmp == 0 && failure([r_ptr] <- r_new_value)); - vixl32::Label loop_head, exit_loop; + vixl32::Label loop_head, comparison_failed, exit_loop; __ Bind(&loop_head); - __ Ldrex(tmp, MemOperand(tmp_ptr)); - - __ Subs(tmp, tmp, expected); - - { - ExactAssemblyScope aas(arm_codegen->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - - __ it(ne); - __ clrex(ne); - } - - __ B(ne, &exit_loop, /* far_target */ false); - + __ Cmp(tmp, expected); + __ B(ne, &comparison_failed, /* far_target */ false); __ Strex(tmp, value, MemOperand(tmp_ptr)); - __ Cmp(tmp, 1); - __ B(eq, &loop_head, /* far_target */ false); - + __ CompareAndBranchIfZero(tmp, &exit_loop, /* far_target */ false); + __ B(&loop_head); + __ Bind(&comparison_failed); + __ Clrex(); __ Bind(&exit_loop); if (kPoisonHeapReferences) { @@ -1356,19 +1338,19 @@ class ReadBarrierForHeapReferenceSlowPathARMVIXL : public SlowPathCodeARMVIXL { // We're moving two or three locations to locations that could // overlap, so we need a parallel move resolver. InvokeRuntimeCallingConventionARMVIXL calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove(ref_, LocationFrom(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove(obj_, LocationFrom(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); if (index.IsValid()) { parallel_move.AddMove(index, LocationFrom(calling_convention.GetRegisterAt(2)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); codegen->GetMoveResolver()->EmitNativeCode(¶llel_move); } else { @@ -1635,7 +1617,7 @@ static Operand GetShifterOperand(vixl32::Register rm, ShiftType shift, uint32_t static void GenerateLongDataProc(HDataProcWithShifterOp* instruction, CodeGeneratorARMVIXL* codegen) { - DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64); DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())); const LocationSummary* const locations = instruction->GetLocations(); @@ -1770,12 +1752,12 @@ static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codege // care here. 
DCHECK(rhs_loc.GetConstant()->IsArithmeticZero()); - const Primitive::Type type = instruction->InputAt(0)->GetType(); + const DataType::Type type = instruction->InputAt(0)->GetType(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0); } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0); } } else { @@ -1815,7 +1797,7 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( HCondition* condition, bool invert, CodeGeneratorARMVIXL* codegen) { - DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); + DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64); const LocationSummary* const locations = condition->GetLocations(); IfCondition cond = condition->GetCondition(); @@ -1872,15 +1854,26 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant( case kCondBE: case kCondA: case kCondAE: { + const uint32_t value_low = Low32Bits(value); + Operand operand_low(value_low); + __ Cmp(left_high, High32Bits(value)); + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we must ensure that the operands corresponding to the least significant + // halves of the inputs fit into a 16-bit CMP encoding. + if (!left_low.IsLow() || !IsUint<8>(value_low)) { + operand_low = Operand(temps.Acquire()); + __ Mov(LeaveFlags, operand_low.GetBaseRegister(), value_low); + } + // We use the scope because of the IT block that follows. ExactAssemblyScope guard(codegen->GetVIXLAssembler(), 2 * vixl32::k16BitT32InstructionSizeInBytes, CodeBufferCheckScope::kExactSize); __ it(eq); - __ cmp(eq, left_low, Low32Bits(value)); + __ cmp(eq, left_low, operand_low); ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite)); break; } @@ -1925,7 +1918,7 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest( HCondition* condition, bool invert, CodeGeneratorARMVIXL* codegen) { - DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong); + DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64); const LocationSummary* const locations = condition->GetLocations(); IfCondition cond = condition->GetCondition(); @@ -1995,7 +1988,7 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest( static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition, bool invert, CodeGeneratorARMVIXL* codegen) { - const Primitive::Type type = condition->GetLeft()->GetType(); + const DataType::Type type = condition->GetLeft()->GetType(); IfCondition cond = condition->GetCondition(); IfCondition opposite = condition->GetOppositeCondition(); std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne); @@ -2004,17 +1997,17 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* std::swap(cond, opposite); } - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { ret = condition->GetLocations()->InAt(1).IsConstant() ? 
GenerateLongTestConstant(condition, invert, codegen) : GenerateLongTest(condition, invert, codegen); - } else if (Primitive::IsFloatingPointType(type)) { + } else if (DataType::IsFloatingPointType(type)) { GenerateVcmp(condition, codegen); __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR); ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()), ARMFPCondition(opposite, condition->IsGtBias())); } else { - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1)); ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite)); } @@ -2022,46 +2015,7 @@ static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* return ret; } -static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) { - if (condition->GetLeft()->GetType() == Primitive::kPrimLong) { - const LocationSummary* const locations = condition->GetLocations(); - - if (locations->InAt(1).IsConstant()) { - IfCondition c = condition->GetCondition(); - IfCondition opposite = condition->GetOppositeCondition(); - const int64_t value = - AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite); - - if (c < kCondLT || c > kCondGE) { - // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, - // we check that the least significant half of the first input to be compared - // is in a low register (the other half is read outside an IT block), and - // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP - // encoding can be used; 0 is always handled, no matter what registers are - // used by the first input. - if (value != 0 && - (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) { - return false; - } - // TODO(VIXL): The rest of the checks are there to keep the backend in sync with - // the previous one, but are not strictly necessary. 
- } else if (c == kCondLE || c == kCondGT) { - if (value < std::numeric_limits<int64_t>::max() && - !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) { - return false; - } - } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) { - return false; - } - } - } - - return true; -} - static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) { - DCHECK(CanGenerateTest(cond, codegen->GetAssembler())); - const vixl32::Register out = OutputRegister(cond); const auto condition = GenerateTest(cond, false, codegen); @@ -2089,7 +2043,7 @@ static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* cod } static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { - DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64); const LocationSummary* const locations = cond->GetLocations(); IfCondition condition = cond->GetCondition(); @@ -2144,93 +2098,8 @@ static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { } } -static void GenerateLongComparesAndJumps(HCondition* cond, - vixl32::Label* true_label, - vixl32::Label* false_label, - CodeGeneratorARMVIXL* codegen, - bool is_far_target = true) { - LocationSummary* locations = cond->GetLocations(); - Location left = locations->InAt(0); - Location right = locations->InAt(1); - IfCondition if_cond = cond->GetCondition(); - - vixl32::Register left_high = HighRegisterFrom(left); - vixl32::Register left_low = LowRegisterFrom(left); - IfCondition true_high_cond = if_cond; - IfCondition false_high_cond = cond->GetOppositeCondition(); - vixl32::Condition final_condition = ARMUnsignedCondition(if_cond); // unsigned on lower part - - // Set the conditions for the test, remembering that == needs to be - // decided using the low words. - switch (if_cond) { - case kCondEQ: - case kCondNE: - // Nothing to do. - break; - case kCondLT: - false_high_cond = kCondGT; - break; - case kCondLE: - true_high_cond = kCondLT; - break; - case kCondGT: - false_high_cond = kCondLT; - break; - case kCondGE: - true_high_cond = kCondGT; - break; - case kCondB: - false_high_cond = kCondA; - break; - case kCondBE: - true_high_cond = kCondB; - break; - case kCondA: - false_high_cond = kCondB; - break; - case kCondAE: - true_high_cond = kCondA; - break; - } - if (right.IsConstant()) { - int64_t value = Int64ConstantFrom(right); - int32_t val_low = Low32Bits(value); - int32_t val_high = High32Bits(value); - - __ Cmp(left_high, val_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label, is_far_target); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label, is_far_target); - } else { - __ B(ARMCondition(true_high_cond), true_label, is_far_target); - __ B(ARMCondition(false_high_cond), false_label, is_far_target); - } - // Must be equal high, so compare the lows. - __ Cmp(left_low, val_low); - } else { - vixl32::Register right_high = HighRegisterFrom(right); - vixl32::Register right_low = LowRegisterFrom(right); - - __ Cmp(left_high, right_high); - if (if_cond == kCondNE) { - __ B(ARMCondition(true_high_cond), true_label, is_far_target); - } else if (if_cond == kCondEQ) { - __ B(ARMCondition(false_high_cond), false_label, is_far_target); - } else { - __ B(ARMCondition(true_high_cond), true_label, is_far_target); - __ B(ARMCondition(false_high_cond), false_label, is_far_target); - } - // Must be equal high, so compare the lows. 
- __ Cmp(left_low, right_low); - } - // The last comparison might be unsigned. - // TODO: optimize cases where this is always true/false - __ B(final_condition, true_label, is_far_target); -} - static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) { - DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong); + DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64); const LocationSummary* const locations = cond->GetLocations(); IfCondition condition = cond->GetCondition(); @@ -2283,47 +2152,23 @@ static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codege } } - if ((condition == kCondEQ || condition == kCondNE) && - // If `out` is a low register, then the GenerateConditionGeneric() - // function generates a shorter code sequence that is still branchless. - (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) { + // If `out` is a low register, then the GenerateConditionGeneric() + // function generates a shorter code sequence that is still branchless. + if ((condition == kCondEQ || condition == kCondNE) && !out.IsLow()) { GenerateEqualLong(cond, codegen); return; } - if (CanGenerateTest(cond, codegen->GetAssembler())) { - GenerateConditionGeneric(cond, codegen); - return; - } - - // Convert the jumps into the result. - vixl32::Label done_label; - vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label); - vixl32::Label true_label, false_label; - - GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen, /* is_far_target */ false); - - // False case: result = 0. - __ Bind(&false_label); - __ Mov(out, 0); - __ B(final_label); - - // True case: result = 1. - __ Bind(&true_label); - __ Mov(out, 1); - - if (done_label.IsReferenced()) { - __ Bind(&done_label); - } + GenerateConditionGeneric(cond, codegen); } static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARMVIXL* codegen) { - const Primitive::Type type = cond->GetLeft()->GetType(); + const DataType::Type type = cond->GetLeft()->GetType(); - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { GenerateConditionLong(cond, codegen); return; } @@ -2409,12 +2254,12 @@ static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, } static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { - const Primitive::Type type = constant->GetType(); + const DataType::Type type = constant->GetType(); bool ret = false; - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { const uint64_t value = Uint64ConstantFrom(constant); ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value)); @@ -2426,7 +2271,7 @@ static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) { } static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) { - DCHECK(!Primitive::IsFloatingPointType(constant->GetType())); + DCHECK(!DataType::IsFloatingPointType(constant->GetType())); if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) { return Location::ConstantLocation(constant->AsConstant()); @@ -2500,25 +2345,26 @@ CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph, 
ComputeSRegisterListMask(kFpuCalleeSaves), compiler_options, stats), - block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + block_labels_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this), - assembler_(graph->GetArena()), + move_resolver_(graph->GetAllocator(), this), + assembler_(graph->GetAllocator()), isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + baker_read_barrier_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Always save the LR register to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(LR)); // Give D30 and D31 as scratch register to VIXL. The register allocator only works on @@ -2639,14 +2485,37 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); __ Bind(&frame_entry_label_); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp, temp, 1); + __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + } + if (HasEmptyFrame()) { return; } if (!skip_overflow_check) { + // Using r4 instead of IP saves 2 bytes. UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(kArm))); + vixl32::Register temp; + // TODO: Remove this check when R4 is made a callee-save register + // in ART compiled code (b/72801708). Currently we need to make + // sure r4 is not blocked, e.g. in special purpose + // TestCodeGeneratorARMVIXL; also asserting that r4 is available + // here. 
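// --- Editor's aside: illustrative sketch, not part of this patch ---
// The overflow check in this hunk computes sp minus the reserved guard size
// into a temp and then loads from that address; on a healthy stack the load is
// harmless, while on an exhausted stack it faults and is handled by the
// implicit stack-overflow machinery (hence the DCHECK on implicit checks at
// the top of GenerateFrameEntry). A scalar model of the probe; the function
// name and the volatile read are assumptions made for illustration.
#include <cstdint>
static uint8_t ProbeStackGuard(const uint8_t* sp, uint32_t reserved_bytes) {
  const volatile uint8_t* probe = sp - reserved_bytes;  // address the Sub computes
  return *probe;  // the (possibly faulting) load that follows it
}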
+ if (!blocked_core_registers_[R4]) { + for (vixl32::Register reg : kParameterCoreRegistersVIXL) { + DCHECK(!reg.Is(r4)); + } + DCHECK(!kCoreCalleeSaves.Includes(r4)); + temp = r4; + } else { + temp = temps.Acquire(); + } + __ Sub(temp, sp, Operand::From(GetStackOverflowReservedBytes(InstructionSet::kArm))); // The load must immediately precede RecordPcInfo. ExactAssemblyScope aas(GetVIXLAssembler(), vixl32::kMaxInstructionSizeInBytes, @@ -2690,6 +2559,8 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { __ Mov(temp, 0); GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag()); } + + MaybeGenerateMarkingRegisterCheck(/* code */ 1); } void CodeGeneratorARMVIXL::GenerateFrameExit() { @@ -2724,14 +2595,15 @@ void CodeGeneratorARMVIXL::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } -Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(DataType::Type type) { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { uint32_t index = gp_index_++; uint32_t stack_index = stack_index_++; if (index < calling_convention.GetNumberOfRegisters()) { @@ -2741,7 +2613,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(Primitive::Ty } } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { uint32_t index = gp_index_; uint32_t stack_index = stack_index_; gp_index_ += 2; @@ -2764,7 +2636,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(Primitive::Ty } } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { uint32_t stack_index = stack_index_++; if (float_index_ % 2 == 0) { float_index_ = std::max(double_index_, float_index_); @@ -2776,7 +2648,7 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(Primitive::Ty } } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { double_index_ = std::max(double_index_, RoundUp(float_index_, 2)); uint32_t stack_index = stack_index_; stack_index_ += 2; @@ -2793,37 +2665,42 @@ Location InvokeDexCallingConventionVisitorARMVIXL::GetNextLocation(Primitive::Ty } } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; } return Location::NoLocation(); } -Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(Primitive::Type type) const { +Location InvokeDexCallingConventionVisitorARMVIXL::GetReturnLocation(DataType::Type type) const { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kUint32: + case DataType::Type::kInt32: { return LocationFrom(r0); } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { return LocationFrom(s0); } - case Primitive::kPrimLong: { + case 
DataType::Type::kUint64: + case DataType::Type::kInt64: { return LocationFrom(r0, r1); } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { return LocationFrom(s0, s1); } - case Primitive::kPrimVoid: + case DataType::Type::kVoid: return Location::NoLocation(); } @@ -2881,10 +2758,10 @@ void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) { __ Mov(RegisterFrom(location), value); } -void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, Primitive::Type dst_type) { +void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, DataType::Type dst_type) { // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend. - HParallelMove move(GetGraph()->GetArena()); + HParallelMove move(GetGraph()->GetAllocator()); move.AddMove(src, dst, dst_type, nullptr); GetMoveResolver()->EmitNativeCode(&move); } @@ -2926,18 +2803,32 @@ void CodeGeneratorARMVIXL::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_poi } void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temps.Acquire(); + __ Push(vixl32::Register(kMethodRegister)); + GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize); + __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(temp, temp, 1); + __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + __ Pop(vixl32::Register(kMethodRegister)); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) { GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 2); } if (!codegen_->GoesToNextBlock(block, successor)) { __ B(codegen_->GetLabelOf(successor)); @@ -2971,56 +2862,41 @@ void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { } void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition, - vixl32::Label* true_target_in, - vixl32::Label* false_target_in, + vixl32::Label* true_target, + vixl32::Label* false_target, bool is_far_target) { - if (CanGenerateTest(condition, codegen_->GetAssembler())) { - vixl32::Label* non_fallthrough_target; - bool invert; - bool emit_both_branches; - - if (true_target_in == nullptr) { - // The true target is fallthrough. - DCHECK(false_target_in != nullptr); - non_fallthrough_target = false_target_in; - invert = true; - emit_both_branches = false; - } else { - non_fallthrough_target = true_target_in; - invert = false; - // Either the false target is fallthrough, or there is no fallthrough - // and both branches must be emitted. 
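// --- Editor's aside: illustrative sketch, not part of this patch ---
// Both the removed and the re-added branch emission in this function follow
// the same plan: branch to the one explicit target (inverting the condition if
// the true case falls through) and emit a second, unconditional branch only
// when neither target can fall through. The decision table as a tiny helper;
// the type and function names are assumptions.
struct BranchPlan {
  bool invert_condition;    // branch on the opposite condition
  bool emit_second_branch;  // also branch unconditionally to the false target
};
static BranchPlan PlanConditionalBranch(bool has_true_target, bool has_false_target) {
  if (!has_true_target) {
    // True case falls through; branch to the false target on the inverted condition.
    return {/*invert_condition=*/ true, /*emit_second_branch=*/ false};
  }
  // Branch to the true target; a second branch is needed only if the false
  // target cannot fall through either.
  return {/*invert_condition=*/ false, /*emit_second_branch=*/ has_false_target};
}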
- emit_both_branches = (false_target_in != nullptr); - } - - const auto cond = GenerateTest(condition, invert, codegen_); - - __ B(cond.first, non_fallthrough_target, is_far_target); + if (true_target == false_target) { + DCHECK(true_target != nullptr); + __ B(true_target); + return; + } - if (emit_both_branches) { - // No target falls through, we need to branch. - __ B(false_target_in); - } + vixl32::Label* non_fallthrough_target; + bool invert; + bool emit_both_branches; - return; + if (true_target == nullptr) { + // The true target is fallthrough. + DCHECK(false_target != nullptr); + non_fallthrough_target = false_target; + invert = true; + emit_both_branches = false; + } else { + non_fallthrough_target = true_target; + invert = false; + // Either the false target is fallthrough, or there is no fallthrough + // and both branches must be emitted. + emit_both_branches = (false_target != nullptr); } - // Generated branching requires both targets to be explicit. If either of the - // targets is nullptr (fallthrough) use and bind `fallthrough` instead. - vixl32::Label fallthrough; - vixl32::Label* true_target = (true_target_in == nullptr) ? &fallthrough : true_target_in; - vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in; + const auto cond = GenerateTest(condition, invert, codegen_); - DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_, is_far_target); + __ B(cond.first, non_fallthrough_target, is_far_target); - if (false_target != &fallthrough) { + if (emit_both_branches) { + // No target falls through, we need to branch. __ B(false_target); } - - if (fallthrough.IsReferenced()) { - __ Bind(&fallthrough); - } } void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instruction, @@ -3078,8 +2954,8 @@ void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instru // If this is a long or FP comparison that has been folded into // the HCondition, generate the comparison directly. 
- Primitive::Type type = condition->InputAt(0)->GetType(); - if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + DataType::Type type = condition->InputAt(0)->GetType(); + if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) { GenerateCompareTestAndBranch(condition, true_target, false_target, far_target); return; } @@ -3118,7 +2994,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateTestAndBranch(HInstruction* instru } void LocationsBuilderARMVIXL::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -3135,7 +3011,7 @@ void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) { } void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); InvokeRuntimeCallingConventionARMVIXL calling_convention; RegisterSet caller_saves = RegisterSet::Empty(); @@ -3156,7 +3032,7 @@ void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) { } void LocationsBuilderARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(flag, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -3169,8 +3045,8 @@ void InstructionCodeGeneratorARMVIXL::VisitShouldDeoptimizeFlag(HShouldDeoptimiz } void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); - const bool is_floating_point = Primitive::IsFloatingPointType(select->GetType()); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); + const bool is_floating_point = DataType::IsFloatingPointType(select->GetType()); if (is_floating_point) { locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -3198,10 +3074,22 @@ void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) { void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { HInstruction* const condition = select->GetCondition(); const LocationSummary* const locations = select->GetLocations(); - const Primitive::Type type = select->GetType(); + const DataType::Type type = select->GetType(); const Location first = locations->InAt(0); const Location out = locations->Out(); const Location second = locations->InAt(1); + + // In the unlucky case the output of this instruction overlaps + // with an input of an "emitted-at-use-site" condition, and + // the output of this instruction is not one of its inputs, we'll + // need to fallback to branches instead of conditional ARM instructions. 
+ bool output_overlaps_with_condition_inputs = + !IsBooleanValueOrMaterializedCondition(condition) && + !out.Equals(first) && + !out.Equals(second) && + (condition->GetLocations()->InAt(0).Equals(out) || + condition->GetLocations()->InAt(1).Equals(out)); + DCHECK(!output_overlaps_with_condition_inputs || condition->IsCondition()); Location src; if (condition->IsIntConstant()) { @@ -3215,9 +3103,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { return; } - if (!Primitive::IsFloatingPointType(type) && - (IsBooleanValueOrMaterializedCondition(condition) || - CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) { + if (!DataType::IsFloatingPointType(type) && !output_overlaps_with_condition_inputs) { bool invert = false; if (out.Equals(second)) { @@ -3289,6 +3175,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { vixl32::Label* false_target = nullptr; vixl32::Label* true_target = nullptr; vixl32::Label select_end; + vixl32::Label other_case; vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end); if (out.Equals(second)) { @@ -3299,12 +3186,21 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { src = second; if (!out.Equals(first)) { - codegen_->MoveLocation(out, first, type); + if (output_overlaps_with_condition_inputs) { + false_target = &other_case; + } else { + codegen_->MoveLocation(out, first, type); + } } } GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false); codegen_->MoveLocation(out, src, type); + if (output_overlaps_with_condition_inputs) { + __ B(target); + __ Bind(&other_case); + codegen_->MoveLocation(out, first, type); + } if (select_end.IsReferenced()) { __ Bind(&select_end); @@ -3312,7 +3208,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) { } void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetArena()) LocationSummary(info); + new (GetGraph()->GetAllocator()) LocationSummary(info); } void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) { @@ -3402,32 +3298,17 @@ void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition, void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); - // Handle the long/FP comparisons made in instruction simplification. 
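// --- Editor's aside: illustrative sketch, not part of this patch ---
// The flag computed above is set only when the select's destination would
// clobber an input that the not-yet-materialized condition still has to read,
// and the destination is not simply one of the select's own inputs. The same
// predicate over abstract location identities; names are assumptions.
static bool OutputOverlapsConditionInputs(int out,
                                          int select_in0,
                                          int select_in1,
                                          int cond_in0,
                                          int cond_in1,
                                          bool condition_materialized) {
  if (condition_materialized) {
    return false;  // the condition is already a 0/1 value in its own location
  }
  if (out == select_in0 || out == select_in1) {
    return false;  // writing out only overwrites one of the select's own inputs
  }
  return out == cond_in0 || out == cond_in1;  // out aliases a condition input
}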
- switch (cond->InputAt(0)->GetType()) { - case Primitive::kPrimLong: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); - if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } - break; - - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1))); - if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } - break; - - default: - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); - if (!cond->IsEmittedAtUseSite()) { - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - } + new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall); + const DataType::Type type = cond->InputAt(0)->GetType(); + if (DataType::IsFloatingPointType(type)) { + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, ArithmeticZeroOrFpuRegister(cond->InputAt(1))); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1))); + } + if (!cond->IsEmittedAtUseSite()) { + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } } @@ -3436,22 +3317,22 @@ void InstructionCodeGeneratorARMVIXL::HandleCondition(HCondition* cond) { return; } - const Primitive::Type type = cond->GetLeft()->GetType(); + const DataType::Type type = cond->GetLeft()->GetType(); - if (Primitive::IsFloatingPointType(type)) { + if (DataType::IsFloatingPointType(type)) { GenerateConditionGeneric(cond, codegen_); return; } - DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type; + DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; const IfCondition condition = cond->GetCondition(); // A condition with only one boolean input, or two boolean inputs without being equality or // inequality results from transformations done by the instruction simplifier, and is handled // as a regular condition with integral inputs. 
- if (type == Primitive::kPrimBoolean && - cond->GetRight()->GetType() == Primitive::kPrimBoolean && + if (type == DataType::Type::kBool && + cond->GetRight()->GetType() == DataType::Type::kBool && (condition == kCondEQ || condition == kCondNE)) { vixl32::Register left = InputRegisterAt(cond, 0); const vixl32::Register out = OutputRegister(cond); @@ -3561,7 +3442,7 @@ void InstructionCodeGeneratorARMVIXL::VisitAboveOrEqual(HAboveOrEqual* comp) { void LocationsBuilderARMVIXL::VisitIntConstant(HIntConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3571,7 +3452,7 @@ void InstructionCodeGeneratorARMVIXL::VisitIntConstant(HIntConstant* constant AT void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3581,7 +3462,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3591,7 +3472,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLongConstant(HLongConstant* constant void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3602,7 +3483,7 @@ void InstructionCodeGeneratorARMVIXL::VisitFloatConstant( void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3638,7 +3519,7 @@ void InstructionCodeGeneratorARMVIXL::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE void LocationsBuilderARMVIXL::VisitReturn(HReturn* ret) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); locations->SetInAt(0, parameter_visitor_.GetReturnLocation(ret->InputAt(0)->GetType())); } @@ -3655,6 +3536,7 @@ void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) { codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 3); } void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { @@ -3685,12 +3567,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrD DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); if (TryGenerateIntrinsicCode(invoke, 
codegen_)) { + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 4); return; } LocationSummary* locations = invoke->GetLocations(); codegen_->GenerateStaticOrDirectCall( invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation()); + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 5); } void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) { @@ -3709,11 +3594,14 @@ void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) { if (TryGenerateIntrinsicCode(invoke, codegen_)) { + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 6); return; } codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0)); DCHECK(!codegen_->IsLeafMethod()); + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 7); } void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) { @@ -3790,6 +3678,8 @@ void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* inv codegen_->RecordPcInfo(invoke, invoke->GetDexPc()); DCHECK(!codegen_->IsLeafMethod()); } + + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 8); } void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { @@ -3798,25 +3688,26 @@ void LocationsBuilderARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) void InstructionCodeGeneratorARMVIXL::VisitInvokePolymorphic(HInvokePolymorphic* invoke) { codegen_->GenerateInvokePolymorphicCall(invoke); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 9); } void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); switch (neg->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -3831,11 +3722,11 @@ void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) { Location out = locations->Out(); Location in = locations->InAt(0); switch (neg->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: // out.lo = 0 - in.lo (and update the carry/borrow (C) flag) __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0); // We cannot emit an RSC (Reverse Subtract with Carry) @@ -3849,8 +3740,8 @@ void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) { __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in)); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Vneg(OutputVRegister(neg), InputVRegister(neg)); break; @@ -3860,82 +3751,46 @@ void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) { } void 
LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); - DCHECK_NE(result_type, input_type); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; // The float-to-long, double-to-long and long-to-float type conversions // rely on a call to the runtime. LocationSummary::CallKind call_kind = - (((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble) - && result_type == Primitive::kPrimLong) - || (input_type == Primitive::kPrimLong && result_type == Primitive::kPrimFloat)) + (((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64) + && result_type == DataType::Type::kInt64) + || (input_type == DataType::Type::kInt64 && result_type == DataType::Type::kFloat32)) ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall; LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); - - // The Java language does not allow treating boolean as an integral type but - // our bit representation makes it safe. + new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind); switch (result_type) { - case Primitive::kPrimByte: - switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to byte is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-byte' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - break; - - case Primitive::kPrimShort: - switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to short is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-short' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK(DataType::IsIntegralType(input_type)) << input_type; + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: switch (input_type) { - case Primitive::kPrimLong: - // Processing a Dex `long-to-int' instruction. + case DataType::Type::kInt64: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-int' instruction. 
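// --- Editor's aside: illustrative sketch, not part of this patch ---
// The call-kind computation above marks exactly three conversions as runtime
// calls on this target: float->long, double->long and long->float; every other
// conversion is lowered inline. The same predicate spelled out; the enum is a
// stand-in for DataType::Type and the names are assumptions.
enum class Ty { kInt32, kInt64, kFloat32, kFloat64 };
static bool ConversionNeedsRuntimeCall(Ty input, Ty result) {
  const bool fp_to_long =
      (input == Ty::kFloat32 || input == Ty::kFloat64) && result == Ty::kInt64;
  const bool long_to_float = (input == Ty::kInt64 && result == Ty::kFloat32);
  return fp_to_long || long_to_float;
}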
+ case DataType::Type::kFloat32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-int' instruction. + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); @@ -3947,29 +3802,26 @@ void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-long' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: { - // Processing a Dex `float-to-long' instruction. + case DataType::Type::kFloat32: { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(LocationFrom(r0, r1)); break; } - case Primitive::kPrimDouble: { - // Processing a Dex `double-to-long' instruction. + case DataType::Type::kFloat64: { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1))); @@ -3983,41 +3835,19 @@ void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { } break; - case Primitive::kPrimChar: + case DataType::Type::kFloat32: switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to char is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - // Processing a Dex `int-to-char' instruction. - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - break; - - case Primitive::kPrimFloat: - switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-float' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimLong: { - // Processing a Dex `long-to-float' instruction. 
+ case DataType::Type::kInt64: { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); @@ -4025,8 +3855,7 @@ void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { break; } - case Primitive::kPrimDouble: - // Processing a Dex `double-to-float' instruction. + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -4034,32 +3863,29 @@ void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-double' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimLong: - // Processing a Dex `long-to-double' instruction. + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-double' instruction. + case DataType::Type::kFloat32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -4067,7 +3893,7 @@ void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) { default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; default: @@ -4080,24 +3906,40 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve LocationSummary* locations = conversion->GetLocations(); Location out = locations->Out(); Location in = locations->InAt(0); - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); - DCHECK_NE(result_type, input_type); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; switch (result_type) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to byte is a result of code transformations. - __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8); + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8); + break; + case DataType::Type::kInt64: + __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8); break; - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. 
- case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-byte' instruction. + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case DataType::Type::kInt8: + switch (input_type) { + case DataType::Type::kUint8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 8); break; + case DataType::Type::kInt64: + __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8); + break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -4105,20 +3947,32 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve } break; - case Primitive::kPrimShort: + case DataType::Type::kUint16: switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to short is a result of code transformations. - __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); + case DataType::Type::kInt8: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16); + break; + case DataType::Type::kInt64: + __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); break; - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-short' instruction. + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case DataType::Type::kInt16: + switch (input_type) { + case DataType::Type::kUint16: + case DataType::Type::kInt32: __ Sbfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16); break; + case DataType::Type::kInt64: + __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); + break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -4126,10 +3980,9 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: switch (input_type) { - case Primitive::kPrimLong: - // Processing a Dex `long-to-int' instruction. + case DataType::Type::kInt64: DCHECK(out.IsRegister()); if (in.IsRegisterPair()) { __ Mov(OutputRegister(conversion), LowRegisterFrom(in)); @@ -4146,16 +3999,14 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve } break; - case Primitive::kPrimFloat: { - // Processing a Dex `float-to-int' instruction. + case DataType::Type::kFloat32: { vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0)); __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0)); __ Vmov(OutputRegister(conversion), temp); break; } - case Primitive::kPrimDouble: { - // Processing a Dex `double-to-int' instruction. + case DataType::Type::kFloat64: { vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0)); __ Vcvt(S32, F64, temp_s, DRegisterFrom(in)); __ Vmov(OutputRegister(conversion), temp_s); @@ -4168,15 +4019,14 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. 
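// --- Editor's aside: illustrative sketch, not part of this patch ---
// The narrowing cases in this hunk all reduce to a bit-field extract starting
// at bit 0 of the input (or of its low word for a 64-bit source): UBFX for the
// unsigned targets, SBFX for the signed ones. In portable C++ that is just a
// zero- or sign-extending cast; the helper names are assumptions.
#include <cstdint>
static int32_t NarrowToUint8(int32_t v)  { return static_cast<uint8_t>(v); }   // UBFX #0, #8
static int32_t NarrowToInt8(int32_t v)   { return static_cast<int8_t>(v); }    // SBFX #0, #8
static int32_t NarrowToUint16(int32_t v) { return static_cast<uint16_t>(v); }  // UBFX #0, #16
static int32_t NarrowToInt16(int32_t v)  { return static_cast<int16_t>(v); }   // SBFX #0, #16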
- case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-long' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: DCHECK(out.IsRegisterPair()); DCHECK(in.IsRegister()); __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0)); @@ -4184,14 +4034,12 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31); break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-long' instruction. + case DataType::Type::kFloat32: codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-long' instruction. + case DataType::Type::kFloat64: codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; @@ -4202,74 +4050,46 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve } break; - case Primitive::kPrimChar: - switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to char is a result of code transformations. - __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16); - break; - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - // Processing a Dex `int-to-char' instruction. - __ Ubfx(OutputRegister(conversion), InputRegisterAt(conversion, 0), 0, 16); - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - break; - - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: { - // Processing a Dex `int-to-float' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0)); __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion)); break; - } - case Primitive::kPrimLong: - // Processing a Dex `long-to-float' instruction. + case DataType::Type::kInt64: codegen_->InvokeRuntime(kQuickL2f, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickL2f, float, int64_t>(); break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-float' instruction. + case DataType::Type::kFloat64: __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in)); break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: { - // Processing a Dex `int-to-double' instruction. 
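// --- Editor's aside: illustrative sketch, not part of this patch ---
// The int-to-long case earlier in this hunk moves the value into the low word
// of the register pair and derives the high word with an arithmetic shift by
// 31, i.e. every high bit becomes a copy of the sign bit. Scalar equivalent,
// with the function name as an assumption:
#include <cstdint>
static int64_t SignExtend32To64(int32_t v) {
  const uint32_t lo = static_cast<uint32_t>(v);
  const uint32_t hi = (v < 0) ? 0xffffffffu : 0u;  // what ASR #31 produces
  return static_cast<int64_t>((static_cast<uint64_t>(hi) << 32) | lo);
}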
+ case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0)); __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out)); break; - } - case Primitive::kPrimLong: { - // Processing a Dex `long-to-double' instruction. + case DataType::Type::kInt64: { vixl32::Register low = LowRegisterFrom(in); vixl32::Register high = HighRegisterFrom(in); vixl32::SRegister out_s = LowSRegisterFrom(out); @@ -4291,15 +4111,14 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve break; } - case Primitive::kPrimFloat: - // Processing a Dex `float-to-double' instruction. + case DataType::Type::kFloat32: __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0)); break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; default: @@ -4310,24 +4129,24 @@ void InstructionCodeGeneratorARMVIXL::VisitTypeConversion(HTypeConversion* conve void LocationsBuilderARMVIXL::VisitAdd(HAdd* add) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ArmEncodableConstantOrRegister(add->InputAt(1), ADD)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -4346,12 +4165,12 @@ void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) { Location second = locations->InAt(1); switch (add->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { __ Add(OutputRegister(add), InputRegisterAt(add, 0), InputOperandAt(add, 1)); } break; - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (second.IsConstant()) { uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); GenerateAddLongConst(out, first, value); @@ -4363,8 +4182,8 @@ void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1)); break; @@ -4375,23 +4194,23 @@ void InstructionCodeGeneratorARMVIXL::VisitAdd(HAdd* add) { void LocationsBuilderARMVIXL::VisitSub(HSub* sub) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall); switch (sub->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, 
Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(sub->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ArmEncodableConstantOrRegister(sub->InputAt(1), SUB)); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -4408,12 +4227,12 @@ void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) { Location first = locations->InAt(0); Location second = locations->InAt(1); switch (sub->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1)); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (second.IsConstant()) { uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); GenerateAddLongConst(out, first, -value); @@ -4425,8 +4244,8 @@ void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1)); break; @@ -4437,18 +4256,18 @@ void InstructionCodeGeneratorARMVIXL::VisitSub(HSub* sub) { void LocationsBuilderARMVIXL::VisitMul(HMul* mul) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); switch (mul->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -4466,11 +4285,11 @@ void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) { Location first = locations->InAt(0); Location second = locations->InAt(1); switch (mul->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { __ Mul(OutputRegister(mul), InputRegisterAt(mul, 0), InputRegisterAt(mul, 1)); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { vixl32::Register out_hi = HighRegisterFrom(out); vixl32::Register out_lo = LowRegisterFrom(out); vixl32::Register in1_hi = HighRegisterFrom(first); @@ -4503,8 +4322,8 @@ void InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1)); break; @@ -4515,7 +4334,7 @@ void 
InstructionCodeGeneratorARMVIXL::VisitMul(HMul* mul) { void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32); Location second = instruction->GetLocations()->InAt(1); DCHECK(second.IsConstant()); @@ -4538,7 +4357,7 @@ void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* inst void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32); LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); @@ -4572,7 +4391,7 @@ void InstructionCodeGeneratorARMVIXL::DivRemByPowerOfTwo(HBinaryOperation* instr void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32); LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); @@ -4615,7 +4434,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateDivRemWithAnyConstant(HBinaryOpera void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral( HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK(instruction->GetResultType() == Primitive::kPrimInt); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32); Location second = instruction->GetLocations()->InAt(1); DCHECK(second.IsConstant()); @@ -4635,21 +4454,21 @@ void InstructionCodeGeneratorARMVIXL::GenerateDivRemConstantIntegral( void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) { LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - if (div->GetResultType() == Primitive::kPrimLong) { + if (div->GetResultType() == DataType::Type::kInt64) { // pLdiv runtime call. call_kind = LocationSummary::kCallOnMainOnly; - } else if (div->GetResultType() == Primitive::kPrimInt && div->InputAt(1)->IsConstant()) { + } else if (div->GetResultType() == DataType::Type::kInt32 && div->InputAt(1)->IsConstant()) { // sdiv will be replaced by other instruction sequence. - } else if (div->GetResultType() == Primitive::kPrimInt && + } else if (div->GetResultType() == DataType::Type::kInt32 && !codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { // pIdivmod runtime call. 
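// --- Editor's aside: illustrative sketch, not part of this patch ---
// The call-kind selection for HDiv here distinguishes three integer cases:
// 64-bit division always goes to the runtime (pLdiv), 32-bit division by a
// constant is strength-reduced inline, and 32-bit division by a variable needs
// either the hardware SDIV or the pIdivmod runtime entry point. As a small
// classifier with assumed names:
enum class DivLowering { kRuntimeCall, kConstantSequence, kHardwareSdiv };
static DivLowering ClassifyIntegralDiv(bool is_int64,
                                       bool divisor_is_constant,
                                       bool has_divide_instruction) {
  if (is_int64) return DivLowering::kRuntimeCall;                 // pLdiv
  if (divisor_is_constant) return DivLowering::kConstantSequence; // inline sequence
  return has_divide_instruction ? DivLowering::kHardwareSdiv
                                : DivLowering::kRuntimeCall;      // pIdivmod
}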
call_kind = LocationSummary::kCallOnMainOnly; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind); switch (div->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { if (div->InputAt(1)->IsConstant()) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); @@ -4677,7 +4496,7 @@ void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom( calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); @@ -4686,8 +4505,8 @@ void LocationsBuilderARMVIXL::VisitDiv(HDiv* div) { locations->SetOut(LocationFrom(r0, r1)); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -4704,7 +4523,7 @@ void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { Location rhs = div->GetLocations()->InAt(1); switch (div->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { if (rhs.IsConstant()) { GenerateDivRemConstantIntegral(div); } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { @@ -4721,7 +4540,7 @@ void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { InvokeRuntimeCallingConventionARMVIXL calling_convention; DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs))); DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs))); @@ -4735,8 +4554,8 @@ void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1)); break; @@ -4746,23 +4565,23 @@ void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) { } void LocationsBuilderARMVIXL::VisitRem(HRem* rem) { - Primitive::Type type = rem->GetResultType(); + DataType::Type type = rem->GetResultType(); // Most remainders are implemented in the runtime. LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly; - if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) { + if (rem->GetResultType() == DataType::Type::kInt32 && rem->InputAt(1)->IsConstant()) { // sdiv will be replaced by other instruction sequence. call_kind = LocationSummary::kNoCall; - } else if ((rem->GetResultType() == Primitive::kPrimInt) + } else if ((rem->GetResultType() == DataType::Type::kInt32) && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) { // Have hardware divide instruction for int, do it with three instructions. 
call_kind = LocationSummary::kNoCall; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { if (rem->InputAt(1)->IsConstant()) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); @@ -4791,7 +4610,7 @@ void LocationsBuilderARMVIXL::VisitRem(HRem* rem) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom( calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); @@ -4801,7 +4620,7 @@ void LocationsBuilderARMVIXL::VisitRem(HRem* rem) { locations->SetOut(LocationFrom(r2, r3)); break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1))); @@ -4809,7 +4628,7 @@ void LocationsBuilderARMVIXL::VisitRem(HRem* rem) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom( calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1))); @@ -4828,9 +4647,9 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { LocationSummary* locations = rem->GetLocations(); Location second = locations->InAt(1); - Primitive::Type type = rem->GetResultType(); + DataType::Type type = rem->GetResultType(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { vixl32::Register reg1 = InputRegisterAt(rem, 0); vixl32::Register out_reg = OutputRegister(rem); if (second.IsConstant()) { @@ -4855,19 +4674,19 @@ void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc()); CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc()); CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc()); CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; @@ -4886,18 +4705,19 @@ void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) { DivZeroCheckSlowPathARMVIXL* slow_path = - new (GetGraph()->GetArena()) DivZeroCheckSlowPathARMVIXL(instruction); + new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathARMVIXL(instruction); codegen_->AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); Location value = locations->InAt(0); switch (instruction->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { 
if (value.IsRegister()) { __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); } else { @@ -4908,7 +4728,7 @@ void InstructionCodeGeneratorARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instructi } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (value.IsRegisterPair()) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); @@ -5023,15 +4843,15 @@ void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) { void LocationsBuilderARMVIXL::VisitRor(HRor* ror) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); switch (ror->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); if (ror->InputAt(1)->IsConstant()) { locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant())); @@ -5049,13 +4869,13 @@ void LocationsBuilderARMVIXL::VisitRor(HRor* ror) { } void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) { - Primitive::Type type = ror->GetResultType(); + DataType::Type type = ror->GetResultType(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { HandleIntegerRotate(ror); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { HandleLongRotate(ror); break; } @@ -5069,10 +4889,10 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) { DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); switch (op->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); if (op->InputAt(1)->IsConstant()) { locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); @@ -5085,7 +4905,7 @@ void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); if (op->InputAt(1)->IsConstant()) { locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant())); @@ -5112,9 +4932,9 @@ void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { Location first = locations->InAt(0); Location second = locations->InAt(1); - Primitive::Type type = op->GetResultType(); + DataType::Type type = op->GetResultType(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { vixl32::Register out_reg = OutputRegister(op); vixl32::Register first_reg = InputRegisterAt(op, 0); if (second.IsRegister()) { @@ -5143,7 +4963,7 @@ void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { vixl32::Register o_h = HighRegisterFrom(out); vixl32::Register o_l = LowRegisterFrom(out); @@ -5299,8 +5119,8 @@ void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) { } void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* 
instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); if (instruction->IsStringAlloc()) { locations->AddTemp(LocationFrom(kMethodRegister)); } else { @@ -5329,11 +5149,12 @@ void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>(); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 10); } void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetOut(LocationFrom(r0)); locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); @@ -5348,11 +5169,12 @@ void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) { codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>(); DCHECK(!codegen_->IsLeafMethod()); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 11); } void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); if (location.IsStackSlot()) { location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); @@ -5369,7 +5191,7 @@ void InstructionCodeGeneratorARMVIXL::VisitParameterValue( void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetOut(LocationFrom(kMethodRegister)); } @@ -5380,7 +5202,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod( void LocationsBuilderARMVIXL::VisitNot(HNot* not_) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -5390,11 +5212,11 @@ void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) { Location out = locations->Out(); Location in = locations->InAt(0); switch (not_->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0)); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in)); __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in)); break; @@ -5406,7 +5228,7 @@ void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) { void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) 
{ LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -5417,22 +5239,23 @@ void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) { void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); switch (compare->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // Output overlaps because it is written before doing the low comparison. locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1))); locations->SetOut(Location::RequiresRegister()); @@ -5451,21 +5274,22 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { vixl32::Label less, greater, done; vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done); - Primitive::Type type = compare->InputAt(0)->GetType(); - vixl32::Condition less_cond = vixl32::Condition(kNone); + DataType::Type type = compare->InputAt(0)->GetType(); + vixl32::Condition less_cond = vixl32::Condition::None(); switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags. __ Mov(out, 0); __ Cmp(RegisterFrom(left), RegisterFrom(right)); // Signed compare. less_cond = lt; break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right)); // Signed compare. __ B(lt, &less, /* far_target */ false); __ B(gt, &greater, /* far_target */ false); @@ -5475,8 +5299,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { less_cond = lo; break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { __ Mov(out, 0); GenerateVcmp(compare, codegen_); // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS). 
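In the kInt64 arm of VisitCompare above, the operands live in 32-bit register pairs, so the three-way compare is decomposed into a signed comparison of the high words followed, only on equality, by an unsigned comparison of the low words (hence less_cond = lo). The same decision procedure written as plain C++, for reference only:

#include <cstdint>

// Returns -1, 0 or 1 for a 64-bit compare performed on {high, low} halves,
// mirroring the register-pair lowering above.
int32_t CompareInt64Halves(int32_t lhs_hi, uint32_t lhs_lo,
                           int32_t rhs_hi, uint32_t rhs_lo) {
  if (lhs_hi != rhs_hi) {
    return lhs_hi < rhs_hi ? -1 : 1;  // Signed compare of the high words.
  }
  if (lhs_lo != rhs_lo) {
    return lhs_lo < rhs_lo ? -1 : 1;  // Unsigned compare of the low words.
  }
  return 0;
}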
@@ -5506,7 +5330,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) { void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { locations->SetInAt(i, Location::Any()); } @@ -5584,17 +5408,17 @@ void LocationsBuilderARMVIXL::HandleFieldSet( DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - Primitive::Type field_type = field_info.GetFieldType(); - if (Primitive::IsFloatingPointType(field_type)) { + DataType::Type field_type = field_info.GetFieldType(); + if (DataType::IsFloatingPointType(field_type)) { locations->SetInAt(1, Location::RequiresFpuRegister()); } else { locations->SetInAt(1, Location::RequiresRegister()); } - bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble; + bool is_wide = field_type == DataType::Type::kInt64 || field_type == DataType::Type::kFloat64; bool generate_volatile = field_info.IsVolatile() && is_wide && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); @@ -5615,7 +5439,7 @@ void LocationsBuilderARMVIXL::HandleFieldSet( locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); - if (field_type == Primitive::kPrimDouble) { + if (field_type == DataType::Type::kFloat64) { // For doubles we need two more registers to copy the value. locations->AddTemp(LocationFrom(r2)); locations->AddTemp(LocationFrom(r3)); @@ -5634,7 +5458,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, bool is_volatile = field_info.IsVolatile(); bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); - Primitive::Type field_type = field_info.GetFieldType(); + DataType::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); @@ -5644,25 +5468,23 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, } switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { - GetAssembler()->StoreToOffset(kStoreByte, RegisterFrom(value), base, offset); + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { + StoreOperandType operand_type = GetStoreOperandType(field_type); + GetAssembler()->StoreToOffset(operand_type, RegisterFrom(value), base, offset); break; } - case Primitive::kPrimShort: - case Primitive::kPrimChar: { - GetAssembler()->StoreToOffset(kStoreHalfword, RegisterFrom(value), base, offset); - break; - } - - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case DataType::Type::kReference: { if (kPoisonHeapReferences && needs_write_barrier) { // Note that in the case where `value` is a null reference, // we do not enter this block, as a null reference does not // need poisoning. 
- DCHECK_EQ(field_type, Primitive::kPrimNot); + DCHECK_EQ(field_type, DataType::Type::kReference); vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); __ Mov(temp, RegisterFrom(value)); GetAssembler()->PoisonHeapReference(temp); @@ -5673,7 +5495,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicStore(base, offset, @@ -5689,12 +5511,12 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { vixl32::DRegister value_reg = DRegisterFrom(value); if (is_volatile && !atomic_ldrd_strd) { vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0)); @@ -5716,13 +5538,15 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } // Longs and doubles are handled in the switch. - if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) { + if (field_type != DataType::Type::kInt64 && field_type != DataType::Type::kFloat64) { // TODO(VIXL): Here and for other calls to `MaybeRecordImplicitNullCheck` in this method, we // should use a scope and the assembler to emit the store instruction to guarantee that we // record the pc at the correct position. But the `Assembler` does not automatically handle @@ -5747,29 +5571,30 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (field_info.GetFieldType() == DataType::Type::kReference); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, - object_field_get_with_read_barrier ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + object_field_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); bool volatile_for_double = field_info.IsVolatile() - && (field_info.GetFieldType() == Primitive::kPrimDouble) + && (field_info.GetFieldType() == DataType::Type::kFloat64) && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); // The output overlaps in case of volatile long: we don't want the // code generated by GenerateWideAtomicLoad to overwrite the // object's location. Likewise, in the case of an object field get // with read barriers enabled, we do not want the load to overwrite // the object's location, as we need it to emit the read barrier. 
- bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) || + bool overlap = + (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) || object_field_get_with_read_barrier; - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { locations->SetOut(Location::RequiresRegister(), @@ -5803,7 +5628,7 @@ void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction, } Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) { - DCHECK(Primitive::IsFloatingPointType(input->GetType())) << input->GetType(); + DCHECK(DataType::IsFloatingPointType(input->GetType())) << input->GetType(); if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) || (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) { return Location::ConstantLocation(input->AsConstant()); @@ -5814,7 +5639,7 @@ Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* inpu Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode) { - DCHECK(!Primitive::IsFloatingPointType(constant->GetType())); + DCHECK(!DataType::IsFloatingPointType(constant->GetType())); if (constant->IsConstant() && CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) { return Location::ConstantLocation(constant->AsConstant()); @@ -5822,42 +5647,13 @@ Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* c return Location::RequiresRegister(); } -bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, - Opcode opcode) { - uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); - if (Primitive::Is64BitType(input_cst->GetType())) { - Opcode high_opcode = opcode; - SetCc low_set_cc = kCcDontCare; - switch (opcode) { - case SUB: - // Flip the operation to an ADD. - value = -value; - opcode = ADD; - FALLTHROUGH_INTENDED; - case ADD: - if (Low32Bits(value) == 0u) { - return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare); - } - high_opcode = ADC; - low_set_cc = kCcSet; - break; - default: - break; - } - return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) && - CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare); - } else { - return CanEncodeConstantAsImmediate(Low32Bits(value), opcode); - } -} - -// TODO(VIXL): Replace art::arm::SetCc` with `vixl32::FlagsUpdate after flags set optimization -// enabled. 
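The CanEncodeConstantAsImmediate rework just below keeps the old rule while moving from art::arm::SetCc to vixl32::FlagsUpdate: a 64-bit SUB by a constant is flipped into an ADD of the negated constant, and the constant stays an immediate only if the low word encodes in a flags-setting ADDS and the high word in the following ADC (or, when the low word is zero, if the high word alone encodes for the original opcode). A compilable sketch of that word splitting; CanHoldImmediate is a hypothetical stand-in for the assembler's ShifterOperandCanHold query:

#include <cstdint>

// Placeholder for ArmVIXLAssembler::ShifterOperandCanHold(); always "yes" here.
static bool CanHoldImmediate(uint32_t /*value*/, const char* /*opcode*/, bool /*sets_flags*/) {
  return true;
}

// Can a 64-bit ADD/SUB with this constant be emitted as ADDS (low words) + ADC (high words)?
static bool CanEncodeAddSub64(int64_t constant, bool is_sub) {
  uint64_t value = static_cast<uint64_t>(constant);
  if (is_sub) {
    value = -value;  // Flip SUB into an ADD of the negated constant.
  }
  uint32_t low = static_cast<uint32_t>(value);
  uint32_t high = static_cast<uint32_t>(value >> 32);
  if (low == 0u) {
    return CanHoldImmediate(high, "ADD", /*sets_flags=*/ false);  // Only the high word matters.
  }
  return CanHoldImmediate(low, "ADD", /*sets_flags=*/ true) &&    // ADDS must produce the carry...
         CanHoldImmediate(high, "ADC", /*sets_flags=*/ false);    // ...consumed by the ADC.
}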
-bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value, - Opcode opcode, - SetCc set_cc) { - ArmVIXLAssembler* assembler = codegen_->GetAssembler(); - if (assembler->ShifterOperandCanHold(opcode, value, set_cc)) { +static bool CanEncode32BitConstantAsImmediate( + CodeGeneratorARMVIXL* codegen, + uint32_t value, + Opcode opcode, + vixl32::FlagsUpdate flags_update = vixl32::FlagsUpdate::DontCare) { + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + if (assembler->ShifterOperandCanHold(opcode, value, flags_update)) { return true; } Opcode neg_opcode = kNoOperand; @@ -5874,13 +5670,41 @@ bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value, return false; } - if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, set_cc)) { + if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, flags_update)) { return true; } return opcode == AND && IsPowerOfTwo(value + 1); } +bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode) { + uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst)); + if (DataType::Is64BitType(input_cst->GetType())) { + Opcode high_opcode = opcode; + vixl32::FlagsUpdate low_flags_update = vixl32::FlagsUpdate::DontCare; + switch (opcode) { + case SUB: + // Flip the operation to an ADD. + value = -value; + opcode = ADD; + FALLTHROUGH_INTENDED; + case ADD: + if (Low32Bits(value) == 0u) { + return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), opcode); + } + high_opcode = ADC; + low_flags_update = vixl32::FlagsUpdate::SetFlags; + break; + default: + break; + } + return CanEncode32BitConstantAsImmediate(codegen_, High32Bits(value), high_opcode) && + CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode, low_flags_update); + } else { + return CanEncode32BitConstantAsImmediate(codegen_, Low32Bits(value), opcode); + } +} + void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); @@ -5890,31 +5714,23 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); - Primitive::Type field_type = field_info.GetFieldType(); + DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); + DataType::Type load_type = instruction->GetType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (field_type) { - case Primitive::kPrimBoolean: - GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out), base, offset); - break; - - case Primitive::kPrimByte: - GetAssembler()->LoadFromOffset(kLoadSignedByte, RegisterFrom(out), base, offset); - break; - - case Primitive::kPrimShort: - GetAssembler()->LoadFromOffset(kLoadSignedHalfword, RegisterFrom(out), base, offset); - break; - - case Primitive::kPrimChar: - GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, RegisterFrom(out), base, offset); - break; - - case Primitive::kPrimInt: - GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset); + switch (load_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { + LoadOperandType operand_type = GetLoadOperandType(load_type); + 
GetAssembler()->LoadFromOffset(operand_type, RegisterFrom(out), base, offset); break; + } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { Location temp_loc = locations->GetTemp(0); @@ -5939,7 +5755,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, break; } - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (is_volatile && !atomic_ldrd_strd) { GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out)); } else { @@ -5947,11 +5763,11 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, } break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset); break; - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { vixl32::DRegister out_dreg = DRegisterFrom(out); if (is_volatile && !atomic_ldrd_strd) { vixl32::Register lo = RegisterFrom(locations->GetTemp(0)); @@ -5968,12 +5784,14 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, break; } - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << load_type; UNREACHABLE(); } - if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) { + if (load_type == DataType::Type::kReference || load_type == DataType::Type::kFloat64) { // Potential implicit null checks, in the case of reference or // double fields, are handled in the previous switch statement. } else { @@ -5987,7 +5805,7 @@ void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction, } if (is_volatile) { - if (field_type == Primitive::kPrimNot) { + if (load_type == DataType::Type::kReference) { // Memory barriers, in the case of references, are also handled // in the previous switch statement. 
} else { @@ -6117,7 +5935,7 @@ void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) { void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) { NullCheckSlowPathARMVIXL* slow_path = - new (GetGraph()->GetArena()) NullCheckSlowPathARMVIXL(instruction); + new (GetScopedAllocator()) NullCheckSlowPathARMVIXL(instruction); AddSlowPath(slow_path); __ CompareAndBranchIfZero(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel()); } @@ -6126,116 +5944,68 @@ void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) { codegen_->GenerateNullCheck(instruction); } -static LoadOperandType GetLoadOperandType(Primitive::Type type) { - switch (type) { - case Primitive::kPrimNot: - return kLoadWord; - case Primitive::kPrimBoolean: - return kLoadUnsignedByte; - case Primitive::kPrimByte: - return kLoadSignedByte; - case Primitive::kPrimChar: - return kLoadUnsignedHalfword; - case Primitive::kPrimShort: - return kLoadSignedHalfword; - case Primitive::kPrimInt: - return kLoadWord; - case Primitive::kPrimLong: - return kLoadWordPair; - case Primitive::kPrimFloat: - return kLoadSWord; - case Primitive::kPrimDouble: - return kLoadDWord; - default: - LOG(FATAL) << "Unreachable type " << type; - UNREACHABLE(); - } -} - -static StoreOperandType GetStoreOperandType(Primitive::Type type) { - switch (type) { - case Primitive::kPrimNot: - return kStoreWord; - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - return kStoreByte; - case Primitive::kPrimChar: - case Primitive::kPrimShort: - return kStoreHalfword; - case Primitive::kPrimInt: - return kStoreWord; - case Primitive::kPrimLong: - return kStoreWordPair; - case Primitive::kPrimFloat: - return kStoreSWord; - case Primitive::kPrimDouble: - return kStoreDWord; - default: - LOG(FATAL) << "Unreachable type " << type; - UNREACHABLE(); - } -} - -void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(Primitive::Type type, +void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(DataType::Type type, Location out_loc, vixl32::Register base, vixl32::Register reg_index, vixl32::Condition cond) { - uint32_t shift_count = Primitive::ComponentSizeShift(type); + uint32_t shift_count = DataType::SizeShift(type); MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count); switch (type) { - case Primitive::kPrimByte: - __ Ldrsb(cond, RegisterFrom(out_loc), mem_address); - break; - case Primitive::kPrimBoolean: + case DataType::Type::kBool: + case DataType::Type::kUint8: __ Ldrb(cond, RegisterFrom(out_loc), mem_address); break; - case Primitive::kPrimShort: - __ Ldrsh(cond, RegisterFrom(out_loc), mem_address); + case DataType::Type::kInt8: + __ Ldrsb(cond, RegisterFrom(out_loc), mem_address); break; - case Primitive::kPrimChar: + case DataType::Type::kUint16: __ Ldrh(cond, RegisterFrom(out_loc), mem_address); break; - case Primitive::kPrimNot: - case Primitive::kPrimInt: + case DataType::Type::kInt16: + __ Ldrsh(cond, RegisterFrom(out_loc), mem_address); + break; + case DataType::Type::kReference: + case DataType::Type::kInt32: __ Ldr(cond, RegisterFrom(out_loc), mem_address); break; // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types. 
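The DataType-based switches above replace the old Primitive-based GetLoadOperandType/GetStoreOperandType helpers: a load picks both a width and a sign/zero extension from the type, while a store only needs the width. Restated as a small table-style helper (the Ty enum and names below are simplified stand-ins, not the backend's real types):

enum class Width { kByte, kHalfword, kWord };
enum class Ty { kBool, kUint8, kInt8, kUint16, kInt16, kInt32, kReference };

struct LoadKind {
  Width width;
  bool sign_extend;  // Sub-word loads either sign-extend (ldrsb/ldrsh) or zero-extend (ldrb/ldrh).
};

LoadKind LoadKindFor(Ty type) {
  switch (type) {
    case Ty::kBool:
    case Ty::kUint8:     return {Width::kByte, false};      // ldrb
    case Ty::kInt8:      return {Width::kByte, true};       // ldrsb
    case Ty::kUint16:    return {Width::kHalfword, false};  // ldrh
    case Ty::kInt16:     return {Width::kHalfword, true};   // ldrsh
    case Ty::kInt32:
    case Ty::kReference: return {Width::kWord, false};      // ldr
  }
  return {Width::kWord, false};  // Unreachable; Int64 and the FP types take separate paths above.
}

Stores drop the sign_extend bit entirely, which is why kBool/kUint8/kInt8 (strb) and kUint16/kInt16 (strh) collapse into shared cases in StoreToShiftedRegOffset and HandleFieldSet.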
- case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: default: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } } -void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(Primitive::Type type, +void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(DataType::Type type, Location loc, vixl32::Register base, vixl32::Register reg_index, vixl32::Condition cond) { - uint32_t shift_count = Primitive::ComponentSizeShift(type); + uint32_t shift_count = DataType::SizeShift(type); MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count); switch (type) { - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: __ Strb(cond, RegisterFrom(loc), mem_address); break; - case Primitive::kPrimShort: - case Primitive::kPrimChar: + case DataType::Type::kUint16: + case DataType::Type::kInt16: __ Strh(cond, RegisterFrom(loc), mem_address); break; - case Primitive::kPrimNot: - case Primitive::kPrimInt: + case DataType::Type::kReference: + case DataType::Type::kInt32: __ Str(cond, RegisterFrom(loc), mem_address); break; // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types. - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: default: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); @@ -6244,18 +6014,18 @@ void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(Primitive::Type type, void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, - object_array_get_with_read_barrier ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { // The output overlaps in the case of an object array get with @@ -6276,7 +6046,7 @@ void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) { // constant index loads we need a temporary only if the offset is too big. 
uint32_t offset = CodeGenerator::GetArrayDataOffset(instruction); uint32_t index = instruction->GetIndex()->AsIntConstant()->GetValue(); - offset += index << Primitive::ComponentSizeShift(Primitive::kPrimNot); + offset += index << DataType::SizeShift(DataType::Type::kReference); if (offset >= kReferenceLoadMinFarOffset) { locations->AddTemp(Location::RequiresRegister()); } @@ -6305,18 +6075,19 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { Location index = locations->InAt(1); Location out_loc = locations->Out(); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); const bool maybe_compressed_char_at = mirror::kUseStringCompression && instruction->IsStringCharAt(); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { vixl32::Register length; if (maybe_compressed_char_at) { length = RegisterFrom(locations->GetTemp(0)); @@ -6339,7 +6110,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { data_offset + const_index); __ B(final_label); __ Bind(&uncompressed_load); - GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar), + GetAssembler()->LoadFromOffset(GetLoadOperandType(DataType::Type::kUint16), RegisterFrom(out_loc), obj, data_offset + (const_index << 1)); @@ -6347,7 +6118,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { __ Bind(&done); } } else { - uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type)); + uint32_t full_offset = data_offset + (const_index << DataType::SizeShift(type)); LoadOperandType load_type = GetLoadOperandType(type); GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset); @@ -6389,7 +6160,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { // The read barrier instrumentation of object ArrayGet // instructions does not support the HIntermediateAddress // instruction. @@ -6407,7 +6178,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); if (index.IsConstant()) { // Array load with a constant index can be treated as a field load. 
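As the VisitArrayGet/VisitArraySet code in this hunk shows, a constant-index element access is folded into a fixed offset, data_offset + (index << DataType::SizeShift(type)), and then handled like a field access. A trivial worked example of that address arithmetic (the 12-byte header below is a made-up number, not the runtime's actual array layout):

#include <cstddef>

// SizeShift(type) is log2 of the element size, so element i starts at
// data_offset + (i << SizeShift(type)).
constexpr size_t ElementOffset(size_t data_offset, size_t index, size_t size_shift) {
  return data_offset + (index << size_shift);
}

// With a hypothetical 12-byte array header and 16-bit elements (SizeShift == 1),
// element 5 sits at byte offset 12 + (5 << 1) == 22.
static_assert(ElementOffset(12, 5, 1) == 22, "constant-index access folds to one immediate offset");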
- data_offset += Int32ConstantFrom(index) << Primitive::ComponentSizeShift(type); + data_offset += Int32ConstantFrom(index) << DataType::SizeShift(type); codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, out_loc, obj, @@ -6466,7 +6237,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (index.IsConstant()) { size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset; @@ -6480,7 +6251,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { vixl32::SRegister out = SRegisterFrom(out_loc); if (index.IsConstant()) { size_t offset = (Int32ConstantFrom(index) << TIMES_4) + data_offset; @@ -6494,7 +6265,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (index.IsConstant()) { size_t offset = (Int32ConstantFrom(index) << TIMES_8) + data_offset; GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset); @@ -6507,12 +6278,14 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // Potential implicit null checks, in the case of reference // arrays, are handled in the previous switch statement. } else if (!maybe_compressed_char_at) { @@ -6523,13 +6296,13 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) { } void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, may_need_runtime_call_for_type_check ? 
LocationSummary::kCallOnSlowPath : @@ -6537,7 +6310,7 @@ void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(value_type)) { + if (DataType::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::RequiresFpuRegister()); } else { locations->SetInAt(2, Location::RequiresRegister()); @@ -6553,26 +6326,27 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { LocationSummary* locations = instruction->GetLocations(); vixl32::Register array = InputRegisterAt(instruction, 0); Location index = locations->InAt(1); - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); uint32_t data_offset = - mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value(); + mirror::Array::DataOffset(DataType::Size(value_type)).Uint32Value(); Location value_loc = locations->InAt(2); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); switch (value_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { if (index.IsConstant()) { int32_t const_index = Int32ConstantFrom(index); uint32_t full_offset = - data_offset + (const_index << Primitive::ComponentSizeShift(value_type)); + data_offset + (const_index << DataType::SizeShift(value_type)); StoreOperandType store_type = GetStoreOperandType(value_type); GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset); } else { @@ -6596,7 +6370,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { vixl32::Register value = RegisterFrom(value_loc); // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet. // See the comment in instruction_simplifier_shared.cc. @@ -6636,7 +6410,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { SlowPathCodeARMVIXL* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { - slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARMVIXL(instruction); + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathARMVIXL(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { vixl32::Label non_zero; @@ -6709,7 +6483,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { // Note that in the case where `value` is a null reference, // we do not enter this block, as a null reference does not // need poisoning. 
- DCHECK_EQ(value_type, Primitive::kPrimNot); + DCHECK_EQ(value_type, DataType::Type::kReference); __ Mov(temp1, value); GetAssembler()->PoisonHeapReference(temp1); source = temp1; @@ -6750,7 +6524,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Location value = locations->InAt(2); if (index.IsConstant()) { size_t offset = @@ -6765,7 +6539,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { Location value = locations->InAt(2); DCHECK(value.IsFpuRegister()); if (index.IsConstant()) { @@ -6780,7 +6554,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { Location value = locations->InAt(2); DCHECK(value.IsFpuRegisterPair()); if (index.IsConstant()) { @@ -6795,13 +6569,15 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << value_type; UNREACHABLE(); } // Objects are handled in the switch. - if (value_type != Primitive::kPrimNot) { + if (value_type != DataType::Type::kReference) { // TODO(VIXL): Ensure we record the pc position immediately after the preceding store // instruction. codegen_->MaybeRecordImplicitNullCheck(instruction); @@ -6810,7 +6586,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) { void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -6834,7 +6610,7 @@ void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset())); @@ -6897,7 +6673,7 @@ void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction int32_t index = Int32ConstantFrom(index_loc); if (index < 0 || index >= length) { SlowPathCodeARMVIXL* slow_path = - new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction); + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); } else { @@ -6908,13 +6684,13 @@ void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction } SlowPathCodeARMVIXL* slow_path = - new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction); + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction); __ Cmp(RegisterFrom(index_loc), length); codegen_->AddSlowPath(slow_path); __ B(hs, slow_path->GetEntryLabel()); } else { SlowPathCodeARMVIXL* slow_path = - new (GetGraph()->GetArena()) 
BoundsCheckSlowPathARMVIXL(instruction); + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathARMVIXL(instruction); __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0)); codegen_->AddSlowPath(slow_path); __ B(ls, slow_path->GetEntryLabel()); @@ -6944,12 +6720,19 @@ void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRI } void InstructionCodeGeneratorARMVIXL::VisitParallelMove(HParallelMove* instruction) { + if (instruction->GetNext()->IsSuspendCheck() && + instruction->GetBlock()->GetLoopInformation() != nullptr) { + HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); + // The back edge will generate the suspend check. + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); + } + codegen_->GetMoveResolver()->EmitNativeCode(instruction); } void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnSlowPath); locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -6965,6 +6748,7 @@ void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instructi return; } GenerateSuspendCheck(instruction, nullptr); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 12); } void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction, @@ -6972,12 +6756,12 @@ void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instru SuspendCheckSlowPathARMVIXL* slow_path = down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath()); if (slow_path == nullptr) { - slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARMVIXL(instruction, successor); + slow_path = + new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathARMVIXL(instruction, successor); instruction->SetSlowPath(slow_path); codegen_->AddSlowPath(slow_path); if (successor != nullptr) { DCHECK(successor->IsLoopHeader()); - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); } } else { DCHECK_EQ(slow_path->GetSuccessor(), successor); @@ -7256,6 +7040,7 @@ HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7286,7 +7071,7 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -7298,9 +7083,6 @@ void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. 
- // Note that IP may be clobbered by saving/restoring the live register (only one thanks - // to the custom calling convention) or by marking, so we request a different temp. - locations->AddTemp(Location::RequiresRegister()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConventionARMVIXL calling_convention; caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); @@ -7326,6 +7108,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ HLoadClass::LoadKind load_kind = cls->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kRuntimeCall) { codegen_->GenerateLoadClassRuntimeCall(cls); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 13); return; } DCHECK(!cls->NeedsAccessCheck()); @@ -7355,7 +7138,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); break; } @@ -7367,14 +7150,25 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); break; } + case HLoadClass::LoadKind::kBootImageClassTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->EmitMovwMovtPlaceholder(labels, out); + __ Ldr(out, MemOperand(out, /* offset */ 0)); + // Extract the reference from the slot data, i.e. clear the hash bits. + int32_t masked_hash = ClassTable::TableSlot::MaskHash( + ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); + if (masked_hash != 0) { + __ Sub(out, out, Operand(masked_hash)); + } + break; + } case HLoadClass::LoadKind::kBssEntry: { - vixl32::Register temp = (!kUseReadBarrier || kUseBakerReadBarrier) - ? 
RegisterFrom(locations->GetTemp(0)) - : out; CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); - codegen_->EmitMovwMovtPlaceholder(labels, temp); - GenerateGcRootFieldLoad(cls, out_loc, temp, /* offset */ 0, read_barrier_option); + codegen_->EmitMovwMovtPlaceholder(labels, out); + GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option); generate_null_check = true; break; } @@ -7394,8 +7188,9 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - LoadClassSlowPathARMVIXL* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); + LoadClassSlowPathARMVIXL* slow_path = + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); @@ -7405,12 +7200,13 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_ } else { __ Bind(slow_path->GetExitLabel()); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 14); } } void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, Location::RequiresRegister()); if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); @@ -7420,10 +7216,10 @@ void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. LoadClassSlowPathARMVIXL* slow_path = - new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), - check, - check->GetDexPc(), - /* do_clinit */ true); + new (codegen_->GetScopedAllocator()) LoadClassSlowPathARMVIXL(check->GetLoadClass(), + check, + check->GetDexPc(), + /* do_clinit */ true); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0)); } @@ -7432,12 +7228,15 @@ void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck( LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); - GetAssembler()->LoadFromOffset(kLoadWord, - temp, - class_reg, - mirror::Class::StatusOffset().Int32Value()); - __ Cmp(temp, mirror::Class::kStatusInitialized); - __ B(lt, slow_path->GetEntryLabel()); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + GetAssembler()->LoadFromOffset(kLoadUnsignedByte, temp, class_reg, status_byte_offset); + __ Cmp(temp, shifted_initialized_value); + __ B(lo, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we may be in a situation where caches are not synced // properly. Therefore, we do a memory fence. 
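The rewritten GenerateClassInitializationCheck above no longer loads the full 32-bit status word: the class status now shares its field with the SubtypeCheckBits, so the code loads only the byte holding the status and compares it, unsigned, against kInitialized shifted to its position within that byte (branching to the slow path on lo, i.e. not yet initialized). The offset/shift arithmetic restated as constexpr C++; the three constants below are placeholders, not the real values of StatusOffset(), SubtypeCheckBits::BitStructSizeOf() or ClassStatus::kInitialized:

#include <cstddef>
#include <cstdint>

constexpr size_t kBitsPerByteSketch = 8;
constexpr size_t kStatusFieldOffset = 100;  // Placeholder for mirror::Class::StatusOffset().
constexpr size_t kStatusLsbPosition = 27;   // Placeholder for SubtypeCheckBits::BitStructSizeOf().
constexpr uint32_t kInitialized = 14;       // Placeholder for enum_cast(ClassStatus::kInitialized).

// Only the byte that contains the status bits has to be loaded...
constexpr size_t kStatusByteOffset =
    kStatusFieldOffset + kStatusLsbPosition / kBitsPerByteSketch;
// ...and kInitialized is compared at its shifted position inside that byte.
constexpr uint32_t kShiftedInitializedValue =
    kInitialized << (kStatusLsbPosition % kBitsPerByteSketch);

// Equivalent of "ldrb temp, [class, #kStatusByteOffset]; cmp temp, #value; blo slow_path".
// The low bits of the loaded byte belong to SubtypeCheckBits, but because the status sits in
// the high bits of that byte, an unsigned >= compare still answers "initialized or later".
bool IsAtLeastInitialized(uint8_t status_byte) {
  return status_byte >= kShiftedInitializedValue;
}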
__ Dmb(ISH); @@ -7448,6 +7247,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -7463,7 +7263,7 @@ HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind( void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(LocationFrom(r0)); @@ -7472,9 +7272,6 @@ void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need, including temps. - // Note that IP may be clobbered by saving/restoring the live register (only one thanks - // to the custom calling convention) or by marking, so we request a different temp. - locations->AddTemp(Location::RequiresRegister()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConventionARMVIXL calling_convention; caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0))); @@ -7503,31 +7300,37 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); codegen_->EmitMovwMovtPlaceholder(labels, out); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address)); - return; // No dex cache slow path. + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->EmitMovwMovtPlaceholder(labels, out); + __ Ldr(out, MemOperand(out, /* offset */ 0)); + return; } case HLoadString::LoadKind::kBssEntry: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); - vixl32::Register temp = (!kUseReadBarrier || kUseBakerReadBarrier) - ? 
RegisterFrom(locations->GetTemp(0)) - : out; CodeGeneratorARMVIXL::PcRelativePatchInfo* labels = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); - codegen_->EmitMovwMovtPlaceholder(labels, temp); - GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption); + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->EmitMovwMovtPlaceholder(labels, out); + GenerateGcRootFieldLoad(load, out_loc, out, /* offset */ 0, kCompilerReadBarrierOption); LoadStringSlowPathARMVIXL* slow_path = - new (GetGraph()->GetArena()) LoadStringSlowPathARMVIXL(load); + new (codegen_->GetScopedAllocator()) LoadStringSlowPathARMVIXL(load); codegen_->AddSlowPath(slow_path); __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 15); return; } case HLoadString::LoadKind::kJitTableAddress: { @@ -7548,6 +7351,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) NO_THRE __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_); codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc()); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 16); } static int32_t GetExceptionTlsOffset() { @@ -7556,7 +7360,7 @@ static int32_t GetExceptionTlsOffset() { void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -7567,7 +7371,7 @@ void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) { void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) { - new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { @@ -7578,8 +7382,8 @@ void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear } void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); } @@ -7619,11 +7423,12 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -7631,7 +7436,8 @@ void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) { break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); if (baker_read_barrier_slow_path) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -7675,13 +7481,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Classes must be equal for the instanceof to succeed. __ Cmp(out, cls); // We speculatively set the result to false without changing the condition @@ -7708,13 +7516,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. vixl32::Label loop; @@ -7724,7 +7534,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to the final label. __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); __ Cmp(out, cls); @@ -7734,13 +7544,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. vixl32::Label loop, success; __ Bind(&loop); @@ -7751,7 +7563,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // This is essentially a null check, but it sets the condition flags to the // proper value for the code that follows the loop, i.e. not `eq`. 
__ Cmp(out, 1); @@ -7788,13 +7600,15 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. vixl32::Label exact_check; __ Cmp(out, cls); @@ -7805,7 +7619,7 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to the final label. __ CompareAndBranchIfZero(out, final_label, /* far_target */ false); GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -7848,8 +7662,8 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) kWithoutReadBarrier); __ Cmp(out, cls); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); __ Mov(out, 1); @@ -7877,8 +7691,8 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) // call to the runtime not using a type checking slow path). // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); break; @@ -7895,27 +7709,10 @@ void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) } void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. 
- break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); @@ -7942,21 +7739,10 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { const uint32_t object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. - bool is_type_check_slow_path_fatal = false; - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeARMVIXL* type_check_slow_path = - new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction, - is_type_check_slow_path_fatal); + new (codegen_->GetScopedAllocator()) TypeCheckSlowPathARMVIXL( + instruction, is_type_check_slow_path_fatal); codegen_->AddSlowPath(type_check_slow_path); vixl32::Label done; @@ -8069,7 +7855,7 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { // Otherwise,the object is indeed an array, jump to label `check_non_primitive_component_type` // to further check that this component type is not a primitive type. 
GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset); - static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot"); + static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot"); __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel()); break; } @@ -8131,8 +7917,8 @@ void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) { } void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); } @@ -8146,6 +7932,7 @@ void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* i } else { CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>(); } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 17); } void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) { @@ -8162,9 +7949,9 @@ void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) { void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - DCHECK(instruction->GetResultType() == Primitive::kPrimInt - || instruction->GetResultType() == Primitive::kPrimLong); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32 + || instruction->GetResultType() == DataType::Type::kInt64); // Note: GVN reorders commutative operations to have the constant on the right hand side. 
locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode)); @@ -8185,9 +7972,9 @@ void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) { void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - DCHECK(instruction->GetResultType() == Primitive::kPrimInt - || instruction->GetResultType() == Primitive::kPrimLong); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32 + || instruction->GetResultType() == DataType::Type::kInt64); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -8200,7 +7987,7 @@ void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRi Location second = locations->InAt(1); Location out = locations->Out(); - if (instruction->GetResultType() == Primitive::kPrimInt) { + if (instruction->GetResultType() == DataType::Type::kInt32) { vixl32::Register first_reg = RegisterFrom(first); vixl32::Register second_reg = RegisterFrom(second); vixl32::Register out_reg = RegisterFrom(out); @@ -8221,7 +8008,7 @@ void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRi return; } else { - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); vixl32::Register first_low = LowRegisterFrom(first); vixl32::Register first_high = HighRegisterFrom(first); vixl32::Register second_low = LowRegisterFrom(second); @@ -8249,11 +8036,11 @@ void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRi void LocationsBuilderARMVIXL::VisitDataProcWithShifterOp( HDataProcWithShifterOp* instruction) { - DCHECK(instruction->GetType() == Primitive::kPrimInt || - instruction->GetType() == Primitive::kPrimLong); + DCHECK(instruction->GetType() == DataType::Type::kInt32 || + instruction->GetType() == DataType::Type::kInt64); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - const bool overlap = instruction->GetType() == Primitive::kPrimLong && + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + const bool overlap = instruction->GetType() == DataType::Type::kInt64 && HDataProcWithShifterOp::IsExtensionOp(instruction->GetOpKind()); locations->SetInAt(0, Location::RequiresRegister()); @@ -8268,22 +8055,44 @@ void InstructionCodeGeneratorARMVIXL::VisitDataProcWithShifterOp( const HInstruction::InstructionKind kind = instruction->GetInstrKind(); const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); - if (instruction->GetType() == Primitive::kPrimInt) { - DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind)); - - const vixl32::Register second = instruction->InputAt(1)->GetType() == Primitive::kPrimLong + if (instruction->GetType() == DataType::Type::kInt32) { + const vixl32::Register first = InputRegisterAt(instruction, 0); + const vixl32::Register output = OutputRegister(instruction); + const vixl32::Register second = instruction->InputAt(1)->GetType() == DataType::Type::kInt64 ? 
LowRegisterFrom(locations->InAt(1)) : InputRegisterAt(instruction, 1); - GenerateDataProcInstruction(kind, - OutputRegister(instruction), - InputRegisterAt(instruction, 0), - Operand(second, - ShiftFromOpKind(op_kind), - instruction->GetShiftAmount()), - codegen_); + if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { + DCHECK_EQ(kind, HInstruction::kAdd); + + switch (op_kind) { + case HDataProcWithShifterOp::kUXTB: + __ Uxtab(output, first, second); + break; + case HDataProcWithShifterOp::kUXTH: + __ Uxtah(output, first, second); + break; + case HDataProcWithShifterOp::kSXTB: + __ Sxtab(output, first, second); + break; + case HDataProcWithShifterOp::kSXTH: + __ Sxtah(output, first, second); + break; + default: + LOG(FATAL) << "Unexpected operation kind: " << op_kind; + UNREACHABLE(); + } + } else { + GenerateDataProcInstruction(kind, + output, + first, + Operand(second, + ShiftFromOpKind(op_kind), + instruction->GetShiftAmount()), + codegen_); + } } else { - DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64); if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { const vixl32::Register second = InputRegisterAt(instruction, 1); @@ -8380,13 +8189,11 @@ void InstructionCodeGeneratorARMVIXL::GenerateAddLongConst(Location out, return; } __ Adds(out_low, first_low, value_low); - if (GetAssembler()->ShifterOperandCanHold(ADC, value_high, kCcDontCare)) { + if (GetAssembler()->ShifterOperandCanHold(ADC, value_high)) { __ Adc(out_high, first_high, value_high); - } else if (GetAssembler()->ShifterOperandCanHold(SBC, ~value_high, kCcDontCare)) { - __ Sbc(out_high, first_high, ~value_high); } else { - LOG(FATAL) << "Unexpected constant " << value_high; - UNREACHABLE(); + DCHECK(GetAssembler()->ShifterOperandCanHold(SBC, ~value_high)); + __ Sbc(out_high, first_high, ~value_high); } } @@ -8399,7 +8206,7 @@ void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* i if (second.IsConstant()) { uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant())); uint32_t value_low = Low32Bits(value); - if (instruction->GetResultType() == Primitive::kPrimInt) { + if (instruction->GetResultType() == DataType::Type::kInt32) { vixl32::Register first_reg = InputRegisterAt(instruction, 0); vixl32::Register out_reg = OutputRegister(instruction); if (instruction->IsAnd()) { @@ -8411,7 +8218,7 @@ void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* i GenerateEorConst(out_reg, first_reg, value_low); } } else { - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); uint32_t value_high = High32Bits(value); vixl32::Register first_low = LowRegisterFrom(first); vixl32::Register first_high = HighRegisterFrom(first); @@ -8432,7 +8239,7 @@ void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* i return; } - if (instruction->GetResultType() == Primitive::kPrimInt) { + if (instruction->GetResultType() == DataType::Type::kInt32) { vixl32::Register first_reg = InputRegisterAt(instruction, 0); vixl32::Register second_reg = InputRegisterAt(instruction, 1); vixl32::Register out_reg = OutputRegister(instruction); @@ -8445,7 +8252,7 @@ void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* i __ Eor(out_reg, first_reg, second_reg); } } else { - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); 
vixl32::Register first_low = LowRegisterFrom(first); vixl32::Register first_high = HighRegisterFrom(first); vixl32::Register second_low = LowRegisterFrom(second); @@ -8594,7 +8401,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Slow path marking the GC root `root`. The entrypoint will // be loaded by the slow path code. SlowPathCodeARMVIXL* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARMVIXL(instruction, root); + new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathARMVIXL(instruction, root); codegen_->AddSlowPath(slow_path); // /* GcRoot<mirror::Object> */ root = *(obj + offset) @@ -8625,6 +8432,7 @@ void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad( // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. } + codegen_->MaybeGenerateMarkingRegisterCheck(/* code */ 18); } void CodeGeneratorARMVIXL::MaybeAddBakerCcEntrypointTempForFields(LocationSummary* locations) { @@ -8669,7 +8477,7 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i // gray_return_address: DCHECK_ALIGNED(offset, sizeof(mirror::HeapReference<mirror::Object>)); - vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); + vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); bool narrow = CanEmitNarrowLdr(ref_reg, obj, offset); vixl32::Register base = obj; if (offset >= kReferenceLoadMinFarOffset) { @@ -8689,31 +8497,34 @@ void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(HInstruction* i base.GetCode(), obj.GetCode(), narrow); vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); - vixl::EmissionCheckScope guard( - GetVIXLAssembler(), - (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - EmitPlaceholderBne(this, bne_label); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); - if (needs_null_check) { - MaybeRecordImplicitNullCheck(instruction); - } - // Note: We need a specific width for the unpoisoning NEG. - if (kPoisonHeapReferences) { - if (narrow) { - // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). - __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); - } else { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + { + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + EmitPlaceholderBne(this, bne_label); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(EncodingSize(narrow ? Narrow : Wide), ref_reg, MemOperand(base, offset)); + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + // Note: We need a specific width for the unpoisoning NEG. + if (kPoisonHeapReferences) { + if (narrow) { + // The only 16-bit encoding is T1 which sets flags outside IT block (i.e. RSBS, not RSB). + __ rsbs(EncodingSize(Narrow), ref_reg, ref_reg, Operand(0)); + } else { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } } + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + narrow ? 
BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET + : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); } - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - narrow ? BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET - : BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET); + MaybeGenerateMarkingRegisterCheck(/* code */ 19, /* temp_loc */ LocationFrom(ip)); return; } @@ -8762,9 +8573,9 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i // gray_return_address: DCHECK(index.IsValid()); - vixl32::Register index_reg = RegisterFrom(index, Primitive::kPrimInt); - vixl32::Register ref_reg = RegisterFrom(ref, Primitive::kPrimNot); - vixl32::Register data_reg = RegisterFrom(temp, Primitive::kPrimInt); // Raw pointer. + vixl32::Register index_reg = RegisterFrom(index, DataType::Type::kInt32); + vixl32::Register ref_reg = RegisterFrom(ref, DataType::Type::kReference); + vixl32::Register data_reg = RegisterFrom(temp, DataType::Type::kInt32); // Raw pointer. DCHECK(!data_reg.Is(kBakerCcEntrypointRegister)); UseScratchRegisterScope temps(GetVIXLAssembler()); @@ -8774,23 +8585,26 @@ void CodeGeneratorARMVIXL::GenerateArrayLoadWithBakerReadBarrier(HInstruction* i vixl32::Label* bne_label = NewBakerReadBarrierPatch(custom_data); __ Add(data_reg, obj, Operand(data_offset)); - vixl::EmissionCheckScope guard( - GetVIXLAssembler(), - (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); - vixl32::Label return_address; - EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); - __ cmp(mr, Operand(0)); - EmitPlaceholderBne(this, bne_label); - ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); - __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); - DCHECK(!needs_null_check); // The thunk cannot handle the null check. - // Note: We need a Wide NEG for the unpoisoning. - if (kPoisonHeapReferences) { - __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + { + vixl::EmissionCheckScope guard( + GetVIXLAssembler(), + (kPoisonHeapReferences ? 5u : 4u) * vixl32::kMaxInstructionSizeInBytes); + vixl32::Label return_address; + EmitAdrCode adr(GetVIXLAssembler(), lr, &return_address); + __ cmp(mr, Operand(0)); + EmitPlaceholderBne(this, bne_label); + ptrdiff_t old_offset = GetVIXLAssembler()->GetBuffer()->GetCursorOffset(); + __ ldr(ref_reg, MemOperand(data_reg, index_reg, vixl32::LSL, scale_factor)); + DCHECK(!needs_null_check); // The thunk cannot handle the null check. + // Note: We need a Wide NEG for the unpoisoning. + if (kPoisonHeapReferences) { + __ rsb(EncodingSize(Wide), ref_reg, ref_reg, Operand(0)); + } + __ Bind(&return_address); + DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), + BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); } - __ Bind(&return_address); - DCHECK_EQ(old_offset - GetVIXLAssembler()->GetBuffer()->GetCursorOffset(), - BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET); + MaybeGenerateMarkingRegisterCheck(/* code */ 20, /* temp_loc */ LocationFrom(ip)); return; } @@ -8836,7 +8650,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // Slow path marking the object `ref` when the GC is marking. The // entrypoint will be loaded by the slow path code. 
SlowPathCodeARMVIXL* slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( + new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierSlowPathARMVIXL( instruction, ref, obj, offset, index, scale_factor, needs_null_check, temp_reg); AddSlowPath(slow_path); @@ -8844,6 +8658,7 @@ void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(HInstructio // Fast path: the GC is not marking: just load the reference. GenerateRawReferenceLoad(instruction, ref, obj, offset, index, scale_factor, needs_null_check); __ Bind(slow_path->GetExitLabel()); + MaybeGenerateMarkingRegisterCheck(/* code */ 21); } void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction* instruction, @@ -8882,7 +8697,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction // Slow path updating the object reference at address `obj + field_offset` // when the GC is marking. The entrypoint will be loaded by the slow path code. SlowPathCodeARMVIXL* slow_path = - new (GetGraph()->GetArena()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( + new (GetScopedAllocator()) LoadReferenceWithBakerReadBarrierAndUpdateFieldSlowPathARMVIXL( instruction, ref, obj, @@ -8898,6 +8713,7 @@ void CodeGeneratorARMVIXL::UpdateReferenceFieldWithBakerReadBarrier(HInstruction // Fast path: the GC is not marking: nothing to do (the field is // up-to-date, and we don't need to load the reference). __ Bind(slow_path->GetExitLabel()); + MaybeGenerateMarkingRegisterCheck(/* code */ 22); } void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, @@ -8907,7 +8723,7 @@ void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, Location index, ScaleFactor scale_factor, bool needs_null_check) { - Primitive::Type type = Primitive::kPrimNot; + DataType::Type type = DataType::Type::kReference; vixl32::Register ref_reg = RegisterFrom(ref, type); // If needed, vixl::EmissionCheckScope guards are used to ensure @@ -8959,6 +8775,20 @@ void CodeGeneratorARMVIXL::GenerateRawReferenceLoad(HInstruction* instruction, GetAssembler()->MaybeUnpoisonHeapReference(ref_reg); } +void CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck(int code, Location temp_loc) { + // The following condition is a compile-time one, so it does not have a run-time cost. + if (kEmitCompilerReadBarrier && kUseBakerReadBarrier && kIsDebugBuild) { + // The following condition is a run-time one; it is executed after the + // previous compile-time test, to avoid penalizing non-debug builds. + if (GetCompilerOptions().EmitRunTimeChecksInDebugMode()) { + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::Register temp = temp_loc.IsValid() ? RegisterFrom(temp_loc) : temps.Acquire(); + GetAssembler()->GenerateMarkingRegisterCheck(temp, + kMarkingRegisterCheckBreakCodeBaseCode + code); + } + } +} + void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction, Location out, Location ref, @@ -8978,7 +8808,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction, // not used by the artReadBarrierSlow entry point. // // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. 
- SlowPathCodeARMVIXL* slow_path = new (GetGraph()->GetArena()) + SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) ReadBarrierForHeapReferenceSlowPathARMVIXL(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); @@ -9014,7 +8844,7 @@ void CodeGeneratorARMVIXL::GenerateReadBarrierForRootSlow(HInstruction* instruct // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCodeARMVIXL* slow_path = - new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root); + new (GetScopedAllocator()) ReadBarrierForRootSlowPathARMVIXL(instruction, out, root); AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); @@ -9070,7 +8900,7 @@ void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall( break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); - PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + PcRelativePatchInfo* labels = NewBootImageMethodPatch(invoke->GetTargetMethod()); vixl32::Register temp_reg = RegisterFrom(temp); EmitMovwMovtPlaceholder(labels, temp_reg); break; @@ -9175,37 +9005,40 @@ void CodeGeneratorARMVIXL::GenerateVirtualCall( } } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch( +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageMethodPatch( MethodReference target_method) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.dex_method_index, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, &boot_image_method_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch( MethodReference target_method) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.dex_method_index, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, &method_bss_entry_patches_); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch( +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index) { - return NewPcRelativePatch(dex_file, type_index.index_, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, &boot_image_type_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewTypeBssEntryPatch( const DexFile& dex_file, dex::TypeIndex type_index) { - return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, &type_bss_entry_patches_); +} + +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewBootImageStringPatch( + const DexFile& dex_file, dex::StringIndex string_index) { + return NewPcRelativePatch(&dex_file, string_index.index_, &boot_image_string_patches_); } -CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch( +CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewStringBssEntryPatch( const DexFile& dex_file, dex::StringIndex string_index) { - return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_); + return NewPcRelativePatch(&dex_file, string_index.index_, &string_bss_entry_patches_); } CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch( - const DexFile& 
dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { + const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches) { patches->emplace_back(dex_file, offset_or_index); return &patches->back(); } @@ -9223,8 +9056,7 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { - jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), - reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), [this]() { @@ -9235,8 +9067,7 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitStringLiteral( VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { - jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index), - reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); return jit_class_patches_.GetOrCreate( TypeReference(&dex_file, type_index), [this]() { @@ -9244,56 +9075,60 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateJitClassLiteral(const DexFil }); } -template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorARMVIXL::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches) { + ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { - const DexFile& dex_file = info.target_dex_file; + const DexFile* dex_file = info.target_dex_file; size_t offset_or_index = info.offset_or_index; DCHECK(info.add_pc_label.IsBound()); uint32_t add_pc_offset = dchecked_integral_cast<uint32_t>(info.add_pc_label.GetLocation()); // Add MOVW patch. DCHECK(info.movw_label.IsBound()); uint32_t movw_offset = dchecked_integral_cast<uint32_t>(info.movw_label.GetLocation()); - linker_patches->push_back(Factory(movw_offset, &dex_file, add_pc_offset, offset_or_index)); + linker_patches->push_back(Factory(movw_offset, dex_file, add_pc_offset, offset_or_index)); // Add MOVT patch. 
DCHECK(info.movt_label.IsBound()); uint32_t movt_offset = dchecked_integral_cast<uint32_t>(info.movt_label.GetLocation()); - linker_patches->push_back(Factory(movt_offset, &dex_file, add_pc_offset, offset_or_index)); + linker_patches->push_back(Factory(movt_offset, dex_file, add_pc_offset, offset_or_index)); } } -void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { +void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_method_patches_.size() + /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_type_patches_.size() + /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() + - /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * boot_image_string_patches_.size() + + /* MOVW+MOVT for each entry */ 2u * string_bss_entry_patches_.size() + baker_read_barrier_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( + boot_image_method_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, - linker_patches); - } - EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, - linker_patches); + DCHECK(boot_image_method_patches_.empty()); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( + boot_image_string_patches_, linker_patches); + } + EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( + method_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( + type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( + string_bss_entry_patches_, linker_patches); for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) { - linker_patches->push_back(LinkerPatch::BakerReadBarrierBranchPatch(info.label.GetLocation(), - info.custom_data)); + linker_patches->push_back(linker::LinkerPatch::BakerReadBarrierBranchPatch( + info.label.GetLocation(), info.custom_data)); } DCHECK_EQ(size, linker_patches->size()); } @@ -9310,7 +9145,7 @@ VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateUint32Literal( void 
LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instr, LocationSummary::kNoCall); locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresRegister()); locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister()); @@ -9347,7 +9182,7 @@ void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATT // Simple implementation of packed switch - generate cascaded compare/jumps. void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold && codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) { @@ -9440,13 +9275,13 @@ void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_in } // Copy the result of a call into the given target. -void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) { +void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, DataType::Type type) { if (!trg.IsValid()) { - DCHECK_EQ(type, Primitive::kPrimVoid); + DCHECK_EQ(type, DataType::Type::kVoid); return; } - DCHECK_NE(type, Primitive::kPrimVoid); + DCHECK_NE(type, DataType::Type::kVoid); Location return_loc = InvokeDexCallingConventionVisitorARMVIXL().GetReturnLocation(type); if (return_loc.Equals(trg)) { @@ -9455,13 +9290,13 @@ void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged // with the last branch. - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { TODO_VIXL32(FATAL); - } else if (type == Primitive::kPrimDouble) { + } else if (type == DataType::Type::kFloat64) { TODO_VIXL32(FATAL); } else { // Let the parallel move resolver take care of all of this. 
- HParallelMove parallel_move(GetGraph()->GetArena()); + HParallelMove parallel_move(GetGraph()->GetAllocator()); parallel_move.AddMove(return_loc, trg, type, nullptr); GetMoveResolver()->EmitNativeCode(&parallel_move); } } @@ -9469,7 +9304,7 @@ void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type void LocationsBuilderARMVIXL::VisitClassTableGet(HClassTableGet* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } @@ -9512,17 +9347,13 @@ void CodeGeneratorARMVIXL::EmitJitRootPatches(uint8_t* code, const uint8_t* root for (const auto& entry : jit_string_patches_) { const StringReference& string_reference = entry.first; VIXLUInt32Literal* table_entry_literal = entry.second; - const auto it = jit_string_roots_.find(string_reference); - DCHECK(it != jit_string_roots_.end()); - uint64_t index_in_table = it->second; + uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { const TypeReference& type_reference = entry.first; VIXLUInt32Literal* table_entry_literal = entry.second; - const auto it = jit_class_roots_.find(type_reference); - DCHECK(it != jit_class_roots_.end()); - uint64_t index_in_table = it->second; + uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 01cf287f29..536da41d07 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -20,11 +20,11 @@ #include "base/enums.h" #include "code_generator.h" #include "common_arm.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" #include "driver/compiler_options.h" #include "nodes.h" -#include "string_reference.h" #include "parallel_move_resolver.h" -#include "type_reference.h" #include "utils/arm/assembler_arm_vixl.h" // TODO(VIXL): make vixl clean wrt -Wshadow.
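The GenerateClassInitializationCheck rewrite earlier in this diff stops loading the full 32-bit status word with a signed compare and instead loads only the byte that holds the packed ClassStatus, comparing it unsigned against the initialized value shifted into that byte. A minimal standalone sketch of that offset/immediate arithmetic, under the assumption that the SubtypeCheck bitstruct occupies 28 bits (placing the 4-bit status in the top byte of a little-endian word) and with placeholder values for the field offset and enum constant:

#include <cstdint>
#include <cstdio>

// Standalone sketch; the concrete numbers below are illustrative assumptions,
// not values taken from the diff. The real code derives them from
// SubtypeCheckBits::BitStructSizeOf(), mirror::Class::StatusOffset() and
// ClassStatus::kInitialized.
constexpr size_t kBitsPerByte = 8;
constexpr size_t kAssumedSubtypeCheckBits = 28;   // assumed SubtypeCheckBits::BitStructSizeOf()
constexpr size_t kAssumedStatusOffset = 0x70;     // assumed mirror::Class::StatusOffset()
constexpr uint32_t kAssumedInitialized = 14;      // assumed enum_cast<uint32_t>(ClassStatus::kInitialized)

int main() {
  // The status bits live above the SubtypeCheck bits in one 32-bit word, so
  // the byte containing them starts at this offset (little-endian layout).
  constexpr size_t status_lsb_position = kAssumedSubtypeCheckBits;
  constexpr size_t status_byte_offset =
      kAssumedStatusOffset + (status_lsb_position / kBitsPerByte);
  // The initialized value shifted into its position within that byte.
  constexpr uint32_t shifted_initialized_value =
      kAssumedInitialized << (status_lsb_position % kBitsPerByte);
  // The emitted ARM sequence then reduces to:
  //   LDRB temp, [class, #status_byte_offset]
  //   CMP  temp, #shifted_initialized_value
  //   BLO  slow_path   ; unsigned lower => class not yet initialized
  std::printf("status byte offset = +%zu, immediate = 0x%X\n",
              status_byte_offset, static_cast<unsigned>(shifted_initialized_value));
  return 0;
}

Because the status field occupies the most significant bits of that byte and kInitialized is assumed to be the largest status value the check cares about, the single unsigned lower (`lo`) branch is enough to route every not-yet-initialized class to the slow path.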
@@ -119,7 +119,7 @@ class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> { explicit JumpTableARMVIXL(HPackedSwitch* switch_instr) : switch_instr_(switch_instr), table_start_(), - bb_addresses_(switch_instr->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + bb_addresses_(switch_instr->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { uint32_t num_entries = switch_instr_->GetNumEntries(); for (uint32_t i = 0; i < num_entries; i++) { VIXLInt32Literal *lit = new VIXLInt32Literal(0, vixl32::RawLiteral::kManuallyPlaced); @@ -173,8 +173,8 @@ class InvokeDexCallingConventionVisitorARMVIXL : public InvokeDexCallingConventi InvokeDexCallingConventionVisitorARMVIXL() {} virtual ~InvokeDexCallingConventionVisitorARMVIXL() {} - Location GetNextLocation(Primitive::Type type) OVERRIDE; - Location GetReturnLocation(Primitive::Type type) const OVERRIDE; + Location GetNextLocation(DataType::Type type) OVERRIDE; + Location GetReturnLocation(DataType::Type type) const OVERRIDE; Location GetMethodLocation() const OVERRIDE; private: @@ -194,20 +194,20 @@ class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention Location GetFieldIndexLocation() const OVERRIDE { return helpers::LocationFrom(vixl::aarch32::r0); } - Location GetReturnLocation(Primitive::Type type) const OVERRIDE { - return Primitive::Is64BitType(type) + Location GetReturnLocation(DataType::Type type) const OVERRIDE { + return DataType::Is64BitType(type) ? helpers::LocationFrom(vixl::aarch32::r0, vixl::aarch32::r1) : helpers::LocationFrom(vixl::aarch32::r0); } - Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { - return Primitive::Is64BitType(type) + Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE { + return DataType::Is64BitType(type) ? helpers::LocationFrom(vixl::aarch32::r2, vixl::aarch32::r3) : (is_instance ? helpers::LocationFrom(vixl::aarch32::r2) : helpers::LocationFrom(vixl::aarch32::r1)); } - Location GetFpuLocation(Primitive::Type type) const OVERRIDE { - return Primitive::Is64BitType(type) + Location GetFpuLocation(DataType::Type type) const OVERRIDE { + return DataType::Is64BitType(type) ? helpers::LocationFrom(vixl::aarch32::s0, vixl::aarch32::s1) : helpers::LocationFrom(vixl::aarch32::s0); } @@ -287,7 +287,6 @@ class LocationsBuilderARMVIXL : public HGraphVisitor { Location ArithmeticZeroOrFpuRegister(HInstruction* input); Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode); bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode); - bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare); CodeGeneratorARMVIXL* const codegen_; InvokeDexCallingConventionVisitorARMVIXL parameter_visitor_; @@ -434,7 +433,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void GenerateFrameExit() OVERRIDE; void Bind(HBasicBlock* block) OVERRIDE; void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; @@ -475,12 +474,12 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Helper method to move a 32-bit value between two locations. 
void Move32(Location destination, Location source); - void LoadFromShiftedRegOffset(Primitive::Type type, + void LoadFromShiftedRegOffset(DataType::Type type, Location out_loc, vixl::aarch32::Register base, vixl::aarch32::Register reg_index, vixl::aarch32::Condition cond = vixl::aarch32::al); - void StoreToShiftedRegOffset(Primitive::Type type, + void StoreToShiftedRegOffset(DataType::Type type, Location out_loc, vixl::aarch32::Register base, vixl::aarch32::Register reg_index, @@ -522,8 +521,8 @@ class CodeGeneratorARMVIXL : public CodeGenerator { const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; } - bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { - return type == Primitive::kPrimDouble || type == Primitive::kPrimLong; + bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE { + return type == DataType::Type::kFloat64 || type == DataType::Type::kInt64; } void ComputeSpillMask() OVERRIDE; @@ -551,34 +550,38 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; - void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; + void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings/types. The only difference is the interpretation of the - // offset_or_index. The PC-relative address is loaded with three instructions, + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // + // The PC-relative address is loaded with three instructions, // MOVW+MOVT to load the offset to base_reg and then ADD base_reg, PC. The offset // is calculated from the ADD's effective PC, i.e. PC+4 on Thumb2. Though we // currently emit these 3 instructions together, instruction scheduling could // split this sequence apart, so we keep separate labels for each of them. struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, uint32_t off_or_idx) + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx) : target_dex_file(dex_file), offset_or_index(off_or_idx) { } PcRelativePatchInfo(PcRelativePatchInfo&& other) = default; - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type index. + // Target dex file or null for .data.bmig.rel.ro patches. + const DexFile* target_dex_file; + // Either the boot image offset (to write to .data.bmig.rel.ro) or string/type/method index. 
uint32_t offset_or_index; vixl::aarch32::Label movw_label; vixl::aarch32::Label movt_label; vixl::aarch32::Label add_pc_label; }; - PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method); - PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, dex::TypeIndex type_index); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index); - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index); // Add a new baker read barrier patch and return the label to be bound // before the BNE instruction. @@ -592,7 +595,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { dex::TypeIndex type_index, Handle<mirror::Class> handle); - void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; @@ -661,6 +664,28 @@ class CodeGeneratorARMVIXL : public CodeGenerator { ScaleFactor scale_factor, bool needs_null_check); + // Emit code checking the status of the Marking Register, and + // aborting the program if MR does not match the value stored in the + // art::Thread object. Code is only emitted in debug mode and if + // CompilerOptions::EmitRunTimeChecksInDebugMode returns true. + // + // Argument `code` is used to identify the different occurrences of + // MaybeGenerateMarkingRegisterCheck in the code generator, and is + // used together with kMarkingRegisterCheckBreakCodeBaseCode to + // create the value passed to the BKPT instruction. Note that unlike + // in the ARM64 code generator, where `__LINE__` is passed as `code` + // argument to + // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck, we cannot + // realistically do that here, as Encoding T1 for the BKPT + // instruction only accepts 8-bit immediate values. + // + // If `temp_loc` is a valid location, it is expected to be a + // register and will be used as a temporary to generate code; + // otherwise, a temporary will be fetched from the core register + // scratch pool. + virtual void MaybeGenerateMarkingRegisterCheck(int code, + Location temp_loc = Location::NoLocation()); + // Generate a read barrier for a heap reference within `instruction` // using a slow path. 
// @@ -715,7 +740,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) { - jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARMVIXL(switch_instr)); + jump_tables_.emplace_back(new (GetGraph()->GetAllocator()) JumpTableARMVIXL(switch_instr)); return jump_tables_.back().get(); } void EmitJumpTables(); @@ -751,12 +776,12 @@ class CodeGeneratorARMVIXL : public CodeGenerator { }; VIXLUInt32Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, ArenaDeque<PcRelativePatchInfo>* patches); - template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> static void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches); + ArenaVector<linker::LinkerPatch>* linker_patches); // Labels for each block that will be compiled. // We use a deque so that the `vixl::aarch32::Label` objects do not move in memory. @@ -774,15 +799,17 @@ class CodeGeneratorARMVIXL : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Baker read barrier patch info. 
ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_; diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc index 23d188d630..87e6d6834b 100644 --- a/compiler/optimizing/code_generator_mips.cc +++ b/compiler/optimizing/code_generator_mips.cc @@ -16,19 +16,24 @@ #include "code_generator_mips.h" +#include "arch/mips/asm_support_mips.h" #include "arch/mips/entrypoints_direct_mips.h" #include "arch/mips/instruction_set_features_mips.h" #include "art_method.h" +#include "class_table.h" #include "code_generator_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_mips.h" +#include "linker/linker_patch.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "offsets.h" +#include "stack_map_stream.h" #include "thread.h" #include "utils/assembler.h" #include "utils/mips/assembler_mips.h" @@ -40,30 +45,38 @@ namespace mips { static constexpr int kCurrentMethodStackOffset = 0; static constexpr Register kMethodRegisterArgument = A0; -Location MipsReturnLocation(Primitive::Type return_type) { +// Flags controlling the use of thunks for Baker read barriers. +constexpr bool kBakerReadBarrierThunksEnableForFields = true; +constexpr bool kBakerReadBarrierThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true; + +Location MipsReturnLocation(DataType::Type return_type) { switch (return_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kUint32: + case DataType::Type::kInt32: return Location::RegisterLocation(V0); - case Primitive::kPrimLong: + case DataType::Type::kUint64: + case DataType::Type::kInt64: return Location::RegisterPairLocation(V0, V1); - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: return Location::FpuRegisterLocation(F0); - case Primitive::kPrimVoid: + case DataType::Type::kVoid: return Location(); } UNREACHABLE(); } -Location InvokeDexCallingConventionVisitorMIPS::GetReturnLocation(Primitive::Type type) const { +Location InvokeDexCallingConventionVisitorMIPS::GetReturnLocation(DataType::Type type) const { return MipsReturnLocation(type); } @@ -71,16 +84,17 @@ Location InvokeDexCallingConventionVisitorMIPS::GetMethodLocation() const { return Location::RegisterLocation(kMethodRegisterArgument); } -Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(DataType::Type type) { Location next_location; switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { uint32_t gp_index = gp_index_++; if (gp_index < 
calling_convention.GetNumberOfRegisters()) { next_location = Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index)); @@ -91,7 +105,7 @@ Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(Primitive::Type break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { uint32_t gp_index = gp_index_; gp_index_ += 2; if (gp_index + 1 < calling_convention.GetNumberOfRegisters()) { @@ -114,32 +128,34 @@ Location InvokeDexCallingConventionVisitorMIPS::GetNextLocation(Primitive::Type // Note: both float and double types are stored in even FPU registers. On 32 bit FPU, double // will take up the even/odd pair, while floats are stored in even regs only. // On 64 bit FPU, both double and float are stored in even registers only. - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { uint32_t float_index = float_index_++; if (float_index < calling_convention.GetNumberOfFpuRegisters()) { next_location = Location::FpuRegisterLocation( calling_convention.GetFpuRegisterAt(float_index)); } else { size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); - next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) - : Location::StackSlot(stack_offset); + next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) + : Location::StackSlot(stack_offset); } break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; } // Space on the stack is reserved for all arguments. - stack_index_ += Primitive::Is64BitType(type) ? 2 : 1; + stack_index_ += DataType::Is64BitType(type) ? 2 : 1; return next_location; } -Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) { +Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type) { return MipsReturnLocation(type); } @@ -164,10 +180,10 @@ class BoundsCheckSlowPathMIPS : public SlowPathCodeMIPS { InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves(locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimInt, + DataType::Type::kInt32, locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt); + DataType::Type::kInt32); QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? 
kQuickThrowStringBounds : kQuickThrowArrayBounds; @@ -208,13 +224,11 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { LoadClassSlowPathMIPS(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, - bool do_clinit, - const CodeGeneratorMIPS::PcRelativePatchInfo* bss_info_high = nullptr) + bool do_clinit) : SlowPathCodeMIPS(at), cls_(cls), dex_pc_(dex_pc), - do_clinit_(do_clinit), - bss_info_high_(bss_info_high) { + do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -222,28 +236,11 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - const bool baker_or_no_read_barriers = (!kUseReadBarrier || kUseBakerReadBarrier); InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - const bool is_load_class_bss_entry = - (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. - Register entry_address = kNoRegister; - if (is_load_class_bss_entry && baker_or_no_read_barriers) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); - // In the unlucky case that `temp` is A0, we preserve the address in `out` across the - // kSaveEverything call. - entry_address = temp_is_a0 ? out.AsRegister<Register>() : temp; - DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); - if (temp_is_a0) { - __ Move(entry_address, temp); - } - } - dex::TypeIndex type_index = cls_->GetTypeIndex(); __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage @@ -255,46 +252,16 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); } - // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. - if (is_load_class_bss_entry && baker_or_no_read_barriers) { - // The class entry address was preserved in `entry_address` thanks to kSaveEverything. - DCHECK(bss_info_high_); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, bss_info_high_); - bool reordering = __ SetReorder(false); - __ Bind(&info_low->label); - __ StoreToOffset(kStoreWord, - calling_convention.GetRegisterAt(0), - entry_address, - /* placeholder */ 0x5678); - __ SetReorder(reordering); - } - // Move the class to the desired location. if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - Primitive::Type type = instruction_->GetType(); + DataType::Type type = instruction_->GetType(); mips_codegen->MoveLocation(out, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); } RestoreLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. - if (is_load_class_bss_entry && !baker_or_no_read_barriers) { - // For non-Baker read barriers we need to re-calculate the address of - // the class entry. - const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); - Register base = isR6 ? 
ZERO : locations->InAt(0).AsRegister<Register>(); - CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - mips_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, info_high); - bool reordering = __ SetReorder(false); - mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low); - __ StoreToOffset(kStoreWord, out.AsRegister<Register>(), TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); - } __ B(GetExitLabel()); } @@ -310,95 +277,41 @@ class LoadClassSlowPathMIPS : public SlowPathCodeMIPS { // Whether to initialize the class. const bool do_clinit_; - // Pointer to the high half PC-relative patch info for HLoadClass/kBssEntry. - const CodeGeneratorMIPS::PcRelativePatchInfo* bss_info_high_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS); }; class LoadStringSlowPathMIPS : public SlowPathCodeMIPS { public: - explicit LoadStringSlowPathMIPS(HLoadString* instruction, - const CodeGeneratorMIPS::PcRelativePatchInfo* bss_info_high) - : SlowPathCodeMIPS(instruction), bss_info_high_(bss_info_high) {} + explicit LoadStringSlowPathMIPS(HLoadString* instruction) + : SlowPathCodeMIPS(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { DCHECK(instruction_->IsLoadString()); DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - HLoadString* load = instruction_->AsLoadString(); - const dex::StringIndex string_index = load->GetStringIndex(); - Register out = locations->Out().AsRegister<Register>(); + const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); - const bool baker_or_no_read_barriers = (!kUseReadBarrier || kUseBakerReadBarrier); InvokeRuntimeCallingConvention calling_convention; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - // For HLoadString/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. - Register entry_address = kNoRegister; - if (baker_or_no_read_barriers) { - Register temp = locations->GetTemp(0).AsRegister<Register>(); - bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); - // In the unlucky case that `temp` is A0, we preserve the address in `out` across the - // kSaveEverything call. - entry_address = temp_is_a0 ? out : temp; - DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); - if (temp_is_a0) { - __ Move(entry_address, temp); - } - } - __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); mips_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - // Store the resolved string to the BSS entry. - if (baker_or_no_read_barriers) { - // The string entry address was preserved in `entry_address` thanks to kSaveEverything. 
- DCHECK(bss_info_high_); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, bss_info_high_); - bool reordering = __ SetReorder(false); - __ Bind(&info_low->label); - __ StoreToOffset(kStoreWord, - calling_convention.GetRegisterAt(0), - entry_address, - /* placeholder */ 0x5678); - __ SetReorder(reordering); - } - - Primitive::Type type = instruction_->GetType(); + DataType::Type type = instruction_->GetType(); mips_codegen->MoveLocation(locations->Out(), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); RestoreLiveRegisters(codegen, locations); - // Store the resolved string to the BSS entry. - if (!baker_or_no_read_barriers) { - // For non-Baker read barriers we need to re-calculate the address of - // the string entry. - const bool isR6 = mips_codegen->GetInstructionSetFeatures().IsR6(); - Register base = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); - CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); - CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - mips_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high); - bool reordering = __ SetReorder(false); - mips_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base, info_low); - __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); - } __ B(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS"; } private: - // Pointer to the high half PC-relative patch info. - const CodeGeneratorMIPS::PcRelativePatchInfo* bss_info_high_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS); }; @@ -455,6 +368,10 @@ class SuspendCheckSlowPathMIPS : public SlowPathCodeMIPS { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS"; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: // If not null, the block to branch to after the suspend check. 
HBasicBlock* const successor_; @@ -478,7 +395,7 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -487,14 +404,14 @@ class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS { InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves(locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot); + DataType::Type::kReference); if (instruction_->IsInstanceOf()) { mips_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); - Primitive::Type ret_type = instruction_->GetType(); + DataType::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { @@ -552,21 +469,21 @@ class ArraySetSlowPathMIPS : public SlowPathCodeMIPS { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove( locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove( locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); parallel_move.AddMove( locations->InAt(2), Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); @@ -963,19 +880,19 @@ class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS { // We're moving two or three locations to locations that could // overlap, so we need a parallel move resolver.
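// Why a parallel move resolver is needed here, sketched with registers chosen purely for
// exposition: if ref_ currently lives in A1 and obj_ in A0, the requested moves "A0 <- ref_"
// and "A1 <- obj_" form a cycle, and performing them one after the other in either order
// clobbers a source. The resolver detects the cycle and breaks it with a scratch register,
// emitting roughly:
//   __ Move(TMP, A1);   // save ref_
//   __ Move(A1, A0);    // obj_ -> A1
//   __ Move(A0, TMP);   // ref_ -> A0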
InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove(ref_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove(obj_, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); if (index.IsValid()) { parallel_move.AddMove(index, Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); } else { @@ -989,8 +906,8 @@ class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS { CheckEntrypointTypes< kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>(); mips_codegen->MoveLocation(out_, - calling_convention.GetReturnLocation(Primitive::kPrimNot), - Primitive::kPrimNot); + calling_convention.GetReturnLocation(DataType::Type::kReference), + DataType::Type::kReference); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); @@ -1055,15 +972,15 @@ class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS { CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen); mips_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_, - Primitive::kPrimNot); + DataType::Type::kReference); mips_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, instruction_->GetDexPc(), this); CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>(); mips_codegen->MoveLocation(out_, - calling_convention.GetReturnLocation(Primitive::kPrimNot), - Primitive::kPrimNot); + calling_convention.GetReturnLocation(DataType::Type::kReference), + DataType::Type::kReference); RestoreLiveRegisters(codegen, locations); __ B(GetExitLabel()); @@ -1095,18 +1012,19 @@ CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph, block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this), - assembler_(graph->GetArena(), &isa_features), + move_resolver_(graph->GetAllocator(), this), + assembler_(graph->GetAllocator(), &isa_features), isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), +
boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), clobbered_ra_(false) { // Save RA (containing the return address) to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(RA)); @@ -1122,12 +1040,13 @@ void CodeGeneratorMIPS::Finalize(CodeAllocator* allocator) { __ FinalizeCode(); // Adjust native pc offsets in stack maps. - for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + StackMapStream* stack_map_stream = GetStackMapStream(); + for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { uint32_t old_position = - stack_map_stream_.GetStackMap(i).native_pc_code_offset.Uint32Value(kMips); + stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips); uint32_t new_position = __ GetAdjustedPosition(old_position); DCHECK_GE(new_position, old_position); - stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + stack_map_stream->SetStackMapNativePcOffset(i, new_position); } // Adjust pc offsets for the disassembly information. @@ -1161,7 +1080,7 @@ void ParallelMoveResolverMIPS::EmitMove(size_t index) { void ParallelMoveResolverMIPS::EmitSwap(size_t index) { DCHECK_LT(index, moves_.size()); MoveOperands* move = moves_[index]; - Primitive::Type type = move->GetType(); + DataType::Type type = move->GetType(); Location loc1 = move->GetDestination(); Location loc2 = move->GetSource(); @@ -1180,22 +1099,28 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { __ Move(r2, r1); __ Move(r1, TMP); } else if (loc1.IsFpuRegister() && loc2.IsFpuRegister()) { - FRegister f1 = loc1.AsFpuRegister<FRegister>(); - FRegister f2 = loc2.AsFpuRegister<FRegister>(); - if (type == Primitive::kPrimFloat) { - __ MovS(FTMP, f2); - __ MovS(f2, f1); - __ MovS(f1, FTMP); + if (codegen_->GetGraph()->HasSIMD()) { + __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(loc1)); + __ MoveV(VectorRegisterFrom(loc1), VectorRegisterFrom(loc2)); + __ MoveV(VectorRegisterFrom(loc2), static_cast<VectorRegister>(FTMP)); } else { - DCHECK_EQ(type, Primitive::kPrimDouble); - __ MovD(FTMP, f2); - __ MovD(f2, f1); - __ MovD(f1, FTMP); + FRegister f1 = loc1.AsFpuRegister<FRegister>(); + FRegister f2 = loc2.AsFpuRegister<FRegister>(); + if (type == DataType::Type::kFloat32) { + __ MovS(FTMP, f2); + __ MovS(f2, f1); + __ MovS(f1, FTMP); + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + __ MovD(FTMP, f2); + __ MovD(f2, f1); + __ MovD(f1, FTMP); + } } } else if ((loc1.IsRegister() && loc2.IsFpuRegister()) || (loc1.IsFpuRegister() && loc2.IsRegister())) { // Swap FPR and GPR. - DCHECK_EQ(type, Primitive::kPrimFloat); // Can only swap a float. + DCHECK_EQ(type, DataType::Type::kFloat32); // Can only swap a float. FRegister f1 = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>() : loc2.AsFpuRegister<FRegister>(); Register r2 = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>(); @@ -1217,7 +1142,7 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { } else if ((loc1.IsRegisterPair() && loc2.IsFpuRegister()) || (loc1.IsFpuRegister() && loc2.IsRegisterPair())) { // Swap FPR and GPR register pair. 
- DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); FRegister f1 = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>() : loc2.AsFpuRegister<FRegister>(); Register r2_l = loc1.IsRegisterPair() ? loc1.AsRegisterPairLow<Register>() @@ -1237,6 +1162,8 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ false); } else if (loc1.IsDoubleStackSlot() && loc2.IsDoubleStackSlot()) { Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true); + } else if (loc1.IsSIMDStackSlot() && loc2.IsSIMDStackSlot()) { + ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex()); } else if ((loc1.IsRegister() && loc2.IsStackSlot()) || (loc1.IsStackSlot() && loc2.IsRegister())) { Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>(); @@ -1259,16 +1186,23 @@ void ParallelMoveResolverMIPS::EmitSwap(size_t index) { __ Move(TMP, reg_h); __ LoadFromOffset(kLoadWord, reg_h, SP, offset_h); __ StoreToOffset(kStoreWord, TMP, SP, offset_h); + } else if ((loc1.IsFpuRegister() && loc2.IsSIMDStackSlot()) || + (loc1.IsSIMDStackSlot() && loc2.IsFpuRegister())) { + Location fp_loc = loc1.IsFpuRegister() ? loc1 : loc2; + intptr_t offset = loc1.IsFpuRegister() ? loc2.GetStackIndex() : loc1.GetStackIndex(); + __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(fp_loc)); + __ LoadQFromOffset(fp_loc.AsFpuRegister<FRegister>(), SP, offset); + __ StoreQToOffset(FTMP, SP, offset); } else if (loc1.IsFpuRegister() || loc2.IsFpuRegister()) { FRegister reg = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>() : loc2.AsFpuRegister<FRegister>(); intptr_t offset = loc1.IsFpuRegister() ? loc2.GetStackIndex() : loc1.GetStackIndex(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ MovS(FTMP, reg); __ LoadSFromOffset(reg, SP, offset); __ StoreSToOffset(FTMP, SP, offset); } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); __ MovD(FTMP, reg); __ LoadDFromOffset(reg, SP, offset); __ StoreDToOffset(FTMP, SP, offset); @@ -1292,7 +1226,7 @@ void ParallelMoveResolverMIPS::Exchange(int index1, int index2, bool double_slot // automatically unspilled when the scratch scope object is destroyed). ScratchRegisterScope ensure_scratch(this, TMP, V0, codegen_->GetNumberOfCoreRegisters()); // If V0 spills onto the stack, SP-relative offsets need to be adjusted. - int stack_offset = ensure_scratch.IsSpilled() ? kMipsWordSize : 0; + int stack_offset = ensure_scratch.IsSpilled() ? kStackAlignment : 0; for (int i = 0; i <= (double_slot ? 
1 : 0); i++, stack_offset += kMipsWordSize) { __ LoadFromOffset(kLoadWord, Register(ensure_scratch.GetRegister()), @@ -1310,6 +1244,13 @@ void ParallelMoveResolverMIPS::Exchange(int index1, int index2, bool double_slot } } +void ParallelMoveResolverMIPS::ExchangeQuadSlots(int index1, int index2) { + __ LoadQFromOffset(FTMP, SP, index1); + __ LoadQFromOffset(FTMP2, SP, index2); + __ StoreQToOffset(FTMP, SP, index2); + __ StoreQToOffset(FTMP2, SP, index1); +} + void CodeGeneratorMIPS::ComputeSpillMask() { core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_; fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_; @@ -1339,13 +1280,20 @@ static dwarf::Reg DWARFReg(Register reg) { void CodeGeneratorMIPS::GenerateFrameEntry() { __ Bind(&frame_entry_label_); - bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kMips) || !IsLeafMethod(); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ Lhu(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); + __ Addiu(TMP, TMP, 1); + __ Sh(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); + } + + bool do_overflow_check = + FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips) || !IsLeafMethod(); if (do_overflow_check) { __ LoadFromOffset(kLoadWord, ZERO, SP, - -static_cast<int32_t>(GetStackOverflowReservedBytes(kMips))); + -static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kMips))); RecordPcInfo(nullptr, 0); } @@ -1357,8 +1305,9 @@ void CodeGeneratorMIPS::GenerateFrameEntry() { } // Make sure the frame size isn't unreasonably large. - if (GetFrameSize() > GetStackOverflowReservedBytes(kMips)) { - LOG(FATAL) << "Stack frame larger than " << GetStackOverflowReservedBytes(kMips) << " bytes"; + if (GetFrameSize() > GetStackOverflowReservedBytes(InstructionSet::kMips)) { + LOG(FATAL) << "Stack frame larger than " + << GetStackOverflowReservedBytes(InstructionSet::kMips) << " bytes"; } // Spill callee-saved registers. 
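Note on the overflow check in the frame entry above: it is an implicit probe. A minimal sketch of what the single "lw ZERO, -GetStackOverflowReservedBytes(kMips)(SP)" load achieves is given below, assuming the usual ART implicit-check scheme; `stack_guard_begin`, `sp`, `self` and `ThrowStackOverflowError` are illustrative names, and the generated code contains no branch at all, the load simply faults into the protected region when the stack is exhausted and the runtime's fault handler raises the error at the PC recorded by RecordPcInfo.

  // Logical equivalent of the faulting probe (illustrative only):
  if (sp - GetStackOverflowReservedBytes(InstructionSet::kMips) < stack_guard_begin) {
    ThrowStackOverflowError(self);  // reached via the SIGSEGV handler, not an explicit call
  }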
@@ -1458,7 +1407,7 @@ VectorRegister VectorRegisterFrom(Location location) { void CodeGeneratorMIPS::MoveLocation(Location destination, Location source, - Primitive::Type dst_type) { + DataType::Type dst_type) { if (source.Equals(destination)) { return; } @@ -1486,17 +1435,18 @@ void CodeGeneratorMIPS::MoveLocation(Location destination, __ Mfc1(dst_low, src); __ MoveFromFpuHigh(dst_high, src); } else { - DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination; + DCHECK(source.IsDoubleStackSlot()) + << "Cannot move from " << source << " to " << destination; int32_t off = source.GetStackIndex(); Register r = destination.AsRegisterPairLow<Register>(); __ LoadFromOffset(kLoadDoubleword, r, SP, off); } } else if (destination.IsFpuRegister()) { if (source.IsRegister()) { - DCHECK(!Primitive::Is64BitType(dst_type)); + DCHECK(!DataType::Is64BitType(dst_type)); __ Mtc1(source.AsRegister<Register>(), destination.AsFpuRegister<FRegister>()); } else if (source.IsRegisterPair()) { - DCHECK(Primitive::Is64BitType(dst_type)); + DCHECK(DataType::Is64BitType(dst_type)); FRegister dst = destination.AsFpuRegister<FRegister>(); Register src_high = source.AsRegisterPairHigh<Register>(); Register src_low = source.AsRegisterPairLow<Register>(); @@ -1507,20 +1457,20 @@ void CodeGeneratorMIPS::MoveLocation(Location destination, __ MoveV(VectorRegisterFrom(destination), VectorRegisterFrom(source)); } else { - if (Primitive::Is64BitType(dst_type)) { + if (DataType::Is64BitType(dst_type)) { __ MovD(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); } else { - DCHECK_EQ(dst_type, Primitive::kPrimFloat); + DCHECK_EQ(dst_type, DataType::Type::kFloat32); __ MovS(destination.AsFpuRegister<FRegister>(), source.AsFpuRegister<FRegister>()); } } } else if (source.IsSIMDStackSlot()) { __ LoadQFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); } else if (source.IsDoubleStackSlot()) { - DCHECK(Primitive::Is64BitType(dst_type)); + DCHECK(DataType::Is64BitType(dst_type)); __ LoadDFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); } else { - DCHECK(!Primitive::Is64BitType(dst_type)); + DCHECK(!DataType::Is64BitType(dst_type)); DCHECK(source.IsStackSlot()) << "Cannot move from " << source << " to " << destination; __ LoadSFromOffset(destination.AsFpuRegister<FRegister>(), SP, source.GetStackIndex()); } @@ -1539,7 +1489,8 @@ void CodeGeneratorMIPS::MoveLocation(Location destination, } else if (source.IsFpuRegister()) { __ StoreDToOffset(source.AsFpuRegister<FRegister>(), SP, dst_offset); } else { - DCHECK(source.IsDoubleStackSlot()) << "Cannot move from " << source << " to " << destination; + DCHECK(source.IsDoubleStackSlot()) + << "Cannot move from " << source << " to " << destination; __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex()); __ StoreToOffset(kStoreWord, TMP, SP, dst_offset); __ LoadFromOffset(kLoadWord, TMP, SP, source.GetStackIndex() + 4); @@ -1627,12 +1578,12 @@ void CodeGeneratorMIPS::AddLocationAsTemp(Location location, LocationSummary* lo } } -template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches) { + ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { - const DexFile& dex_file = 
info.target_dex_file; + const DexFile* dex_file = info.target_dex_file; size_t offset_or_index = info.offset_or_index; DCHECK(info.label.IsBound()); uint32_t literal_offset = __ GetLabelLocation(&info.label); @@ -1642,80 +1593,88 @@ inline void CodeGeneratorMIPS::EmitPcRelativeLinkerPatches( uint32_t pc_rel_offset = info_high.pc_rel_label.IsBound() ? __ GetLabelLocation(&info_high.pc_rel_label) : __ GetPcRelBaseLabelLocation(); - linker_patches->push_back(Factory(literal_offset, &dex_file, pc_rel_offset, offset_or_index)); + linker_patches->push_back(Factory(literal_offset, dex_file, pc_rel_offset, offset_or_index)); } } -void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { +void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_method_patches_.size() + + boot_image_method_patches_.size() + method_bss_entry_patches_.size() + - pc_relative_type_patches_.size() + + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size(); + boot_image_string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( + boot_image_method_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, - linker_patches); - } - EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, - linker_patches); + DCHECK(boot_image_method_patches_.empty()); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( + boot_image_string_patches_, linker_patches); + } + EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( + method_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( + type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( + string_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.dex_method_index, - info_high, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, 
&boot_image_method_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewMethodBssEntryPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.dex_method_index, - info_high, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); } CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewTypeBssEntryPatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &type_bss_entry_patches_); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_); + return NewPcRelativePatch( + &dex_file, string_index.index_, info_high, &boot_image_string_patches_); } -CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewStringBssEntryPatch( const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); +} + +CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch( + const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches) { @@ -1735,16 +1694,17 @@ Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, Register out, - Register base, - PcRelativePatchInfo* info_low) { + Register base) { DCHECK(!info_high->patch_info_high); DCHECK_NE(out, base); + bool reordering = __ SetReorder(false); if (GetInstructionSetFeatures().IsR6()) { DCHECK_EQ(base, ZERO); __ Bind(&info_high->label); __ Bind(&info_high->pc_rel_label); // Add the high half of a 32-bit offset to PC. __ Auipc(out, /* placeholder */ 0x1234); + __ SetReorder(reordering); } else { // If base is ZERO, emit NAL to obtain the actual base. if (base == ZERO) { @@ -1758,32 +1718,29 @@ void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo if (base == ZERO) { __ Bind(&info_high->pc_rel_label); } + __ SetReorder(reordering); // Add the high half of a 32-bit offset to PC. __ Addu(out, out, (base == ZERO) ? RA : base); } // A following instruction will add the sign-extended low half of the 32-bit // offset to `out` (e.g. lw, jialc, addiu). 
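// For illustration (R6 case; the 0x1234/0x5678 immediates in this file are linker
// placeholders), a patched high/low pair typically ends up as:
//   auipc out, %hi(entry)         // bound at info_high->label / pc_rel_label
//   lw    dst, %lo(entry)(out)    // the "following instruction", bound at info_low->label
// with %hi(entry) incremented by one whenever %lo(entry) is negative as an int16_t, so the
// sign-extended low half still sums to the full 32-bit offset.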
- DCHECK_EQ(info_low->patch_info_high, info_high); - __ Bind(&info_low->label); } CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootStringPatch( const DexFile& dex_file, - dex::StringIndex dex_index, + dex::StringIndex string_index, Handle<mirror::String> handle) { - jit_string_roots_.Overwrite(StringReference(&dex_file, dex_index), - reinterpret_cast64<uint64_t>(handle.GetReference())); - jit_string_patches_.emplace_back(dex_file, dex_index.index_); + ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); + jit_string_patches_.emplace_back(dex_file, string_index.index_); return &jit_string_patches_.back(); } CodeGeneratorMIPS::JitPatchInfo* CodeGeneratorMIPS::NewJitRootClassPatch( const DexFile& dex_file, - dex::TypeIndex dex_index, + dex::TypeIndex type_index, Handle<mirror::Class> handle) { - jit_class_roots_.Overwrite(TypeReference(&dex_file, dex_index), - reinterpret_cast64<uint64_t>(handle.GetReference())); - jit_class_patches_.emplace_back(dex_file, dex_index.index_); + ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); + jit_class_patches_.emplace_back(dex_file, type_index.index_); return &jit_class_patches_.back(); } @@ -1791,40 +1748,37 @@ void CodeGeneratorMIPS::PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, const CodeGeneratorMIPS::JitPatchInfo& info, uint64_t index_in_table) const { - uint32_t literal_offset = GetAssembler().GetLabelLocation(&info.high_label); + uint32_t high_literal_offset = GetAssembler().GetLabelLocation(&info.high_label); + uint32_t low_literal_offset = GetAssembler().GetLabelLocation(&info.low_label); uintptr_t address = reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>); uint32_t addr32 = dchecked_integral_cast<uint32_t>(address); // lui reg, addr32_high - DCHECK_EQ(code[literal_offset + 0], 0x34); - DCHECK_EQ(code[literal_offset + 1], 0x12); - DCHECK_EQ((code[literal_offset + 2] & 0xE0), 0x00); - DCHECK_EQ(code[literal_offset + 3], 0x3C); + DCHECK_EQ(code[high_literal_offset + 0], 0x34); + DCHECK_EQ(code[high_literal_offset + 1], 0x12); + DCHECK_EQ((code[high_literal_offset + 2] & 0xE0), 0x00); + DCHECK_EQ(code[high_literal_offset + 3], 0x3C); // instr reg, reg, addr32_low - DCHECK_EQ(code[literal_offset + 4], 0x78); - DCHECK_EQ(code[literal_offset + 5], 0x56); + DCHECK_EQ(code[low_literal_offset + 0], 0x78); + DCHECK_EQ(code[low_literal_offset + 1], 0x56); addr32 += (addr32 & 0x8000) << 1; // Account for sign extension in "instr reg, reg, addr32_low". 
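// Worked example of the adjustment above (address value is illustrative): for
// addr32 = 0x1234ABCD the low half 0xABCD sign-extends to -0x5433, so the patched low
// instruction subtracts 0x5433. Adding (addr32 & 0x8000) << 1 = 0x10000 first makes the
// "lui" immediate 0x1235 instead of 0x1234, and the result is exact:
// 0x12350000 - 0x5433 = 0x1234ABCD.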
// lui reg, addr32_high - code[literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16); - code[literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24); + code[high_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 16); + code[high_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 24); // instr reg, reg, addr32_low - code[literal_offset + 4] = static_cast<uint8_t>(addr32 >> 0); - code[literal_offset + 5] = static_cast<uint8_t>(addr32 >> 8); + code[low_literal_offset + 0] = static_cast<uint8_t>(addr32 >> 0); + code[low_literal_offset + 1] = static_cast<uint8_t>(addr32 >> 8); } void CodeGeneratorMIPS::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const JitPatchInfo& info : jit_string_patches_) { - const auto it = jit_string_roots_.find(StringReference(&info.target_dex_file, - dex::StringIndex(info.index))); - DCHECK(it != jit_string_roots_.end()); - uint64_t index_in_table = it->second; + StringReference string_reference(&info.target_dex_file, dex::StringIndex(info.index)); + uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } for (const JitPatchInfo& info : jit_class_patches_) { - const auto it = jit_class_roots_.find(TypeReference(&info.target_dex_file, - dex::TypeIndex(info.index))); - DCHECK(it != jit_class_roots_.end()); - uint64_t index_in_table = it->second; + TypeReference type_reference(&info.target_dex_file, dex::TypeIndex(info.index)); + uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } } @@ -1865,6 +1819,11 @@ void CodeGeneratorMIPS::SetupBlockedRegisters() const { blocked_core_registers_[TMP] = true; blocked_fpu_registers_[FTMP] = true; + if (GetInstructionSetFeatures().HasMsa()) { + // To be used just for MSA instructions. + blocked_fpu_registers_[FTMP2] = true; + } + // Reserve suspend and thread registers. blocked_core_registers_[S0] = true; blocked_core_registers_[TR] = true; @@ -1963,9 +1922,15 @@ void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool d void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path, Register class_reg) { - __ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value()); - __ LoadConst32(AT, mirror::Class::kStatusInitialized); - __ Blt(TMP, AT, slow_path->GetEntryLabel()); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); + __ Sltiu(TMP, TMP, shifted_initialized_value); + __ Bnez(TMP, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. 
__ Sync(0); __ Bind(slow_path->GetExitLabel()); @@ -1978,8 +1943,19 @@ void InstructionCodeGeneratorMIPS::GenerateMemoryBarrier(MemBarrierKind kind ATT void InstructionCodeGeneratorMIPS::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathMIPS* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathMIPS(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathMIPS*>(instruction->GetSlowPath()); + + if (slow_path == nullptr) { + slow_path = + new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathMIPS(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } __ LoadFromOffset(kLoadUnsignedHalfword, TMP, @@ -2003,10 +1979,11 @@ InstructionCodeGeneratorMIPS::InstructionCodeGeneratorMIPS(HGraph* graph, void LocationsBuilderMIPS::HandleBinaryOp(HBinaryOperation* instruction) { DCHECK_EQ(instruction->InputCount(), 2U); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - Primitive::Type type = instruction->GetResultType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type type = instruction->GetResultType(); + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); HInstruction* right = instruction->InputAt(1); bool can_use_imm = false; @@ -2014,11 +1991,22 @@ void LocationsBuilderMIPS::HandleBinaryOp(HBinaryOperation* instruction) { int32_t imm = CodeGenerator::GetInt32ValueOf(right->AsConstant()); if (instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) { can_use_imm = IsUint<16>(imm); - } else if (instruction->IsAdd()) { - can_use_imm = IsInt<16>(imm); } else { - DCHECK(instruction->IsSub()); - can_use_imm = IsInt<16>(-imm); + DCHECK(instruction->IsSub() || instruction->IsAdd()); + if (instruction->IsSub()) { + imm = -imm; + } + if (isR6) { + bool single_use = right->GetUses().HasExactlyOneElement(); + int16_t imm_high = High16Bits(imm); + int16_t imm_low = Low16Bits(imm); + if (imm_low < 0) { + imm_high += 1; + } + can_use_imm = !((imm_high != 0) && (imm_low != 0)) || single_use; + } else { + can_use_imm = IsInt<16>(imm); + } } } if (can_use_imm) @@ -2029,15 +2017,15 @@ void LocationsBuilderMIPS::HandleBinaryOp(HBinaryOperation* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK(instruction->IsAdd() || instruction->IsSub()); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); @@ -2050,11 +2038,12 @@ void LocationsBuilderMIPS::HandleBinaryOp(HBinaryOperation* instruction) { } void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); + bool isR6 = 
codegen_->GetInstructionSetFeatures().IsR6(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { Register dst = locations->Out().AsRegister<Register>(); Register lhs = locations->InAt(0).AsRegister<Register>(); Location rhs_location = locations->InAt(1); @@ -2083,22 +2072,37 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) __ Xori(dst, lhs, rhs_imm); else __ Xor(dst, lhs, rhs_reg); - } else if (instruction->IsAdd()) { - if (use_imm) - __ Addiu(dst, lhs, rhs_imm); - else - __ Addu(dst, lhs, rhs_reg); } else { - DCHECK(instruction->IsSub()); - if (use_imm) - __ Addiu(dst, lhs, -rhs_imm); - else + DCHECK(instruction->IsAdd() || instruction->IsSub()); + if (use_imm) { + if (instruction->IsSub()) { + rhs_imm = -rhs_imm; + } + if (IsInt<16>(rhs_imm)) { + __ Addiu(dst, lhs, rhs_imm); + } else { + DCHECK(isR6); + int16_t rhs_imm_high = High16Bits(rhs_imm); + int16_t rhs_imm_low = Low16Bits(rhs_imm); + if (rhs_imm_low < 0) { + rhs_imm_high += 1; + } + __ Aui(dst, lhs, rhs_imm_high); + if (rhs_imm_low != 0) { + __ Addiu(dst, dst, rhs_imm_low); + } + } + } else if (instruction->IsAdd()) { + __ Addu(dst, lhs, rhs_reg); + } else { + DCHECK(instruction->IsSub()); __ Subu(dst, lhs, rhs_reg); + } } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); Register dst_low = locations->Out().AsRegisterPairLow<Register>(); Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); @@ -2239,20 +2243,20 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { FRegister dst = locations->Out().AsFpuRegister<FRegister>(); FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); if (instruction->IsAdd()) { - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ AddS(dst, lhs, rhs); } else { __ AddD(dst, lhs, rhs); } } else { DCHECK(instruction->IsSub()); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ SubS(dst, lhs, rhs); } else { __ SubD(dst, lhs, rhs); @@ -2269,15 +2273,15 @@ void InstructionCodeGeneratorMIPS::HandleBinaryOp(HBinaryOperation* instruction) void LocationsBuilderMIPS::HandleShift(HBinaryOperation* instr) { DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); - Primitive::Type type = instr->GetResultType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); + DataType::Type type = instr->GetResultType(); switch (type) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); locations->SetOut(Location::RequiresRegister()); @@ -2292,20 +2296,20 @@ static constexpr size_t kMipsBitsPerWord = kMipsWordSize * kBitsPerByte; void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { DCHECK(instr->IsShl() || 
instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = instr->GetLocations(); - Primitive::Type type = instr->GetType(); + DataType::Type type = instr->GetType(); Location rhs_location = locations->InAt(1); bool use_imm = rhs_location.IsConstant(); Register rhs_reg = use_imm ? ZERO : rhs_location.AsRegister<Register>(); int64_t rhs_imm = use_imm ? CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()) : 0; const uint32_t shift_mask = - (type == Primitive::kPrimInt) ? kMaxIntShiftDistance : kMaxLongShiftDistance; + (type == DataType::Type::kInt32) ? kMaxIntShiftDistance : kMaxLongShiftDistance; const uint32_t shift_value = rhs_imm & shift_mask; // Are the INS (Insert Bit Field) and ROTR instructions supported? bool has_ins_rotr = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { Register dst = locations->Out().AsRegister<Register>(); Register lhs = locations->InAt(0).AsRegister<Register>(); if (use_imm) { @@ -2354,7 +2358,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); Register dst_low = locations->Out().AsRegisterPairLow<Register>(); Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); @@ -2441,6 +2445,7 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { } } } else { + const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); MipsLabel done; if (instr->IsShl()) { __ Sllv(dst_low, lhs_low, rhs_reg); @@ -2450,9 +2455,14 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Sllv(dst_high, lhs_high, rhs_reg); __ Or(dst_high, dst_high, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); - __ Beqz(TMP, &done); - __ Move(dst_high, dst_low); - __ Move(dst_low, ZERO); + if (isR6) { + __ Beqzc(TMP, &done, /* is_bare */ true); + __ Move(dst_high, dst_low); + __ Move(dst_low, ZERO); + } else { + __ Movn(dst_high, dst_low, TMP); + __ Movn(dst_low, ZERO, TMP); + } } else if (instr->IsShr()) { __ Srav(dst_high, lhs_high, rhs_reg); __ Nor(AT, ZERO, rhs_reg); @@ -2461,9 +2471,15 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Srlv(dst_low, lhs_low, rhs_reg); __ Or(dst_low, dst_low, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); - __ Beqz(TMP, &done); - __ Move(dst_low, dst_high); - __ Sra(dst_high, dst_high, 31); + if (isR6) { + __ Beqzc(TMP, &done, /* is_bare */ true); + __ Move(dst_low, dst_high); + __ Sra(dst_high, dst_high, 31); + } else { + __ Sra(AT, dst_high, 31); + __ Movn(dst_low, dst_high, TMP); + __ Movn(dst_high, AT, TMP); + } } else if (instr->IsUShr()) { __ Srlv(dst_high, lhs_high, rhs_reg); __ Nor(AT, ZERO, rhs_reg); @@ -2472,10 +2488,15 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Srlv(dst_low, lhs_low, rhs_reg); __ Or(dst_low, dst_low, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); - __ Beqz(TMP, &done); - __ Move(dst_low, dst_high); - __ Move(dst_high, ZERO); - } else { + if (isR6) { + __ Beqzc(TMP, &done, /* is_bare */ true); + __ Move(dst_low, dst_high); + __ Move(dst_high, ZERO); + } else { + __ Movn(dst_low, dst_high, TMP); + __ Movn(dst_high, ZERO, TMP); + } + } else { // Rotate. 
__ Nor(AT, ZERO, rhs_reg); __ Srlv(TMP, lhs_low, rhs_reg); __ Sll(dst_low, lhs_high, 1); @@ -2486,10 +2507,16 @@ void InstructionCodeGeneratorMIPS::HandleShift(HBinaryOperation* instr) { __ Sllv(dst_high, dst_high, AT); __ Or(dst_high, dst_high, TMP); __ Andi(TMP, rhs_reg, kMipsBitsPerWord); - __ Beqz(TMP, &done); - __ Move(TMP, dst_high); - __ Move(dst_high, dst_low); - __ Move(dst_low, TMP); + if (isR6) { + __ Beqzc(TMP, &done, /* is_bare */ true); + __ Move(TMP, dst_high); + __ Move(dst_high, dst_low); + __ Move(dst_low, TMP); + } else { + __ Movn(AT, dst_high, TMP); + __ Movn(dst_high, dst_low, TMP); + __ Movn(dst_low, AT, TMP); + } } __ Bind(&done); } @@ -2518,20 +2545,20 @@ void InstructionCodeGeneratorMIPS::VisitAnd(HAnd* instruction) { } void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (type == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (type == DataType::Type::kReference); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, - object_array_get_with_read_barrier - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(type)) { + if (DataType::IsFloatingPointType(type)) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { // The output overlaps in the case of an object array get with @@ -2545,7 +2572,12 @@ void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier. if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); + bool temp_needed = instruction->GetIndex()->IsConstant() + ? 
!kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + if (temp_needed) { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -2565,11 +2597,12 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); auto null_checker = GetImplicitNullChecker(instruction, codegen_); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); const bool maybe_compressed_char_at = mirror::kUseStringCompression && instruction->IsStringCharAt(); switch (type) { - case Primitive::kPrimBoolean: { + case DataType::Type::kBool: + case DataType::Type::kUint8: { Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -2582,7 +2615,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimByte: { + case DataType::Type::kInt8: { Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = @@ -2595,20 +2628,7 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimShort: { - Register out = out_loc.AsRegister<Register>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); - } else { - __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_2, TMP); - __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); - } - break; - } - - case Primitive::kPrimChar: { + case DataType::Type::kUint16: { Register out = out_loc.AsRegister<Register>(); if (maybe_compressed_char_at) { uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -2652,6 +2672,9 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { __ ShiftAndAdd(TMP, index_reg, obj, TIMES_2, TMP); __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset); __ Bind(&done); + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(TMP, index_reg, obj); + __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker); } else { __ ShiftAndAdd(TMP, index_reg, obj, TIMES_2, TMP); __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker); @@ -2660,13 +2683,32 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt16: { + Register out = out_loc.AsRegister<Register>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(TMP, index.AsRegister<Register>(), obj); + __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); + } else { + __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_2, TMP); + __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); + } + break; + } + + case DataType::Type::kInt32: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker); + } else if 
(instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(TMP, index.AsRegister<Register>(), obj); + __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); } else { __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP); __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker); @@ -2674,23 +2716,39 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); + bool temp_needed = index.IsConstant() + ? !kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + Location temp = temp_needed ? locations->GetTemp(0) : Location::NoLocation(); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - data_offset, - index, - temp, - /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + offset, + temp, + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ false); + } } else { Register out = out_loc.AsRegister<Register>(); if (index.IsConstant()) { @@ -2718,12 +2776,15 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register out = out_loc.AsRegisterPairLow<Register>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker); + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(TMP, index.AsRegister<Register>(), obj); + __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); } else { __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_8, TMP); __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker); @@ -2731,12 +2792,15 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { FRegister out = out_loc.AsFpuRegister<FRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; __ LoadSFromOffset(out, obj, offset, null_checker); + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(TMP, index.AsRegister<Register>(), obj); + __ LoadSFromOffset(out, TMP, data_offset, null_checker); } else { __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_4, TMP); __ LoadSFromOffset(out, TMP, data_offset, null_checker); @@ -2744,12 +2808,15 @@ void 
InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { FRegister out = out_loc.AsFpuRegister<FRegister>(); if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset; __ LoadDFromOffset(out, obj, offset, null_checker); + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(TMP, index.AsRegister<Register>(), obj); + __ LoadDFromOffset(out, TMP, data_offset, null_checker); } else { __ ShiftAndAdd(TMP, index.AsRegister<Register>(), obj, TIMES_8, TMP); __ LoadDFromOffset(out, TMP, data_offset, null_checker); @@ -2757,14 +2824,16 @@ void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } } void LocationsBuilderMIPS::VisitArrayLength(HArrayLength* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -2802,13 +2871,13 @@ Location LocationsBuilderMIPS::FpuRegisterOrConstantForStore(HInstruction* instr } void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, may_need_runtime_call_for_type_check ? LocationSummary::kCallOnSlowPath : @@ -2816,7 +2885,7 @@ void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + if (DataType::IsFloatingPointType(instruction->InputAt(2)->GetType())) { locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); } else { locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); @@ -2832,7 +2901,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { Register obj = locations->InAt(0).AsRegister<Register>(); Location index = locations->InAt(1); Location value_location = locations->InAt(2); - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -2840,8 +2909,9 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { Register base_reg = index.IsConstant() ? 
obj : TMP; switch (value_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1; @@ -2858,11 +2928,13 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimShort: - case Primitive::kPrimChar: { + case DataType::Type::kUint16: + case DataType::Type::kInt16: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2; + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(base_reg, index.AsRegister<Register>(), obj); } else { __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_2, base_reg); } @@ -2876,10 +2948,12 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(base_reg, index.AsRegister<Register>(), obj); } else { __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg); } @@ -2893,7 +2967,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { if (value_location.IsConstant()) { // Just setting null. 
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); @@ -2921,7 +2995,7 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { SlowPathCodeMIPS* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { - slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS(instruction); + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathMIPS(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { MipsLabel non_zero; @@ -2929,6 +3003,8 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(base_reg, index.AsRegister<Register>(), obj); } else { __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg); } @@ -3008,10 +3084,12 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(base_reg, index.AsRegister<Register>(), obj); } else { __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_8, base_reg); } @@ -3025,10 +3103,12 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(base_reg, index.AsRegister<Register>(), obj); } else { __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_4, base_reg); } @@ -3042,10 +3122,12 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; + } else if (instruction->InputAt(1)->IsIntermediateArrayAddressIndex()) { + __ Addu(base_reg, index.AsRegister<Register>(), obj); } else { __ ShiftAndAdd(base_reg, index.AsRegister<Register>(), obj, TIMES_8, base_reg); } @@ -3059,40 +3141,132 @@ void InstructionCodeGeneratorMIPS::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } } +void LocationsBuilderMIPS::VisitIntermediateArrayAddressIndex( + HIntermediateArrayAddressIndex* instruction) { + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + + HIntConstant* shift = instruction->GetShift()->AsIntConstant(); + + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::ConstantLocation(shift)); + locations->SetOut(Location::RequiresRegister(), 
Location::kNoOutputOverlap); +} + +void InstructionCodeGeneratorMIPS::VisitIntermediateArrayAddressIndex( + HIntermediateArrayAddressIndex* instruction) { + LocationSummary* locations = instruction->GetLocations(); + Register index_reg = locations->InAt(0).AsRegister<Register>(); + uint32_t shift = instruction->GetShift()->AsIntConstant()->GetValue(); + __ Sll(locations->Out().AsRegister<Register>(), index_reg, shift); +} + void LocationsBuilderMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + + bool const_index = false; + bool const_length = false; + + if (index->IsConstant()) { + if (length->IsConstant()) { + const_index = true; + const_length = true; + } else { + int32_t index_value = index->AsIntConstant()->GetValue(); + if (index_value < 0 || IsInt<16>(index_value + 1)) { + const_index = true; + } + } + } else if (length->IsConstant()) { + int32_t length_value = length->AsIntConstant()->GetValue(); + if (IsUint<15>(length_value)) { + const_length = true; + } + } + + locations->SetInAt(0, const_index + ? Location::ConstantLocation(index->AsConstant()) + : Location::RequiresRegister()); + locations->SetInAt(1, const_length + ? Location::ConstantLocation(length->AsConstant()) + : Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary* locations = instruction->GetLocations(); - BoundsCheckSlowPathMIPS* slow_path = - new (GetGraph()->GetArena()) BoundsCheckSlowPathMIPS(instruction); - codegen_->AddSlowPath(slow_path); - - Register index = locations->InAt(0).AsRegister<Register>(); - Register length = locations->InAt(1).AsRegister<Register>(); + Location index_loc = locations->InAt(0); + Location length_loc = locations->InAt(1); + + if (length_loc.IsConstant()) { + int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0 || index >= length) { + BoundsCheckSlowPathMIPS* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction); + codegen_->AddSlowPath(slow_path); + __ B(slow_path->GetEntryLabel()); + } else { + // Nothing to be done. + } + return; + } - // length is limited by the maximum positive signed 32-bit integer. - // Unsigned comparison of length and index checks for index < 0 - // and for length <= index simultaneously. 
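// [Editorial note, not part of the original patch] The removed comment above relies on
// a standard bounds-check trick that the new constant-aware paths preserve: an array
// length satisfies 0 <= length <= INT32_MAX, so the signed test
// (index < 0 || index >= length) collapses into a single unsigned comparison, because a
// negative index reinterpreted as uint32_t is larger than any valid length. A minimal
// sketch of the idea (hypothetical helper, not ART code):
//
//   static inline bool NeedsBoundsCheckSlowPath(int32_t index, int32_t length) {
//     return static_cast<uint32_t>(index) >= static_cast<uint32_t>(length);
//   }
//
// The constant-operand paths below keep the same semantics, e.g. Sltiu(TMP, index,
// length) followed by Beqz(TMP, slow_path) when the length is a small constant.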
- __ Bgeu(index, length, slow_path->GetEntryLabel()); + BoundsCheckSlowPathMIPS* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction); + codegen_->AddSlowPath(slow_path); + Register index = index_loc.AsRegister<Register>(); + if (length == 0) { + __ B(slow_path->GetEntryLabel()); + } else if (length == 1) { + __ Bnez(index, slow_path->GetEntryLabel()); + } else { + DCHECK(IsUint<15>(length)) << length; + __ Sltiu(TMP, index, length); + __ Beqz(TMP, slow_path->GetEntryLabel()); + } + } else { + Register length = length_loc.AsRegister<Register>(); + BoundsCheckSlowPathMIPS* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS(instruction); + codegen_->AddSlowPath(slow_path); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0) { + __ B(slow_path->GetEntryLabel()); + } else if (index == 0) { + __ Blez(length, slow_path->GetEntryLabel()); + } else { + DCHECK(IsInt<16>(index + 1)) << index; + __ Sltiu(TMP, length, index + 1); + __ Bnez(TMP, slow_path->GetEntryLabel()); + } + } else { + Register index = index_loc.AsRegister<Register>(); + __ Bgeu(index, length, slow_path->GetEntryLabel()); + } + } } // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { if (kEmitCompilerReadBarrier && + !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -3108,27 +3282,10 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { } void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. - break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); @@ -3155,21 +3312,10 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); MipsLabel done; - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. 
- bool is_type_check_slow_path_fatal = false; - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeMIPS* slow_path = - new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction, - is_type_check_slow_path_fatal); + new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( + instruction, is_type_check_slow_path_fatal); codegen_->AddSlowPath(slow_path); // Avoid this check if we know `obj` is not null. @@ -3323,7 +3469,7 @@ void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) { void LocationsBuilderMIPS::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, Location::RequiresRegister()); if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); @@ -3332,7 +3478,7 @@ void LocationsBuilderMIPS::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorMIPS::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( + SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS( check->GetLoadClass(), check, check->GetDexPc(), @@ -3343,31 +3489,32 @@ void InstructionCodeGeneratorMIPS::VisitClinitCheck(HClinitCheck* check) { } void LocationsBuilderMIPS::VisitCompare(HCompare* compare) { - Primitive::Type in_type = compare->InputAt(0)->GetType(); + DataType::Type in_type = compare->InputAt(0)->GetType(); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); switch (in_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); // Output overlaps because it is written before doing the low comparison. 
locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -3381,18 +3528,19 @@ void LocationsBuilderMIPS::VisitCompare(HCompare* compare) { void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { LocationSummary* locations = instruction->GetLocations(); Register res = locations->Out().AsRegister<Register>(); - Primitive::Type in_type = instruction->InputAt(0)->GetType(); + DataType::Type in_type = instruction->InputAt(0)->GetType(); bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); // 0 if: left == right // 1 if: left > right // -1 if: left < right switch (in_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { Register lhs = locations->InAt(0).AsRegister<Register>(); Register rhs = locations->InAt(1).AsRegister<Register>(); __ Slt(TMP, lhs, rhs); @@ -3400,7 +3548,7 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { __ Subu(res, res, TMP); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { MipsLabel done; Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); Register lhs_low = locations->InAt(0).AsRegisterPairLow<Register>(); @@ -3418,7 +3566,7 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { bool gt_bias = instruction->IsGtBias(); FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); @@ -3458,7 +3606,7 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { __ Bind(&done); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { bool gt_bias = instruction->IsGtBias(); FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); @@ -3505,16 +3653,16 @@ void InstructionCodeGeneratorMIPS::VisitCompare(HCompare* instruction) { } void LocationsBuilderMIPS::HandleCondition(HCondition* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); switch (instruction->InputAt(0)->GetType()) { default: - case Primitive::kPrimLong: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); break; @@ -3529,7 +3677,7 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { return; } - Primitive::Type type = instruction->InputAt(0)->GetType(); + DataType::Type type = instruction->InputAt(0)->GetType(); 
LocationSummary* locations = instruction->GetLocations(); switch (type) { @@ -3538,12 +3686,12 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { GenerateIntCompare(instruction->GetCondition(), locations); return; - case Primitive::kPrimLong: + case DataType::Type::kInt64: GenerateLongCompare(instruction->GetCondition(), locations); return; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations); return; } @@ -3551,80 +3699,258 @@ void InstructionCodeGeneratorMIPS::HandleCondition(HCondition* instruction) { void InstructionCodeGeneratorMIPS::DivRemOneOrMinusOne(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); DCHECK(second.IsConstant()); - - Register out = locations->Out().AsRegister<Register>(); - Register dividend = locations->InAt(0).AsRegister<Register>(); - int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); + int64_t imm = Int64FromConstant(second.GetConstant()); DCHECK(imm == 1 || imm == -1); - if (instruction->IsRem()) { - __ Move(out, ZERO); + if (instruction->GetResultType() == DataType::Type::kInt32) { + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + + if (instruction->IsRem()) { + __ Move(out, ZERO); + } else { + if (imm == -1) { + __ Subu(out, ZERO, dividend); + } else if (out != dividend) { + __ Move(out, dividend); + } + } } else { - if (imm == -1) { - __ Subu(out, ZERO, dividend); - } else if (out != dividend) { - __ Move(out, dividend); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); + Register out_high = locations->Out().AsRegisterPairHigh<Register>(); + Register out_low = locations->Out().AsRegisterPairLow<Register>(); + Register in_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register in_low = locations->InAt(0).AsRegisterPairLow<Register>(); + + if (instruction->IsRem()) { + __ Move(out_high, ZERO); + __ Move(out_low, ZERO); + } else { + if (imm == -1) { + __ Subu(out_low, ZERO, in_low); + __ Sltu(AT, ZERO, out_low); + __ Subu(out_high, ZERO, in_high); + __ Subu(out_high, out_high, AT); + } else { + __ Move(out_low, in_low); + __ Move(out_high, in_high); + } } } } void InstructionCodeGeneratorMIPS::DivRemByPowerOfTwo(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); + const bool is_r2_or_newer = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); + const bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); DCHECK(second.IsConstant()); - Register out = locations->Out().AsRegister<Register>(); - Register dividend = locations->InAt(0).AsRegister<Register>(); - int32_t imm = second.GetConstant()->AsIntConstant()->GetValue(); - uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); - int ctz_imm = CTZ(abs_imm); + if (instruction->GetResultType() == DataType::Type::kInt32) { + Register out = locations->Out().AsRegister<Register>(); + Register dividend = locations->InAt(0).AsRegister<Register>(); + int32_t imm = 
second.GetConstant()->AsIntConstant()->GetValue(); + uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm)); + int ctz_imm = CTZ(abs_imm); - if (instruction->IsDiv()) { - if (ctz_imm == 1) { - // Fast path for division by +/-2, which is very common. - __ Srl(TMP, dividend, 31); + if (instruction->IsDiv()) { + if (ctz_imm == 1) { + // Fast path for division by +/-2, which is very common. + __ Srl(TMP, dividend, 31); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + } + __ Addu(out, dividend, TMP); + __ Sra(out, out, ctz_imm); + if (imm < 0) { + __ Subu(out, ZERO, out); + } } else { - __ Sra(TMP, dividend, 31); - __ Srl(TMP, TMP, 32 - ctz_imm); - } - __ Addu(out, dividend, TMP); - __ Sra(out, out, ctz_imm); - if (imm < 0) { - __ Subu(out, ZERO, out); + if (ctz_imm == 1) { + // Fast path for modulo +/-2, which is very common. + __ Sra(TMP, dividend, 31); + __ Subu(out, dividend, TMP); + __ Andi(out, out, 1); + __ Addu(out, out, TMP); + } else { + __ Sra(TMP, dividend, 31); + __ Srl(TMP, TMP, 32 - ctz_imm); + __ Addu(out, dividend, TMP); + if (IsUint<16>(abs_imm - 1)) { + __ Andi(out, out, abs_imm - 1); + } else { + if (is_r2_or_newer) { + __ Ins(out, ZERO, ctz_imm, 32 - ctz_imm); + } else { + __ Sll(out, out, 32 - ctz_imm); + __ Srl(out, out, 32 - ctz_imm); + } + } + __ Subu(out, out, TMP); + } } } else { - if (ctz_imm == 1) { - // Fast path for modulo +/-2, which is very common. - __ Sra(TMP, dividend, 31); - __ Subu(out, dividend, TMP); - __ Andi(out, out, 1); - __ Addu(out, out, TMP); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); + Register out_high = locations->Out().AsRegisterPairHigh<Register>(); + Register out_low = locations->Out().AsRegisterPairLow<Register>(); + Register in_high = locations->InAt(0).AsRegisterPairHigh<Register>(); + Register in_low = locations->InAt(0).AsRegisterPairLow<Register>(); + int64_t imm = Int64FromConstant(second.GetConstant()); + uint64_t abs_imm = static_cast<uint64_t>(AbsOrMin(imm)); + int ctz_imm = CTZ(abs_imm); + + if (instruction->IsDiv()) { + if (ctz_imm < 32) { + if (ctz_imm == 1) { + __ Srl(AT, in_high, 31); + } else { + __ Sra(AT, in_high, 31); + __ Srl(AT, AT, 32 - ctz_imm); + } + __ Addu(AT, AT, in_low); + __ Sltu(TMP, AT, in_low); + __ Addu(out_high, in_high, TMP); + __ Srl(out_low, AT, ctz_imm); + if (is_r2_or_newer) { + __ Ins(out_low, out_high, 32 - ctz_imm, ctz_imm); + __ Sra(out_high, out_high, ctz_imm); + } else { + __ Sll(AT, out_high, 32 - ctz_imm); + __ Sra(out_high, out_high, ctz_imm); + __ Or(out_low, out_low, AT); + } + if (imm < 0) { + __ Subu(out_low, ZERO, out_low); + __ Sltu(AT, ZERO, out_low); + __ Subu(out_high, ZERO, out_high); + __ Subu(out_high, out_high, AT); + } + } else if (ctz_imm == 32) { + __ Sra(AT, in_high, 31); + __ Addu(AT, AT, in_low); + __ Sltu(AT, AT, in_low); + __ Addu(out_low, in_high, AT); + if (imm < 0) { + __ Srl(TMP, out_low, 31); + __ Subu(out_low, ZERO, out_low); + __ Sltu(AT, ZERO, out_low); + __ Subu(out_high, TMP, AT); + } else { + __ Sra(out_high, out_low, 31); + } + } else if (ctz_imm < 63) { + __ Sra(AT, in_high, 31); + __ Srl(TMP, AT, 64 - ctz_imm); + __ Addu(AT, AT, in_low); + __ Sltu(AT, AT, in_low); + __ Addu(out_low, in_high, AT); + __ Addu(out_low, out_low, TMP); + __ Sra(out_low, out_low, ctz_imm - 32); + if (imm < 0) { + __ Subu(out_low, ZERO, out_low); + } + __ Sra(out_high, out_low, 31); + } else { + DCHECK_LT(imm, 0); + if (is_r6) { + __ Aui(AT, in_high, 0x8000); + } else { + __ Lui(AT, 0x8000); + __ Xor(AT, AT, in_high); + } + __ Or(AT, 
AT, in_low); + __ Sltiu(out_low, AT, 1); + __ Move(out_high, ZERO); + } } else { - __ Sra(TMP, dividend, 31); - __ Srl(TMP, TMP, 32 - ctz_imm); - __ Addu(out, dividend, TMP); - if (IsUint<16>(abs_imm - 1)) { - __ Andi(out, out, abs_imm - 1); + if ((ctz_imm == 1) && !is_r6) { + __ Andi(AT, in_low, 1); + __ Sll(TMP, in_low, 31); + __ And(TMP, in_high, TMP); + __ Sra(out_high, TMP, 31); + __ Or(out_low, out_high, AT); + } else if (ctz_imm < 32) { + __ Sra(AT, in_high, 31); + if (ctz_imm <= 16) { + __ Andi(out_low, in_low, abs_imm - 1); + } else if (is_r2_or_newer) { + __ Ext(out_low, in_low, 0, ctz_imm); + } else { + __ Sll(out_low, in_low, 32 - ctz_imm); + __ Srl(out_low, out_low, 32 - ctz_imm); + } + if (is_r6) { + __ Selnez(out_high, AT, out_low); + } else { + __ Movz(AT, ZERO, out_low); + __ Move(out_high, AT); + } + if (is_r2_or_newer) { + __ Ins(out_low, out_high, ctz_imm, 32 - ctz_imm); + } else { + __ Sll(AT, out_high, ctz_imm); + __ Or(out_low, out_low, AT); + } + } else if (ctz_imm == 32) { + __ Sra(AT, in_high, 31); + __ Move(out_low, in_low); + if (is_r6) { + __ Selnez(out_high, AT, out_low); + } else { + __ Movz(AT, ZERO, out_low); + __ Move(out_high, AT); + } + } else if (ctz_imm < 63) { + __ Sra(AT, in_high, 31); + __ Move(TMP, in_low); + if (ctz_imm - 32 <= 16) { + __ Andi(out_high, in_high, (1 << (ctz_imm - 32)) - 1); + } else if (is_r2_or_newer) { + __ Ext(out_high, in_high, 0, ctz_imm - 32); + } else { + __ Sll(out_high, in_high, 64 - ctz_imm); + __ Srl(out_high, out_high, 64 - ctz_imm); + } + __ Move(out_low, TMP); + __ Or(TMP, TMP, out_high); + if (is_r6) { + __ Selnez(AT, AT, TMP); + } else { + __ Movz(AT, ZERO, TMP); + } + if (is_r2_or_newer) { + __ Ins(out_high, AT, ctz_imm - 32, 64 - ctz_imm); + } else { + __ Sll(AT, AT, ctz_imm - 32); + __ Or(out_high, out_high, AT); + } } else { - __ Sll(out, out, 32 - ctz_imm); - __ Srl(out, out, 32 - ctz_imm); + if (is_r6) { + __ Aui(AT, in_high, 0x8000); + } else { + __ Lui(AT, 0x8000); + __ Xor(AT, AT, in_high); + } + __ Or(AT, AT, in_low); + __ Sltiu(AT, AT, 1); + __ Sll(AT, AT, 31); + __ Move(out_low, in_low); + __ Xor(out_high, in_high, AT); } - __ Subu(out, out, TMP); } } } void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt32); LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); @@ -3675,7 +4001,7 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemWithAnyConstant(HBinaryOperatio void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimInt); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt32); LocationSummary* locations = instruction->GetLocations(); Register out = locations->Out().AsRegister<Register>(); @@ -3714,32 +4040,47 @@ void InstructionCodeGeneratorMIPS::GenerateDivRemIntegral(HBinaryOperation* inst } void LocationsBuilderMIPS::VisitDiv(HDiv* div) { - Primitive::Type type = div->GetResultType(); - LocationSummary::CallKind call_kind = (type == Primitive::kPrimLong) + DataType::Type type = div->GetResultType(); + bool call_long_div = false; + if (type == DataType::Type::kInt64) { + if (div->InputAt(1)->IsConstant()) { + int64_t imm = 
CodeGenerator::GetInt64ValueOf(div->InputAt(1)->AsConstant()); + call_long_div = (imm != 0) && !IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm))); + } else { + call_long_div = true; + } + } + LocationSummary::CallKind call_kind = call_long_div ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind); switch (type) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimLong: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - locations->SetOut(calling_convention.GetReturnLocation(type)); + case DataType::Type::kInt64: { + if (call_long_div) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::ConstantLocation(div->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresRegister()); + } break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -3751,24 +4092,36 @@ void LocationsBuilderMIPS::VisitDiv(HDiv* div) { } void InstructionCodeGeneratorMIPS::VisitDiv(HDiv* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); switch (type) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: GenerateDivRemIntegral(instruction); break; - case Primitive::kPrimLong: { - codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); + case DataType::Type::kInt64: { + if (locations->InAt(1).IsConstant()) { + int64_t imm = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
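// [Editorial note, not part of the original patch] The constant-divisor dispatch added
// around this point mirrors the existing 32-bit logic: a 64-bit divisor of 0 generates
// no code (DivZeroCheck throws first), +/-1 goes through DivRemOneOrMinusOne, a power
// of two (of its absolute value, with INT64_MIN treated as 2^63) goes through
// DivRemByPowerOfTwo, and only the remaining cases fall back to the kQuickLdiv runtime
// call. A minimal sketch of the same selection, assuming a hypothetical standalone
// helper rather than the AbsOrMin/IsPowerOfTwo utilities used in the patch:
//
//   #include <cstdint>
//   static bool UseRuntimeLongDiv(bool divisor_is_constant, int64_t imm) {
//     if (!divisor_is_constant) return true;
//     if (imm == 0 || imm == 1 || imm == -1) return false;
//     uint64_t abs_imm = (imm == INT64_MIN)
//         ? (1ULL << 63)
//         : static_cast<uint64_t>(imm < 0 ? -imm : imm);
//     return (abs_imm & (abs_imm - 1)) != 0;  // Not a power of two: call kQuickLdiv.
//   }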
+ } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else { + DCHECK(IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm)))); + DivRemByPowerOfTwo(instruction); + } + } else { + codegen_->InvokeRuntime(kQuickLdiv, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>(); + } break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { FRegister dst = locations->Out().AsFpuRegister<FRegister>(); FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ DivS(dst, lhs, rhs); } else { __ DivD(dst, lhs, rhs); @@ -3786,17 +4139,19 @@ void LocationsBuilderMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) { } void InstructionCodeGeneratorMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) { - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathMIPS(instruction); + SlowPathCodeMIPS* slow_path = + new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathMIPS(instruction); codegen_->AddSlowPath(slow_path); Location value = instruction->GetLocations()->InAt(0); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { if (value.IsConstant()) { if (value.GetConstant()->AsIntConstant()->GetValue() == 0) { __ B(slow_path->GetEntryLabel()); @@ -3810,7 +4165,7 @@ void InstructionCodeGeneratorMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (value.IsConstant()) { if (value.GetConstant()->AsLongConstant()->GetValue() == 0) { __ B(slow_path->GetEntryLabel()); @@ -3832,7 +4187,7 @@ void InstructionCodeGeneratorMIPS::VisitDivZeroCheck(HDivZeroCheck* instruction) void LocationsBuilderMIPS::VisitDoubleConstant(HDoubleConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3849,7 +4204,7 @@ void InstructionCodeGeneratorMIPS::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { void LocationsBuilderMIPS::VisitFloatConstant(HFloatConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3862,13 +4217,22 @@ void LocationsBuilderMIPS::VisitGoto(HGoto* got) { } void InstructionCodeGeneratorMIPS::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && 
info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ Lw(AT, SP, kCurrentMethodStackOffset); + __ Lhu(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); + __ Addiu(TMP, TMP, 1); + __ Sh(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -4746,13 +5110,13 @@ void InstructionCodeGeneratorMIPS::GenerateLongCompareAndBranch(IfCondition cond void InstructionCodeGeneratorMIPS::GenerateFpCompare(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* locations) { Register dst = locations->Out().AsRegister<Register>(); FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { if (isR6) { switch (cond) { case kCondEQ: @@ -4859,7 +5223,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompare(IfCondition cond, } } } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); if (isR6) { switch (cond) { case kCondEQ: @@ -4970,13 +5334,13 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompare(IfCondition cond, bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR2(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* input_locations, int cc) { FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>(); FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>(); CHECK(!codegen_->GetInstructionSetFeatures().IsR6()); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { switch (cond) { case kCondEQ: __ CeqS(cc, lhs, rhs); @@ -5017,7 +5381,7 @@ bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR2(IfCondition cond, UNREACHABLE(); } } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); switch (cond) { case kCondEQ: __ CeqD(cc, lhs, rhs); @@ -5062,13 +5426,13 @@ bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR2(IfCondition cond, bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR6(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* input_locations, FRegister dst) { FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>(); FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>(); CHECK(codegen_->GetInstructionSetFeatures().IsR6()); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { switch (cond) { case kCondEQ: __ CmpEqS(dst, lhs, rhs); @@ -5109,7 +5473,7 @@ bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR6(IfCondition cond, UNREACHABLE(); } } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); switch (cond) { case kCondEQ: __ CmpEqD(dst, lhs, rhs); @@ -5154,13 +5518,13 @@ bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR6(IfCondition cond, void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* locations, MipsLabel* label) { FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - if (type == 
Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { if (isR6) { switch (cond) { case kCondEQ: @@ -5255,7 +5619,7 @@ void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond, } } } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); if (isR6) { switch (cond) { case kCondEQ: @@ -5397,7 +5761,7 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. HCondition* condition = cond->AsCondition(); - Primitive::Type type = condition->InputAt(0)->GetType(); + DataType::Type type = condition->InputAt(0)->GetType(); LocationSummary* locations = cond->GetLocations(); IfCondition if_cond = condition->GetCondition(); MipsLabel* branch_target = true_target; @@ -5411,11 +5775,11 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi default: GenerateIntCompareAndBranch(if_cond, locations, branch_target); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: GenerateLongCompareAndBranch(if_cond, locations, branch_target); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target); break; } @@ -5429,7 +5793,7 @@ void InstructionCodeGeneratorMIPS::GenerateTestAndBranch(HInstruction* instructi } void LocationsBuilderMIPS::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -5446,7 +5810,7 @@ void InstructionCodeGeneratorMIPS::VisitIf(HIf* if_instr) { } void LocationsBuilderMIPS::VisitDeoptimize(HDeoptimize* deoptimize) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); InvokeRuntimeCallingConvention calling_convention; RegisterSet caller_saves = RegisterSet::Empty(); @@ -5480,8 +5844,9 @@ static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* l HInstruction* cond = select->InputAt(/* condition_input_index */ 2); HCondition* condition = cond->AsCondition(); - Primitive::Type cond_type = materialized ? Primitive::kPrimInt : condition->InputAt(0)->GetType(); - Primitive::Type dst_type = select->GetType(); + DataType::Type cond_type = + materialized ? DataType::Type::kInt32 : condition->InputAt(0)->GetType(); + DataType::Type dst_type = select->GetType(); HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); @@ -5523,7 +5888,7 @@ static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* l use_const_for_true_in = is_true_value_zero_constant; } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: // Moving long on int condition. 
if (is_r6) { if (is_true_value_zero_constant) { @@ -5546,8 +5911,8 @@ static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* l use_const_for_true_in = is_true_value_zero_constant; } break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: // Moving float/double on int condition. if (is_r6) { if (materialized) { @@ -5578,12 +5943,12 @@ static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* l break; } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: // We don't materialize long comparison now // and use conditional branches instead. break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: switch (dst_type) { default: // Moving int on float/double condition. @@ -5611,7 +5976,7 @@ static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* l use_const_for_true_in = is_true_value_zero_constant; } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: // Moving long on float/double condition. if (is_r6) { if (is_true_value_zero_constant) { @@ -5636,8 +6001,8 @@ static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* l use_const_for_true_in = is_true_value_zero_constant; } break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: // Moving float/double on float/double condition. if (is_r6) { can_move_conditionally = true; @@ -5673,7 +6038,7 @@ static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* l locations_to_set->SetInAt(0, Location::ConstantLocation(cst_false_value)); } else { locations_to_set->SetInAt(0, - Primitive::IsFloatingPointType(dst_type) + DataType::IsFloatingPointType(dst_type) ? Location::RequiresFpuRegister() : Location::RequiresRegister()); } @@ -5681,7 +6046,7 @@ static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* l locations_to_set->SetInAt(1, Location::ConstantLocation(cst_true_value)); } else { locations_to_set->SetInAt(1, - Primitive::IsFloatingPointType(dst_type) + DataType::IsFloatingPointType(dst_type) ? Location::RequiresFpuRegister() : Location::RequiresRegister()); } @@ -5694,7 +6059,7 @@ static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* l if (is_out_same_as_first_in) { locations_to_set->SetOut(Location::SameAsFirstInput()); } else { - locations_to_set->SetOut(Primitive::IsFloatingPointType(dst_type) + locations_to_set->SetOut(DataType::IsFloatingPointType(dst_type) ? 
Location::RequiresFpuRegister() : Location::RequiresRegister()); } @@ -5712,9 +6077,9 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { HInstruction* cond = select->InputAt(/* condition_input_index */ 2); Register cond_reg = TMP; int cond_cc = 0; - Primitive::Type cond_type = Primitive::kPrimInt; + DataType::Type cond_type = DataType::Type::kInt32; bool cond_inverted = false; - Primitive::Type dst_type = select->GetType(); + DataType::Type dst_type = select->GetType(); if (IsBooleanValueOrMaterializedCondition(cond)) { cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>(); @@ -5725,11 +6090,11 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { cond_type = condition->InputAt(0)->GetType(); switch (cond_type) { default: - DCHECK_NE(cond_type, Primitive::kPrimLong); + DCHECK_NE(cond_type, DataType::Type::kInt64); cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: cond_inverted = MaterializeFpCompareR2(if_cond, condition->IsGtBias(), cond_type, @@ -5759,7 +6124,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { __ Movn(dst.AsRegister<Register>(), src_reg, cond_reg); } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (cond_inverted) { __ Movz(dst.AsRegisterPairLow<Register>(), src_reg, cond_reg); __ Movz(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg); @@ -5768,14 +6133,14 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { __ Movn(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg); } break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: if (cond_inverted) { __ MovzS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); } else { __ MovnS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: if (cond_inverted) { __ MovzD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg); } else { @@ -5784,11 +6149,11 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { break; } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: LOG(FATAL) << "Unreachable"; UNREACHABLE(); - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: switch (dst_type) { default: if (cond_inverted) { @@ -5797,7 +6162,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { __ Movt(dst.AsRegister<Register>(), src_reg, cond_cc); } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (cond_inverted) { __ Movf(dst.AsRegisterPairLow<Register>(), src_reg, cond_cc); __ Movf(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc); @@ -5806,14 +6171,14 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) { __ Movt(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc); } break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: if (cond_inverted) { __ MovfS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); } else { __ MovtS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc); } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: if (cond_inverted) { __ MovfD(dst.AsFpuRegister<FRegister>(), 
src.AsFpuRegister<FRegister>(), cond_cc); } else { @@ -5833,9 +6198,9 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { HInstruction* cond = select->InputAt(/* condition_input_index */ 2); Register cond_reg = TMP; FRegister fcond_reg = FTMP; - Primitive::Type cond_type = Primitive::kPrimInt; + DataType::Type cond_type = DataType::Type::kInt32; bool cond_inverted = false; - Primitive::Type dst_type = select->GetType(); + DataType::Type dst_type = select->GetType(); if (IsBooleanValueOrMaterializedCondition(cond)) { cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>(); @@ -5846,11 +6211,11 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { cond_type = condition->InputAt(0)->GetType(); switch (cond_type) { default: - DCHECK_NE(cond_type, Primitive::kPrimLong); + DCHECK_NE(cond_type, DataType::Type::kInt64); cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: cond_inverted = MaterializeFpCompareR6(if_cond, condition->IsGtBias(), cond_type, @@ -5869,7 +6234,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { switch (dst_type) { default: - if (Primitive::IsFloatingPointType(cond_type)) { + if (DataType::IsFloatingPointType(cond_type)) { __ Mfc1(cond_reg, fcond_reg); } if (true_src.IsConstant()) { @@ -5896,8 +6261,8 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { __ Or(dst.AsRegister<Register>(), AT, TMP); } break; - case Primitive::kPrimLong: { - if (Primitive::IsFloatingPointType(cond_type)) { + case DataType::Type::kInt64: { + if (DataType::IsFloatingPointType(cond_type)) { __ Mfc1(cond_reg, fcond_reg); } Register dst_lo = dst.AsRegisterPairLow<Register>(); @@ -5926,8 +6291,8 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { } break; } - case Primitive::kPrimFloat: { - if (!Primitive::IsFloatingPointType(cond_type)) { + case DataType::Type::kFloat32: { + if (!DataType::IsFloatingPointType(cond_type)) { // sel*.fmt tests bit 0 of the condition register, account for that. __ Sltu(TMP, ZERO, cond_reg); __ Mtc1(TMP, fcond_reg); @@ -5961,8 +6326,8 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { } break; } - case Primitive::kPrimDouble: { - if (!Primitive::IsFloatingPointType(cond_type)) { + case DataType::Type::kFloat64: { + if (!DataType::IsFloatingPointType(cond_type)) { // sel*.fmt tests bit 0 of the condition register, account for that. 
__ Sltu(TMP, ZERO, cond_reg); __ Mtc1(TMP, fcond_reg); @@ -6000,7 +6365,7 @@ void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) { } void LocationsBuilderMIPS::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(flag, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -6013,7 +6378,7 @@ void InstructionCodeGeneratorMIPS::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFl } void LocationsBuilderMIPS::VisitSelect(HSelect* select) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); CanMoveConditionally(select, codegen_->GetInstructionSetFeatures().IsR6(), locations); } @@ -6038,7 +6403,7 @@ void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) { } void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetArena()) LocationSummary(info); + new (GetGraph()->GetAllocator()) LocationSummary(info); } void InstructionCodeGeneratorMIPS::VisitNativeDebugInfo(HNativeDebugInfo*) { @@ -6050,12 +6415,12 @@ void CodeGeneratorMIPS::GenerateNop() { } void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - Primitive::Type field_type = field_info.GetFieldType(); - bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble); + DataType::Type field_type = field_info.GetFieldType(); + bool is_wide = (field_type == DataType::Type::kInt64) || (field_type == DataType::Type::kFloat64); bool generate_volatile = field_info.IsVolatile() && is_wide; bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + kEmitCompilerReadBarrier && (field_type == DataType::Type::kReference); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, generate_volatile ? LocationSummary::kCallOnMainOnly @@ -6071,18 +6436,18 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field InvokeRuntimeCallingConvention calling_convention; // need A0 to hold base + offset locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - if (field_type == Primitive::kPrimLong) { - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimLong)); + if (field_type == DataType::Type::kInt64) { + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt64)); } else { // Use Location::Any() to prevent situations when running out of available fp registers. 
locations->SetOut(Location::Any()); // Need some temp core regs since FP results are returned in core registers - Location reg = calling_convention.GetReturnLocation(Primitive::kPrimLong); + Location reg = calling_convention.GetReturnLocation(DataType::Type::kInt64); locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairLow<Register>())); locations->AddTemp(Location::RegisterLocation(reg.AsRegisterPairHigh<Register>())); } } else { - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { // The output overlaps in the case of an object field get with @@ -6096,7 +6461,9 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier. - locations->AddTemp(Location::RequiresRegister()); + if (!kBakerReadBarrierThunksEnableForFields) { + locations->AddTemp(Location::RequiresRegister()); + } } } } @@ -6104,7 +6471,8 @@ void LocationsBuilderMIPS::HandleFieldGet(HInstruction* instruction, const Field void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc) { - Primitive::Type type = field_info.GetFieldType(); + DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); Register obj = obj_loc.AsRegister<Register>(); @@ -6115,28 +6483,31 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, auto null_checker = GetImplicitNullChecker(instruction, codegen_); switch (type) { - case Primitive::kPrimBoolean: + case DataType::Type::kBool: + case DataType::Type::kUint8: load_type = kLoadUnsignedByte; break; - case Primitive::kPrimByte: + case DataType::Type::kInt8: load_type = kLoadSignedByte; break; - case Primitive::kPrimShort: - load_type = kLoadSignedHalfword; - break; - case Primitive::kPrimChar: + case DataType::Type::kUint16: load_type = kLoadUnsignedHalfword; break; - case Primitive::kPrimInt: - case Primitive::kPrimFloat: - case Primitive::kPrimNot: + case DataType::Type::kInt16: + load_type = kLoadSignedHalfword; + break; + case DataType::Type::kInt32: + case DataType::Type::kFloat32: + case DataType::Type::kReference: load_type = kLoadWord; break; - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: load_type = kLoadDoubleword; break; - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } @@ -6145,11 +6516,14 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, InvokeRuntimeCallingConvention calling_convention; __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check - __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); + __ LoadFromOffset(kLoadWord, + ZERO, + locations->GetTemp(0).AsRegister<Register>(), + 0, + null_checker); codegen_->InvokeRuntime(kQuickA64Load, instruction, dex_pc); CheckEntrypointTypes<kQuickA64Load, int64_t, volatile 
const int64_t*>(); - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { // FP results are returned in core registers. Need to move them. if (dst_loc.IsFpuRegister()) { __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), dst_loc.AsFpuRegister<FRegister>()); @@ -6168,10 +6542,11 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, } } } else { - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // /* HeapReference<Object> */ dst = *(obj + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); + Location temp_loc = + kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -6193,9 +6568,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, // reference, if heap poisoning is enabled). codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset); } - } else if (!Primitive::IsFloatingPointType(type)) { + } else if (!DataType::IsFloatingPointType(type)) { Register dst; - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { DCHECK(dst_loc.IsRegisterPair()); dst = dst_loc.AsRegisterPairLow<Register>(); } else { @@ -6206,7 +6581,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, } else { DCHECK(dst_loc.IsFpuRegister()); FRegister dst = dst_loc.AsFpuRegister<FRegister>(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ LoadSFromOffset(dst, obj, offset, null_checker); } else { __ LoadDFromOffset(dst, obj, offset, null_checker); @@ -6216,16 +6591,16 @@ void InstructionCodeGeneratorMIPS::HandleFieldGet(HInstruction* instruction, // Memory barriers, in the case of references, are handled in the // previous switch statement. - if (is_volatile && (type != Primitive::kPrimNot)) { + if (is_volatile && (type != DataType::Type::kReference)) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } } void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info) { - Primitive::Type field_type = field_info.GetFieldType(); - bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble); + DataType::Type field_type = field_info.GetFieldType(); + bool is_wide = (field_type == DataType::Type::kInt64) || (field_type == DataType::Type::kFloat64); bool generate_volatile = field_info.IsVolatile() && is_wide; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, generate_volatile ? 
LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); @@ -6233,7 +6608,7 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field InvokeRuntimeCallingConvention calling_convention; // need A0 to hold base + offset locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - if (field_type == Primitive::kPrimLong) { + if (field_type == DataType::Type::kInt64) { locations->SetInAt(1, Location::RegisterPairLocation( calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); } else { @@ -6244,7 +6619,7 @@ void LocationsBuilderMIPS::HandleFieldSet(HInstruction* instruction, const Field locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(3))); } } else { - if (Primitive::IsFloatingPointType(field_type)) { + if (DataType::IsFloatingPointType(field_type)) { locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1))); } else { locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1))); @@ -6256,7 +6631,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc, bool value_can_be_null) { - Primitive::Type type = field_info.GetFieldType(); + DataType::Type type = field_info.GetFieldType(); LocationSummary* locations = instruction->GetLocations(); Register obj = locations->InAt(0).AsRegister<Register>(); Location value_location = locations->InAt(1); @@ -6267,24 +6642,27 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, auto null_checker = GetImplicitNullChecker(instruction, codegen_); switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: store_type = kStoreByte; break; - case Primitive::kPrimShort: - case Primitive::kPrimChar: + case DataType::Type::kUint16: + case DataType::Type::kInt16: store_type = kStoreHalfword; break; - case Primitive::kPrimInt: - case Primitive::kPrimFloat: - case Primitive::kPrimNot: + case DataType::Type::kInt32: + case DataType::Type::kFloat32: + case DataType::Type::kReference: store_type = kStoreWord; break; - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: store_type = kStoreDoubleword; break; - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } @@ -6297,9 +6675,12 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, InvokeRuntimeCallingConvention calling_convention; __ Addiu32(locations->GetTemp(0).AsRegister<Register>(), obj, offset); // Do implicit Null check. - __ Lw(ZERO, locations->GetTemp(0).AsRegister<Register>(), 0); - codegen_->RecordPcInfo(instruction, instruction->GetDexPc()); - if (type == Primitive::kPrimDouble) { + __ LoadFromOffset(kLoadWord, + ZERO, + locations->GetTemp(0).AsRegister<Register>(), + 0, + null_checker); + if (type == DataType::Type::kFloat64) { // Pass FP parameters in core registers. 
if (value_location.IsFpuRegister()) { __ Mfc1(locations->GetTemp(1).AsRegister<Register>(), @@ -6330,9 +6711,9 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, if (value_location.IsConstant()) { int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker); - } else if (!Primitive::IsFloatingPointType(type)) { + } else if (!DataType::IsFloatingPointType(type)) { Register src; - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { src = value_location.AsRegisterPairLow<Register>(); } else { src = value_location.AsRegister<Register>(); @@ -6341,7 +6722,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, // Note that in the case where `value` is a null reference, // we do not enter this block, as a null reference does not // need poisoning. - DCHECK_EQ(type, Primitive::kPrimNot); + DCHECK_EQ(type, DataType::Type::kReference); __ PoisonHeapReference(TMP, src); __ StoreToOffset(store_type, TMP, obj, offset, null_checker); } else { @@ -6349,7 +6730,7 @@ void InstructionCodeGeneratorMIPS::HandleFieldSet(HInstruction* instruction, } } else { FRegister src = value_location.AsFpuRegister<FRegister>(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ StoreSToOffset(src, obj, offset, null_checker); } else { __ StoreDToOffset(src, obj, offset, null_checker); @@ -6395,7 +6776,9 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister( Register out_reg = out.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -6435,7 +6818,9 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -6458,67 +6843,174 @@ void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters( } } +static inline int GetBakerMarkThunkNumber(Register reg) { + static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 21, "Expecting equal"); + if (reg >= V0 && reg <= T7) { // 14 consecutive regs. + return reg - V0; + } else if (reg >= S2 && reg <= S7) { // 6 consecutive regs. + return 14 + (reg - S2); + } else if (reg == FP) { // One more. + return 20; + } + LOG(FATAL) << "Unexpected register " << reg; + UNREACHABLE(); +} + +static inline int GetBakerMarkFieldArrayThunkDisplacement(Register reg, bool short_offset) { + int num = GetBakerMarkThunkNumber(reg) + + (short_offset ?
BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0); + return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE; +} + +static inline int GetBakerMarkGcRootThunkDisplacement(Register reg) { + return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE + + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET; +} + void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction, Location root, Register obj, uint32_t offset, - ReadBarrierOption read_barrier_option) { + ReadBarrierOption read_barrier_option, + MipsLabel* label_low) { + bool reordering; + if (label_low != nullptr) { + DCHECK_EQ(offset, 0x5678u); + } Register root_reg = root.AsRegister<Register>(); if (read_barrier_option == kWithReadBarrier) { DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: - // - // root = obj.field; - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp != null) { - // root = temp(root) - // } - - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // Slow path marking the GC root `root`. - Location temp = Location::RegisterLocation(T9); - SlowPathCodeMIPS* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS( - instruction, - root, - /*entrypoint*/ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierThunksEnableForGcRoots) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // temp = &gc_root_thunk<root_reg> + // root = temp(root) + // } + + bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); + const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg); + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign + // extension in lw. + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + Register base = short_offset ? obj : TMP; + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
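The offset_low/offset_high split above compensates for the sign extension that lw applies to its 16-bit immediate: the high half is computed from (offset - sign_extended_low), so adding the sign-extended low half back recovers the original offset. A minimal stand-alone sketch of that arithmetic (plain C++, illustrative value only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t offset = 0x19abc;  // too large for a signed 16-bit lw immediate
  int16_t offset_low = static_cast<int16_t>(offset);  // Low16Bits: 0x9abc, i.e. -0x6544 when signed
  int16_t offset_high = static_cast<int16_t>((offset - offset_low) >> 16);  // High16Bits: 0x0002
  // aui/lui materializes (offset_high << 16); lw then adds the sign-extended offset_low back.
  uint32_t recombined = (static_cast<uint32_t>(static_cast<uint16_t>(offset_high)) << 16) +
                        static_cast<uint32_t>(static_cast<int32_t>(offset_low));
  assert(recombined == offset);
  return 0;
}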
+ __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + reordering = __ SetReorder(false); + if (!short_offset) { + DCHECK(!label_low); + __ AddUpper(base, obj, offset_high); + } + MipsLabel skip_call; + __ Beqz(T9, &skip_call, /* is_bare */ true); + if (label_low != nullptr) { + DCHECK(short_offset); + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, base, offset_low); // Single instruction + // in delay slot. + if (isR6) { + __ Jialc(T9, thunk_disp); + } else { + __ Addiu(T9, T9, thunk_disp); + __ Jalr(T9); + __ Nop(); + } + __ Bind(&skip_call); + __ SetReorder(reordering); + } else { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark entry point corresponding + // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` + // is false, and vice versa. + // + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + + if (label_low != nullptr) { + reordering = __ SetReorder(false); + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadWord, root_reg, obj, offset); + if (label_low != nullptr) { + __ SetReorder(reordering); + } + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(T9); + SlowPathCodeMIPS* slow_path = + new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset); + __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { + if (label_low != nullptr) { + reordering = __ SetReorder(false); + __ Bind(label_low); + } // GC root loaded through a slow path for read barriers other // than Baker's. 
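For readers tracing GenerateGcRootFieldLoad, the four emission strategies in this hunk can be summarized by the following stand-alone sketch; the enum and function names are illustrative placeholders, not ART symbols:

#include <cstdio>

enum class GcRootPath { kBakerThunk, kBakerSlowPath, kNonBakerSlowPath, kPlainLoad };

GcRootPath SelectGcRootPath(bool with_read_barrier, bool use_baker, bool thunks_for_gc_roots) {
  if (!with_read_barrier) return GcRootPath::kPlainLoad;   // plain lw, no barrier
  if (!use_baker) return GcRootPath::kNonBakerSlowPath;    // compute root address, runtime slow path
  return thunks_for_gc_roots ? GcRootPath::kBakerThunk     // introspection thunk call around the load
                             : GcRootPath::kBakerSlowPath; // per-register mark entrypoint + slow path
}

int main() {
  std::printf("%d\n", static_cast<int>(SelectGcRootPath(true, true, true)));
  return 0;
}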
// /* GcRoot<mirror::Object>* */ root = obj + offset __ Addiu32(root_reg, obj, offset); + if (label_low != nullptr) { + __ SetReorder(reordering); + } // /* mirror::Object* */ root = root->Read() codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); } } else { + if (label_low != nullptr) { + reordering = __ SetReorder(false); + __ Bind(label_low); + } // Plain GC root load with no read barrier. // /* GcRoot<mirror::Object> */ root = *(obj + offset) __ LoadFromOffset(kLoadWord, root_reg, obj, offset); // Note that GC roots are not affected by heap poisoning, thus we // do not have to unpoison `root_reg` here. + if (label_low != nullptr) { + __ SetReorder(reordering); + } } } @@ -6531,6 +7023,92 @@ void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* inst DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierThunksEnableForFields) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // If the offset is too large to fit into the lw instruction, we + // // use an adjusted base register (TMP) here. This register + // // receives bits 16 ... 31 of the offset before the thunk invocation + // // and the thunk benefits from it. + // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + bool isR6 = GetInstructionSetFeatures().IsR6(); + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lw. + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + bool reordering = __ SetReorder(false); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); + // There may have or may have not been a null check if the field offset is smaller than + // the page size. + // There must've been a null check in case it's actually a load from an array. + // We will, however, perform an explicit null check in the thunk as it's easier to + // do it than not. + if (instruction->IsArrayGet()) { + DCHECK(!needs_null_check); + } + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + Register ref_reg = ref.AsRegister<Register>(); + Register base = short_offset ? obj : TMP; + MipsLabel skip_call; + if (short_offset) { + if (isR6) { + __ Beqzc(T9, &skip_call, /* is_bare */ true); + __ Nop(); // In forbidden slot. + __ Jialc(T9, thunk_disp); + } else { + __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Addiu(T9, T9, thunk_disp); // In delay slot. + __ Jalr(T9); + __ Nop(); // In delay slot. 
+ } + __ Bind(&skip_call); + } else { + if (isR6) { + __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Aui(base, obj, offset_high); // In delay slot. + __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); + } else { + __ Lui(base, offset_high); + __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Addiu(T9, T9, thunk_disp); // In delay slot. + __ Jalr(T9); + __ Bind(&skip_call); + __ Addu(base, base, obj); // In delay slot. + } + } + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadWord, ref_reg, base, offset_low); // Single instruction. + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + __ MaybeUnpoisonHeapReference(ref_reg); + __ SetReorder(reordering); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -6557,9 +7135,72 @@ void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* inst static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierThunksEnableForArrays) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // The element address is pre-calculated in the TMP register before the + // // thunk invocation and the thunk benefits from it. + // HeapReference<mirror::Object> reference = data[index]; // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + DCHECK(index.IsValid()); + bool reordering = __ SetReorder(false); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(0); + // We will not do the explicit null check in the thunk as some form of a null check + // must've been done earlier. + DCHECK(!needs_null_check); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset); + Register ref_reg = ref.AsRegister<Register>(); + Register index_reg = index.IsRegisterPair() + ? index.AsRegisterPairLow<Register>() + : index.AsRegister<Register>(); + MipsLabel skip_call; + if (GetInstructionSetFeatures().IsR6()) { + __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Lsa(TMP, index_reg, obj, scale_factor); // In delay slot. + __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); + } else { + __ Sll(TMP, index_reg, scale_factor); + __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Addiu(T9, T9, thunk_disp); // In delay slot. + __ Jalr(T9); + __ Bind(&skip_call); + __ Addu(TMP, TMP, obj); // In delay slot. 
+ } + // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) + DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset; + __ LoadFromOffset(kLoadWord, ref_reg, TMP, data_offset); // Single instruction. + __ MaybeUnpoisonHeapReference(ref_reg); + __ SetReorder(reordering); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -6655,14 +7296,14 @@ void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* // to be null in this code path. DCHECK_EQ(offset, 0u); DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - slow_path = new (GetGraph()->GetArena()) + slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction, ref, obj, /* field_offset */ index, temp_reg); } else { - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(instruction, ref); + slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS(instruction, ref); } AddSlowPath(slow_path); @@ -6698,7 +7339,7 @@ void CodeGeneratorMIPS::GenerateReadBarrierSlow(HInstruction* instruction, // not used by the artReadBarrierSlow entry point. // // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) + SlowPathCodeMIPS* slow_path = new (GetScopedAllocator()) ReadBarrierForHeapReferenceSlowPathMIPS(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); @@ -6734,7 +7375,7 @@ void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCodeMIPS* slow_path = - new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS(instruction, out, root); + new (GetScopedAllocator()) ReadBarrierForRootSlowPathMIPS(instruction, out, root); AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); @@ -6749,11 +7390,12 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -6761,7 +7403,8 @@ void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) { break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); if (baker_read_barrier_slow_path) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} @@ -6800,13 +7443,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Classes must be equal for the instanceof to succeed. __ Xor(out, out, cls); __ Sltiu(out, out, 1); @@ -6814,13 +7459,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. MipsLabel loop; @@ -6830,7 +7477,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqz(out, &done); __ Bne(out, cls, &loop); @@ -6839,13 +7486,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. MipsLabel loop, success; __ Bind(&loop); @@ -6855,7 +7504,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Bnez(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ B(&done); @@ -6865,13 +7514,15 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. MipsLabel success; __ Beq(out, cls, &success); @@ -6881,7 +7532,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. 
__ Beqz(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -6903,8 +7554,8 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kWithoutReadBarrier); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ Bne(out, cls, slow_path->GetEntryLabel()); __ LoadConst32(out, 1); @@ -6932,8 +7583,8 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { // call to the runtime not using a type checking slow path). // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); break; @@ -6948,7 +7599,7 @@ void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) { } void LocationsBuilderMIPS::VisitIntConstant(HIntConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); locations->SetOut(Location::ConstantLocation(constant)); } @@ -6957,7 +7608,7 @@ void InstructionCodeGeneratorMIPS::VisitIntConstant(HIntConstant* constant ATTRI } void LocationsBuilderMIPS::VisitNullConstant(HNullConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); locations->SetOut(Location::ConstantLocation(constant)); } @@ -6984,10 +7635,6 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kMipsPointerSize); - // Set the hidden argument. - __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), - invoke->GetDexMethodIndex()); - // temp = object->GetClass(); if (receiver.IsStackSlot()) { __ LoadFromOffset(kLoadWord, temp, SP, receiver.GetStackIndex()); @@ -7012,6 +7659,9 @@ void InstructionCodeGeneratorMIPS::VisitInvokeInterface(HInvokeInterface* invoke __ LoadFromOffset(kLoadWord, temp, temp, method_offset); // T9 = temp->GetEntryPoint(); __ LoadFromOffset(kLoadWord, T9, temp, entry_point.Int32Value()); + // Set the hidden argument. 
+ __ LoadConst32(invoke->GetLocations()->GetTemp(1).AsRegister<Register>(), + invoke->GetDexMethodIndex()); // T9(); __ Jalr(T9); __ NopIfNoReordering(); @@ -7034,7 +7684,8 @@ void LocationsBuilderMIPS::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invo DCHECK(!invoke->IsStaticWithExplicitClinitCheck()); bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6(); - bool has_extra_input = invoke->HasPcRelativeMethodLoadKind() && !is_r6; + bool has_irreducible_loops = codegen_->GetGraph()->HasIrreducibleLoops(); + bool has_extra_input = invoke->HasPcRelativeMethodLoadKind() && !is_r6 && !has_irreducible_loops; IntrinsicLocationsBuilderMIPS intrinsic(codegen_); if (intrinsic.TryDispatch(invoke)) { @@ -7072,73 +7723,49 @@ static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS* codegen HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { - // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization - // is incompatible with it. - // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods - // with irreducible loops. - bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); - bool is_r6 = GetInstructionSetFeatures().IsR6(); - bool fallback_load = has_irreducible_loops && !is_r6; switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; - case HLoadString::LoadKind::kBootImageAddress: - break; case HLoadString::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); - fallback_load = false; break; + case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kRuntimeCall: - fallback_load = false; break; } - if (fallback_load) { - desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall; - } return desired_string_load_kind; } HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind( HLoadClass::LoadKind desired_class_load_kind) { - // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization - // is incompatible with it. - // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods - // with irreducible loops. 
- bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); - bool is_r6 = GetInstructionSetFeatures().IsR6(); - bool fallback_load = has_irreducible_loops && !is_r6; switch (desired_class_load_kind) { case HLoadClass::LoadKind::kInvalid: LOG(FATAL) << "UNREACHABLE"; UNREACHABLE(); case HLoadClass::LoadKind::kReferrersClass: - fallback_load = false; break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; - case HLoadClass::LoadKind::kBootImageAddress: - break; case HLoadClass::LoadKind::kJitTableAddress: DCHECK(Runtime::Current()->UseJitCompilation()); - fallback_load = false; break; + case HLoadClass::LoadKind::kBootImageAddress: case HLoadClass::LoadKind::kRuntimeCall: - fallback_load = false; break; } - if (fallback_load) { - desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall; - } return desired_class_load_kind; } Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp) { CHECK(!GetInstructionSetFeatures().IsR6()); + CHECK(!GetGraph()->HasIrreducibleLoops()); CHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex()); if (!invoke->GetLocations()->Intrinsified()) { @@ -7166,27 +7793,7 @@ Register CodeGeneratorMIPS::GetInvokeStaticOrDirectExtraParameter(HInvokeStaticO HInvokeStaticOrDirect::DispatchInfo CodeGeneratorMIPS::GetSupportedInvokeStaticOrDirectDispatch( const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info, HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) { - HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info; - // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization - // is incompatible with it. - // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods - // with irreducible loops. - bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); - bool is_r6 = GetInstructionSetFeatures().IsR6(); - bool fallback_load = has_irreducible_loops && !is_r6; - switch (dispatch_info.method_load_kind) { - case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: - case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: - break; - default: - fallback_load = false; - break; - } - if (fallback_load) { - dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall; - dispatch_info.method_load_data = 0; - } - return dispatch_info; + return desired_dispatch_info; } void CodeGeneratorMIPS::GenerateStaticOrDirectCall( @@ -7196,7 +7803,8 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind(); HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation(); bool is_r6 = GetInstructionSetFeatures().IsR6(); - Register base_reg = (invoke->HasPcRelativeMethodLoadKind() && !is_r6) + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); + Register base_reg = (invoke->HasPcRelativeMethodLoadKind() && !is_r6 && !has_irreducible_loops) ? 
GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()) : ZERO; @@ -7216,14 +7824,12 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( break; case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); - PcRelativePatchInfo* info_high = NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + PcRelativePatchInfo* info_high = NewBootImageMethodPatch(invoke->GetTargetMethod()); PcRelativePatchInfo* info_low = - NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high); - bool reordering = __ SetReorder(false); + NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); Register temp_reg = temp.AsRegister<Register>(); - EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low); - __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); + __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: @@ -7235,10 +7841,8 @@ void CodeGeneratorMIPS::GenerateStaticOrDirectCall( PcRelativePatchInfo* info_low = NewMethodBssEntryPatch( MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()), info_high); Register temp_reg = temp.AsRegister<Register>(); - bool reordering = __ SetReorder(false); - EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg, info_low); - __ Lw(temp_reg, TMP, /* placeholder */ 0x5678); - __ SetReorder(reordering); + EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, base_reg); + __ Lw(temp_reg, TMP, /* placeholder */ 0x5678, &info_low->label); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { @@ -7339,11 +7943,12 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { } DCHECK(!cls->NeedsAccessCheck()); const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + const bool has_irreducible_loops = codegen_->GetGraph()->HasIrreducibleLoops(); const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage(); LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -7351,10 +7956,17 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: if (isR6) { break; } + if (has_irreducible_loops) { + if (load_kind != HLoadClass::LoadKind::kBootImageAddress) { + codegen_->ClobberRA(); + } + break; + } FALLTHROUGH_INTENDED; case HLoadClass::LoadKind::kReferrersClass: locations->SetInAt(0, Location::RequiresRegister()); @@ -7366,8 +7978,6 @@ void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. 
- // Request a temp to hold the BSS entry location for the slow path. - locations->AddTemp(Location::RequiresRegister()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -7393,12 +8003,15 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF Register out = out_loc.AsRegister<Register>(); Register base_or_current_method_reg; bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: - base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); + base_or_current_method_reg = + (isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>(); break; case HLoadClass::LoadKind::kReferrersClass: case HLoadClass::LoadKind::kRuntimeCall: @@ -7413,7 +8026,6 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF ? kWithoutReadBarrier : kCompilerReadBarrierOption; bool generate_null_check = false; - CodeGeneratorMIPS::PcRelativePatchInfo* bss_info_high = nullptr; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: { DCHECK(!cls->CanCallRuntime()); @@ -7430,16 +8042,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); - bool reordering = __ SetReorder(false); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, - base_or_current_method_reg, - info_low); - __ Addiu(out, out, /* placeholder */ 0x5678); - __ SetReorder(reordering); + base_or_current_method_reg); + __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -7447,24 +8056,47 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(cls->GetClass().Get())); DCHECK_NE(address, 0u); - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageAddressLiteral(address)); + if (isR6 || !has_irreducible_loops) { + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + } else { + __ LoadConst32(out, address); + } + break; + } + case HLoadClass::LoadKind::kBootImageClassTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS::PcRelativePatchInfo* info_high = + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + CodeGeneratorMIPS::PcRelativePatchInfo* info_low = + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, 
+ out, + base_or_current_method_reg); + __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label); + // Extract the reference from the slot data, i.e. clear the hash bits. + int32_t masked_hash = ClassTable::TableSlot::MaskHash( + ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); + if (masked_hash != 0) { + __ Addiu(out, out, -masked_hash); + } break; } case HLoadClass::LoadKind::kBssEntry: { - bss_info_high = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); + CodeGeneratorMIPS::PcRelativePatchInfo* bss_info_high = + codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex(), bss_info_high); - constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; - Register temp = non_baker_read_barrier ? out : locations->GetTemp(0).AsRegister<Register>(); - bool reordering = __ SetReorder(false); codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, - temp, - base_or_current_method_reg, - info_low); - GenerateGcRootFieldLoad(cls, out_loc, temp, /* placeholder */ 0x5678, read_barrier_option); - __ SetReorder(reordering); + out, + base_or_current_method_reg); + GenerateGcRootFieldLoad(cls, + out_loc, + out, + /* placeholder */ 0x5678, + read_barrier_option, + &info_low->label); generate_null_check = true; break; } @@ -7475,8 +8107,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); - GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option); __ SetReorder(reordering); + GenerateGcRootFieldLoad(cls, + out_loc, + out, + /* placeholder */ 0x5678, + read_barrier_option, + &info->low_label); break; } case HLoadClass::LoadKind::kRuntimeCall: @@ -7487,8 +8124,8 @@ void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAF if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck(), bss_info_high); + SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Beqz(out, slow_path->GetEntryLabel()); @@ -7507,7 +8144,7 @@ static int32_t GetExceptionTlsOffset() { void LocationsBuilderMIPS::VisitLoadException(HLoadException* load) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -7517,7 +8154,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadException(HLoadException* load) { } void LocationsBuilderMIPS::VisitClearException(HClearException* clear) { - new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { @@ -7526,17 +8163,25 @@ void InstructionCodeGeneratorMIPS::VisitClearException(HClearException* clear AT void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { 
LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); const bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + const bool has_irreducible_loops = codegen_->GetGraph()->HasIrreducibleLoops(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: if (isR6) { break; } + if (has_irreducible_loops) { + if (load_kind != HLoadString::LoadKind::kBootImageAddress) { + codegen_->ClobberRA(); + } + break; + } FALLTHROUGH_INTENDED; // We need an extra register for PC-relative dex cache accesses. case HLoadString::LoadKind::kRuntimeCall: @@ -7553,8 +8198,6 @@ void LocationsBuilderMIPS::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. - // Request a temp to hold the BSS entry location for the slow path. - locations->AddTemp(Location::RequiresRegister()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -7575,12 +8218,15 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ Register out = out_loc.AsRegister<Register>(); Register base_or_current_method_reg; bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); + bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops(); switch (load_kind) { // We need an extra register for PC-relative literals on R2. case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: - base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>(); + base_or_current_method_reg = + (isR6 || has_irreducible_loops) ? ZERO : locations->InAt(0).AsRegister<Register>(); break; default: base_or_current_method_reg = ZERO; @@ -7591,48 +8237,57 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); - bool reordering = __ SetReorder(false); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out, - base_or_current_method_reg, - info_low); - __ Addiu(out, out, /* placeholder */ 0x5678); - __ SetReorder(reordering); - return; // No dex cache slow path. 
+ base_or_current_method_reg); + __ Addiu(out, out, /* placeholder */ 0x5678, &info_low->label); + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); - __ LoadLiteral(out, - base_or_current_method_reg, - codegen_->DeduplicateBootImageAddressLiteral(address)); - return; // No dex cache slow path. + if (isR6 || !has_irreducible_loops) { + __ LoadLiteral(out, + base_or_current_method_reg, + codegen_->DeduplicateBootImageAddressLiteral(address)); + } else { + __ LoadConst32(out, address); + } + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS::PcRelativePatchInfo* info_high = + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + CodeGeneratorMIPS::PcRelativePatchInfo* info_low = + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, + out, + base_or_current_method_reg); + __ Lw(out, out, /* placeholder */ 0x5678, &info_low->label); + return; } case HLoadString::LoadKind::kBssEntry: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); - constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; - Register temp = non_baker_read_barrier ? 
out : locations->GetTemp(0).AsRegister<Register>(); - bool reordering = __ SetReorder(false); + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, - temp, - base_or_current_method_reg, - info_low); + out, + base_or_current_method_reg); GenerateGcRootFieldLoad(load, out_loc, - temp, + out, /* placeholder */ 0x5678, - kCompilerReadBarrierOption); - __ SetReorder(reordering); + kCompilerReadBarrierOption, + &info_low->label); SlowPathCodeMIPS* slow_path = - new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load, info_high); + new (codegen_->GetScopedAllocator()) LoadStringSlowPathMIPS(load); codegen_->AddSlowPath(slow_path); __ Beqz(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -7646,12 +8301,13 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ bool reordering = __ SetReorder(false); __ Bind(&info->high_label); __ Lui(out, /* placeholder */ 0x1234); + __ SetReorder(reordering); GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678, - kCompilerReadBarrierOption); - __ SetReorder(reordering); + kCompilerReadBarrierOption, + &info->low_label); return; } default: @@ -7668,7 +8324,7 @@ void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) NO_THREAD_ } void LocationsBuilderMIPS::VisitLongConstant(HLongConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); locations->SetOut(Location::ConstantLocation(constant)); } @@ -7677,8 +8333,8 @@ void InstructionCodeGeneratorMIPS::VisitLongConstant(HLongConstant* constant ATT } void LocationsBuilderMIPS::VisitMonitorOperation(HMonitorOperation* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -7695,17 +8351,17 @@ void InstructionCodeGeneratorMIPS::VisitMonitorOperation(HMonitorOperation* inst void LocationsBuilderMIPS::VisitMul(HMul* mul) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); switch (mul->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -7717,12 +8373,12 @@ void LocationsBuilderMIPS::VisitMul(HMul* mul) { } void InstructionCodeGeneratorMIPS::VisitMul(HMul* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); bool 
isR6 = codegen_->GetInstructionSetFeatures().IsR6(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { Register dst = locations->Out().AsRegister<Register>(); Register lhs = locations->InAt(0).AsRegister<Register>(); Register rhs = locations->InAt(1).AsRegister<Register>(); @@ -7734,7 +8390,7 @@ void InstructionCodeGeneratorMIPS::VisitMul(HMul* instruction) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); Register dst_low = locations->Out().AsRegisterPairLow<Register>(); Register lhs_high = locations->InAt(0).AsRegisterPairHigh<Register>(); @@ -7771,12 +8427,12 @@ void InstructionCodeGeneratorMIPS::VisitMul(HMul* instruction) { } break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { FRegister dst = locations->Out().AsFpuRegister<FRegister>(); FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>(); FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ MulS(dst, lhs, rhs); } else { __ MulD(dst, lhs, rhs); @@ -7790,16 +8446,16 @@ void InstructionCodeGeneratorMIPS::VisitMul(HMul* instruction) { void LocationsBuilderMIPS::VisitNeg(HNeg* neg) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); switch (neg->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -7810,17 +8466,17 @@ void LocationsBuilderMIPS::VisitNeg(HNeg* neg) { } void InstructionCodeGeneratorMIPS::VisitNeg(HNeg* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { Register dst = locations->Out().AsRegister<Register>(); Register src = locations->InAt(0).AsRegister<Register>(); __ Subu(dst, ZERO, src); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); Register dst_low = locations->Out().AsRegisterPairLow<Register>(); Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); @@ -7831,11 +8487,11 @@ void InstructionCodeGeneratorMIPS::VisitNeg(HNeg* instruction) { __ Subu(dst_high, dst_high, TMP); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { FRegister dst = locations->Out().AsFpuRegister<FRegister>(); FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ NegS(dst, src); } else { __ NegD(dst, src); @@ -7848,10 +8504,10 @@ void InstructionCodeGeneratorMIPS::VisitNeg(HNeg* instruction) { } void 
LocationsBuilderMIPS::VisitNewArray(HNewArray* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } @@ -7867,15 +8523,15 @@ void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) { } void LocationsBuilderMIPS::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; if (instruction->IsStringAlloc()) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); } else { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { @@ -7897,24 +8553,24 @@ void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) { } void LocationsBuilderMIPS::VisitNot(HNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorMIPS::VisitNot(HNot* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { Register dst = locations->Out().AsRegister<Register>(); Register src = locations->InAt(0).AsRegister<Register>(); __ Nor(dst, src, ZERO); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); Register dst_low = locations->Out().AsRegisterPairLow<Register>(); Register src_high = locations->InAt(0).AsRegisterPairHigh<Register>(); @@ -7930,7 +8586,7 @@ void InstructionCodeGeneratorMIPS::VisitNot(HNot* instruction) { } void LocationsBuilderMIPS::VisitBooleanNot(HBooleanNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -7958,7 +8614,7 @@ void CodeGeneratorMIPS::GenerateImplicitNullCheck(HNullCheck* instruction) { } void CodeGeneratorMIPS::GenerateExplicitNullCheck(HNullCheck* instruction) { - SlowPathCodeMIPS* slow_path = new 
(GetGraph()->GetArena()) NullCheckSlowPathMIPS(instruction); + SlowPathCodeMIPS* slow_path = new (GetScopedAllocator()) NullCheckSlowPathMIPS(instruction); AddSlowPath(slow_path); Location obj = instruction->GetLocations()->InAt(0); @@ -7983,11 +8639,18 @@ void LocationsBuilderMIPS::VisitParallelMove(HParallelMove* instruction ATTRIBUT } void InstructionCodeGeneratorMIPS::VisitParallelMove(HParallelMove* instruction) { + if (instruction->GetNext()->IsSuspendCheck() && + instruction->GetBlock()->GetLoopInformation() != nullptr) { + HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); + // The back edge will generate the suspend check. + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); + } + codegen_->GetMoveResolver()->EmitNativeCode(instruction); } void LocationsBuilderMIPS::VisitParameterValue(HParameterValue* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); if (location.IsStackSlot()) { location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); @@ -8004,7 +8667,7 @@ void InstructionCodeGeneratorMIPS::VisitParameterValue(HParameterValue* instruct void LocationsBuilderMIPS::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); } @@ -8014,7 +8677,7 @@ void InstructionCodeGeneratorMIPS::VisitCurrentMethod(HCurrentMethod* instructio } void LocationsBuilderMIPS::VisitPhi(HPhi* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { locations->SetInAt(i, Location::Any()); } @@ -8026,30 +8689,44 @@ void InstructionCodeGeneratorMIPS::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) } void LocationsBuilderMIPS::VisitRem(HRem* rem) { - Primitive::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = - (type == Primitive::kPrimInt) ? LocationSummary::kNoCall : LocationSummary::kCallOnMainOnly; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + DataType::Type type = rem->GetResultType(); + bool call_rem; + if ((type == DataType::Type::kInt64) && rem->InputAt(1)->IsConstant()) { + int64_t imm = CodeGenerator::GetInt64ValueOf(rem->InputAt(1)->AsConstant()); + call_rem = (imm != 0) && !IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm))); + } else { + call_rem = (type != DataType::Type::kInt32); + } + LocationSummary::CallKind call_kind = call_rem + ? 
LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); switch (type) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimLong: { - InvokeRuntimeCallingConvention calling_convention; - locations->SetInAt(0, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); - locations->SetInAt(1, Location::RegisterPairLocation( - calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); - locations->SetOut(calling_convention.GetReturnLocation(type)); + case DataType::Type::kInt64: { + if (call_rem) { + InvokeRuntimeCallingConvention calling_convention; + locations->SetInAt(0, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); + locations->SetInAt(1, Location::RegisterPairLocation( + calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3))); + locations->SetOut(calling_convention.GetReturnLocation(type)); + } else { + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant())); + locations->SetOut(Location::RequiresRegister()); + } break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); @@ -8063,23 +8740,36 @@ void LocationsBuilderMIPS::VisitRem(HRem* rem) { } void InstructionCodeGeneratorMIPS::VisitRem(HRem* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); + LocationSummary* locations = instruction->GetLocations(); switch (type) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: GenerateDivRemIntegral(instruction); break; - case Primitive::kPrimLong: { - codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc()); - CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); + case DataType::Type::kInt64: { + if (locations->InAt(1).IsConstant()) { + int64_t imm = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue(); + if (imm == 0) { + // Do not generate anything. DivZeroCheck would prevent any code to be executed. 
+ } else if (imm == 1 || imm == -1) { + DivRemOneOrMinusOne(instruction); + } else { + DCHECK(IsPowerOfTwo(static_cast<uint64_t>(AbsOrMin(imm)))); + DivRemByPowerOfTwo(instruction); + } + } else { + codegen_->InvokeRuntime(kQuickLmod, instruction, instruction->GetDexPc()); + CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>(); + } break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { codegen_->InvokeRuntime(kQuickFmodf, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickFmodf, float, float, float>(); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { codegen_->InvokeRuntime(kQuickFmod, instruction, instruction->GetDexPc()); CheckEntrypointTypes<kQuickFmod, double, double, double>(); break; @@ -8107,8 +8797,8 @@ void InstructionCodeGeneratorMIPS::VisitMemoryBarrier(HMemoryBarrier* memory_bar } void LocationsBuilderMIPS::VisitReturn(HReturn* ret) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret); - Primitive::Type return_type = ret->InputAt(0)->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ret); + DataType::Type return_type = ret->InputAt(0)->GetType(); locations->SetInAt(0, MipsReturnLocation(return_type)); } @@ -8248,8 +8938,8 @@ void InstructionCodeGeneratorMIPS::VisitUnresolvedStaticFieldSet( } void LocationsBuilderMIPS::VisitSuspendCheck(HSuspendCheck* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnSlowPath); // In suspend check slow path, usually there are no caller-save registers at all. // If SIMD instructions are present, however, we force spilling all live SIMD // registers in full width (since the runtime only saves/restores lower part). 
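The VisitRem hunks above stop routing every 64-bit remainder through the kQuickLmod runtime call: when the divisor is a constant that is 0 (unreachable past DivZeroCheck), +1/-1, or a power of two, the code is generated inline via DivRemOneOrMinusOne / DivRemByPowerOfTwo, and only the remaining constants and non-constant divisors keep the call. A minimal standalone sketch of that predicate follows; the helper name is hypothetical, and the real code computes the same condition inline with IsPowerOfTwo(AbsOrMin(imm)).

#include <cstdint>

// Illustrative stand-in for the call_rem computation in LocationsBuilderMIPS::VisitRem.
// Returns true when a 64-bit remainder by a constant divisor still needs kQuickLmod.
static bool NeedsRuntimeCallForLongRem(int64_t divisor) {
  if (divisor == 0) {
    return false;  // DivZeroCheck throws first; no code is generated for the HRem.
  }
  // Magnitude of the divisor; for INT64_MIN this wraps to 2^63, which is a power of two.
  uint64_t magnitude = (divisor < 0) ? -static_cast<uint64_t>(divisor)
                                     : static_cast<uint64_t>(divisor);
  bool is_power_of_two = (magnitude & (magnitude - 1)) == 0;  // also true for +1/-1
  return !is_power_of_two;
}

For example, x % 8 and x % -1 are now lowered inline, while x % 10 still goes through the kQuickLmod entrypoint as before.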
@@ -8272,8 +8962,8 @@ void InstructionCodeGeneratorMIPS::VisitSuspendCheck(HSuspendCheck* instruction) } void LocationsBuilderMIPS::VisitThrow(HThrow* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -8284,33 +8974,35 @@ void InstructionCodeGeneratorMIPS::VisitThrow(HThrow* instruction) { } void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { - Primitive::Type input_type = conversion->GetInputType(); - Primitive::Type result_type = conversion->GetResultType(); - DCHECK_NE(input_type, result_type); + DataType::Type input_type = conversion->GetInputType(); + DataType::Type result_type = conversion->GetResultType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || - (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { + if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || + (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } LocationSummary::CallKind call_kind = LocationSummary::kNoCall; if (!isR6 && - ((Primitive::IsFloatingPointType(result_type) && input_type == Primitive::kPrimLong) || - (result_type == Primitive::kPrimLong && Primitive::IsFloatingPointType(input_type)))) { + ((DataType::IsFloatingPointType(result_type) && input_type == DataType::Type::kInt64) || + (result_type == DataType::Type::kInt64 && DataType::IsFloatingPointType(input_type)))) { call_kind = LocationSummary::kCallOnMainOnly; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind); if (call_kind == LocationSummary::kNoCall) { - if (Primitive::IsFloatingPointType(input_type)) { + if (DataType::IsFloatingPointType(input_type)) { locations->SetInAt(0, Location::RequiresFpuRegister()); } else { locations->SetInAt(0, Location::RequiresRegister()); } - if (Primitive::IsFloatingPointType(result_type)) { + if (DataType::IsFloatingPointType(result_type)) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -8318,10 +9010,10 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) { } else { InvokeRuntimeCallingConvention calling_convention; - if (Primitive::IsFloatingPointType(input_type)) { + if (DataType::IsFloatingPointType(input_type)) { locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); } else { - DCHECK_EQ(input_type, Primitive::kPrimLong); + DCHECK_EQ(input_type, DataType::Type::kInt64); locations->SetInAt(0, Location::RegisterPairLocation( calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); } @@ -8332,14 +9024,15 @@ void LocationsBuilderMIPS::VisitTypeConversion(HTypeConversion* conversion) 
{ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversion) { LocationSummary* locations = conversion->GetLocations(); - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); bool has_sign_extension = codegen_->GetInstructionSetFeatures().IsMipsIsaRevGreaterThanEqual2(); bool isR6 = codegen_->GetInstructionSetFeatures().IsR6(); - DCHECK_NE(input_type, result_type); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; - if (result_type == Primitive::kPrimLong && Primitive::IsIntegralType(input_type)) { + if (result_type == DataType::Type::kInt64 && DataType::IsIntegralType(input_type)) { Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); Register dst_low = locations->Out().AsRegisterPairLow<Register>(); Register src = locations->InAt(0).AsRegister<Register>(); @@ -8348,17 +9041,17 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi __ Move(dst_low, src); } __ Sra(dst_high, src, 31); - } else if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + } else if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { Register dst = locations->Out().AsRegister<Register>(); - Register src = (input_type == Primitive::kPrimLong) + Register src = (input_type == DataType::Type::kInt64) ? locations->InAt(0).AsRegisterPairLow<Register>() : locations->InAt(0).AsRegister<Register>(); switch (result_type) { - case Primitive::kPrimChar: - __ Andi(dst, src, 0xFFFF); + case DataType::Type::kUint8: + __ Andi(dst, src, 0xFF); break; - case Primitive::kPrimByte: + case DataType::Type::kInt8: if (has_sign_extension) { __ Seb(dst, src); } else { @@ -8366,7 +9059,10 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi __ Sra(dst, dst, 24); } break; - case Primitive::kPrimShort: + case DataType::Type::kUint16: + __ Andi(dst, src, 0xFFFF); + break; + case DataType::Type::kInt16: if (has_sign_extension) { __ Seh(dst, src); } else { @@ -8374,7 +9070,7 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi __ Sra(dst, dst, 16); } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: if (dst != src) { __ Move(dst, src); } @@ -8384,8 +9080,8 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { - if (input_type == Primitive::kPrimLong) { + } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { + if (input_type == DataType::Type::kInt64) { if (isR6) { // cvt.s.l/cvt.d.l requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. 
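The integral narrowing cases in the VisitTypeConversion hunks above use Seb/Seh when the ISA provides them (the has_sign_extension check, i.e. MIPS32r2 and newer) and otherwise fall back to a shift pair. A small illustrative C++ sketch of what that fallback computes, assuming the usual arithmetic right shift of negative values (which the MIPS Sra instruction guarantees); this is not part of the patch.

#include <cstdint>

// What the pre-r2 fallback computes for an Int8 result:
//   __ Sll(dst, src, 24);   // move the low byte into the top of the word
//   __ Sra(dst, dst, 24);   // arithmetic shift brings it back, replicating the sign bit
static int32_t SignExtendByte(int32_t src) {
  uint32_t shifted_up = static_cast<uint32_t>(src) << 24;
  return static_cast<int32_t>(shifted_up) >> 24;
}

// The unsigned narrowings need no sign handling; they are plain masks, matching the
// Andi(dst, src, 0xFF) and Andi(dst, src, 0xFFFF) cases for kUint8 and kUint16 above.
static int32_t ZeroExtendByte(int32_t src) {
  return src & 0xFF;
}

On MIPS32r2+ the single Seb/Seh instruction collapses the shift pair into one operation, which is exactly what the has_sign_extension branch selects.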
@@ -8394,16 +9090,16 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi FRegister dst = locations->Out().AsFpuRegister<FRegister>(); __ Mtc1(src_low, FTMP); __ Mthc1(src_high, FTMP); - if (result_type == Primitive::kPrimFloat) { + if (result_type == DataType::Type::kFloat32) { __ Cvtsl(dst, FTMP); } else { __ Cvtdl(dst, FTMP); } } else { - QuickEntrypointEnum entrypoint = (result_type == Primitive::kPrimFloat) ? kQuickL2f - : kQuickL2d; + QuickEntrypointEnum entrypoint = + (result_type == DataType::Type::kFloat32) ? kQuickL2f : kQuickL2d; codegen_->InvokeRuntime(entrypoint, conversion, conversion->GetDexPc()); - if (result_type == Primitive::kPrimFloat) { + if (result_type == DataType::Type::kFloat32) { CheckEntrypointTypes<kQuickL2f, float, int64_t>(); } else { CheckEntrypointTypes<kQuickL2d, double, int64_t>(); @@ -8413,14 +9109,14 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Register src = locations->InAt(0).AsRegister<Register>(); FRegister dst = locations->Out().AsFpuRegister<FRegister>(); __ Mtc1(src, FTMP); - if (result_type == Primitive::kPrimFloat) { + if (result_type == DataType::Type::kFloat32) { __ Cvtsw(dst, FTMP); } else { __ Cvtdw(dst, FTMP); } } - } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { - CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); + } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { + CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); // When NAN2008=1 (R6), the truncate instruction caps the output at the minimum/maximum // value of the output type if the input is outside of the range after the truncation or @@ -8438,7 +9134,7 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi // instruction, which will handle such an input the same way irrespective of NAN2008. // Otherwise the input is compared to itself to determine whether it is a NaN or not // in order to return either zero or the minimum value. - if (result_type == Primitive::kPrimLong) { + if (result_type == DataType::Type::kInt64) { if (isR6) { // trunc.l.s/trunc.l.d requires MIPSR2+ with FR=1. MIPS32R6 is implemented as a secondary // architecture on top of MIPS64R6, which has FR=1, and therefore can use the instruction. @@ -8446,7 +9142,7 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi Register dst_high = locations->Out().AsRegisterPairHigh<Register>(); Register dst_low = locations->Out().AsRegisterPairLow<Register>(); - if (input_type == Primitive::kPrimFloat) { + if (input_type == DataType::Type::kFloat32) { __ TruncLS(FTMP, src); } else { __ TruncLD(FTMP, src); @@ -8454,10 +9150,10 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi __ Mfc1(dst_low, FTMP); __ Mfhc1(dst_high, FTMP); } else { - QuickEntrypointEnum entrypoint = (input_type == Primitive::kPrimFloat) ? kQuickF2l - : kQuickD2l; + QuickEntrypointEnum entrypoint = + (input_type == DataType::Type::kFloat32) ? 
kQuickF2l : kQuickD2l; codegen_->InvokeRuntime(entrypoint, conversion, conversion->GetDexPc()); - if (input_type == Primitive::kPrimFloat) { + if (input_type == DataType::Type::kFloat32) { CheckEntrypointTypes<kQuickF2l, int64_t, float>(); } else { CheckEntrypointTypes<kQuickD2l, int64_t, double>(); @@ -8470,7 +9166,7 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi MipsLabel done; if (!isR6) { - if (input_type == Primitive::kPrimFloat) { + if (input_type == DataType::Type::kFloat32) { uint32_t min_val = bit_cast<uint32_t, float>(std::numeric_limits<int32_t>::min()); __ LoadConst32(TMP, min_val); __ Mtc1(TMP, FTMP); @@ -8481,14 +9177,14 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi __ MoveToFpuHigh(TMP, FTMP); } - if (input_type == Primitive::kPrimFloat) { + if (input_type == DataType::Type::kFloat32) { __ ColeS(0, FTMP, src); } else { __ ColeD(0, FTMP, src); } __ Bc1t(0, &truncate); - if (input_type == Primitive::kPrimFloat) { + if (input_type == DataType::Type::kFloat32) { __ CeqS(0, src, src); } else { __ CeqD(0, src, src); @@ -8501,7 +9197,7 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi __ Bind(&truncate); } - if (input_type == Primitive::kPrimFloat) { + if (input_type == DataType::Type::kFloat32) { __ TruncWS(FTMP, src); } else { __ TruncWD(FTMP, src); @@ -8512,11 +9208,11 @@ void InstructionCodeGeneratorMIPS::VisitTypeConversion(HTypeConversion* conversi __ Bind(&done); } } - } else if (Primitive::IsFloatingPointType(result_type) && - Primitive::IsFloatingPointType(input_type)) { + } else if (DataType::IsFloatingPointType(result_type) && + DataType::IsFloatingPointType(input_type)) { FRegister dst = locations->Out().AsFpuRegister<FRegister>(); FRegister src = locations->InAt(0).AsFpuRegister<FRegister>(); - if (result_type == Primitive::kPrimFloat) { + if (result_type == DataType::Type::kFloat32) { __ Cvtsd(dst, src); } else { __ Cvtds(dst, src); @@ -8635,8 +9331,17 @@ void InstructionCodeGeneratorMIPS::VisitAboveOrEqual(HAboveOrEqual* comp) { void LocationsBuilderMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); + if (!codegen_->GetInstructionSetFeatures().IsR6()) { + uint32_t num_entries = switch_instr->GetNumEntries(); + if (num_entries > InstructionCodeGeneratorMIPS::kPackedSwitchJumpTableThreshold) { + // When there's no HMipsComputeBaseMethodAddress input, R2 uses the NAL + // instruction to simulate PC-relative addressing when accessing the jump table. + // NAL clobbers RA. Make sure RA is preserved. + codegen_->ClobberRA(); + } + } } void InstructionCodeGeneratorMIPS::GenPackedSwitchWithCompares(Register value_reg, @@ -8720,13 +9425,17 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr HBasicBlock* switch_block = switch_instr->GetBlock(); HBasicBlock* default_block = switch_instr->GetDefaultBlock(); - if (codegen_->GetInstructionSetFeatures().IsR6() && - num_entries > kPackedSwitchJumpTableThreshold) { + if (num_entries > kPackedSwitchJumpTableThreshold) { // R6 uses PC-relative addressing to access the jump table. 
- // R2, OTOH, requires an HMipsComputeBaseMethodAddress input to access - // the jump table and it is implemented by changing HPackedSwitch to - // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress. - // See VisitMipsPackedSwitch() for the table-based implementation on R2. + // + // R2, OTOH, uses an HMipsComputeBaseMethodAddress input (when available) + // to access the jump table and it is implemented by changing HPackedSwitch to + // HMipsPackedSwitch, which bears HMipsComputeBaseMethodAddress (see + // VisitMipsPackedSwitch()). + // + // When there's no HMipsComputeBaseMethodAddress input (e.g. in presence of + // irreducible loops), R2 uses the NAL instruction to simulate PC-relative + // addressing. GenTableBasedPackedSwitch(value_reg, ZERO, lower_bound, @@ -8744,7 +9453,7 @@ void InstructionCodeGeneratorMIPS::VisitPackedSwitch(HPackedSwitch* switch_instr void LocationsBuilderMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* switch_instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); // Constant area pointer (HMipsComputeBaseMethodAddress). locations->SetInAt(1, Location::RequiresRegister()); @@ -8773,7 +9482,7 @@ void InstructionCodeGeneratorMIPS::VisitMipsPackedSwitch(HMipsPackedSwitch* swit void LocationsBuilderMIPS::VisitMipsComputeBaseMethodAddress( HMipsComputeBaseMethodAddress* insn) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -8806,7 +9515,7 @@ void InstructionCodeGeneratorMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invo void LocationsBuilderMIPS::VisitClassTableGet(HClassTableGet* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } @@ -8834,6 +9543,16 @@ void InstructionCodeGeneratorMIPS::VisitClassTableGet(HClassTableGet* instructio } } +void LocationsBuilderMIPS::VisitIntermediateAddress(HIntermediateAddress* instruction + ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorMIPS::VisitIntermediateAddress(HIntermediateAddress* instruction + ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + #undef __ #undef QUICK_ENTRY_POINT diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h index 52ee852269..c91cb62eda 100644 --- a/compiler/optimizing/code_generator_mips.h +++ b/compiler/optimizing/code_generator_mips.h @@ -18,12 +18,12 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS_H_ #include "code_generator.h" -#include "dex_file_types.h" +#include "dex/dex_file_types.h" +#include "dex/string_reference.h" +#include "dex/type_reference.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" -#include "string_reference.h" -#include "type_reference.h" #include "utils/mips/assembler_mips.h" namespace art { @@ -81,8 +81,8 @@ class InvokeDexCallingConventionVisitorMIPS : public InvokeDexCallingConventionV InvokeDexCallingConventionVisitorMIPS() {} virtual 
~InvokeDexCallingConventionVisitorMIPS() {} - Location GetNextLocation(Primitive::Type type) OVERRIDE; - Location GetReturnLocation(Primitive::Type type) const OVERRIDE; + Location GetNextLocation(DataType::Type type) OVERRIDE; + Location GetReturnLocation(DataType::Type type) const OVERRIDE; Location GetMethodLocation() const OVERRIDE; private: @@ -100,7 +100,7 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FRegis kRuntimeParameterFpuRegistersLength, kMipsPointerSize) {} - Location GetReturnLocation(Primitive::Type return_type); + Location GetReturnLocation(DataType::Type return_type); private: DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); @@ -116,17 +116,17 @@ class FieldAccessCallingConventionMIPS : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const OVERRIDE { return Location::RegisterLocation(A0); } - Location GetReturnLocation(Primitive::Type type) const OVERRIDE { - return Primitive::Is64BitType(type) + Location GetReturnLocation(DataType::Type type) const OVERRIDE { + return DataType::Is64BitType(type) ? Location::RegisterPairLocation(V0, V1) : Location::RegisterLocation(V0); } - Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { - return Primitive::Is64BitType(type) + Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE { + return DataType::Is64BitType(type) ? Location::RegisterPairLocation(A2, A3) : (is_instance ? Location::RegisterLocation(A2) : Location::RegisterLocation(A1)); } - Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return Location::FpuRegisterLocation(F0); } @@ -145,6 +145,7 @@ class ParallelMoveResolverMIPS : public ParallelMoveResolverWithSwap { void RestoreScratch(int reg) OVERRIDE; void Exchange(int index1, int index2, bool double_slot); + void ExchangeQuadSlots(int index1, int index2); MipsAssembler* GetAssembler() const; @@ -285,7 +286,8 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { Location root, Register obj, uint32_t offset, - ReadBarrierOption read_barrier_option); + ReadBarrierOption read_barrier_option, + MipsLabel* label_low = nullptr); void GenerateIntCompare(IfCondition cond, LocationSummary* locations); // When the function returns `false` it means that the condition holds if `dst` is non-zero @@ -303,14 +305,14 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { MipsLabel* label); void GenerateFpCompare(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* locations); // When the function returns `false` it means that the condition holds if the condition // code flag `cc` is non-zero and doesn't hold if `cc` is zero. If it returns `true`, // the roles of zero and non-zero values of the `cc` flag are exchanged. bool MaterializeFpCompareR2(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* input_locations, int cc); // When the function returns `false` it means that the condition holds if `dst` is non-zero @@ -318,12 +320,12 @@ class InstructionCodeGeneratorMIPS : public InstructionCodeGenerator { // `dst` are exchanged. 
bool MaterializeFpCompareR6(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* input_locations, FRegister dst); void GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* locations, MipsLabel* label); void GenerateTestAndBranch(HInstruction* instruction, @@ -394,7 +396,7 @@ class CodeGeneratorMIPS : public CodeGenerator { const MipsAssembler& GetAssembler() const OVERRIDE { return assembler_; } // Emit linker patches. - void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; // Fast path implementation of ReadBarrier::Barrier for a heap @@ -517,7 +519,7 @@ class CodeGeneratorMIPS : public CodeGenerator { // Code generation helpers. - void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; void MoveConstant(Location destination, int32_t value) OVERRIDE; @@ -540,8 +542,8 @@ class CodeGeneratorMIPS : public CodeGenerator { ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } - bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { - return type == Primitive::kPrimLong; + bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE { + return type == DataType::Type::kInt64; } // Check if the desired_string_load_kind is supported. If it is, return it, @@ -566,7 +568,7 @@ class CodeGeneratorMIPS : public CodeGenerator { HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { + DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS"; } @@ -574,8 +576,9 @@ class CodeGeneratorMIPS : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays - // and boot image strings. The only difference is the interpretation of the offset_or_index. + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // // The 16-bit halves of the 32-bit PC-relative offset are patched separately, necessitating // two patches/infos. There can be more than two patches/infos if the instruction supplying // the high half is shared with e.g. a slow path, while the low half is supplied by separate @@ -590,21 +593,14 @@ class CodeGeneratorMIPS : public CodeGenerator { // ... // sw r2, low(r1) // patch // b back - struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, + struct PcRelativePatchInfo : PatchInfo<MipsLabel> { + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx, const PcRelativePatchInfo* info_high) - : target_dex_file(dex_file), - offset_or_index(off_or_idx), - label(), + : PatchInfo<MipsLabel>(dex_file, off_or_idx), pc_rel_label(), patch_info_high(info_high) { } - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type index. - uint32_t offset_or_index; - // Label for the instruction to patch. 
- MipsLabel label; // Label for the instruction corresponding to PC+0. Not bound or used in low half patches. // Not bound in high half patches on R2 when using HMipsComputeBaseMethodAddress. // Bound in high half patches on R2 when using the NAL instruction instead of @@ -619,25 +615,27 @@ class CodeGeneratorMIPS : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; - PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); Literal* DeduplicateBootImageAddressLiteral(uint32_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, Register out, - Register base, - PcRelativePatchInfo* info_low); + Register base); // The JitPatchInfo is used for JIT string and class loads. struct JitPatchInfo { @@ -649,8 +647,9 @@ class CodeGeneratorMIPS : public CodeGenerator { // String/type index. uint64_t index; // Label for the instruction loading the most significant half of the address. - // The least significant half is loaded with the instruction that follows immediately. MipsLabel high_label; + // Label for the instruction supplying the least significant half of the address. 
+ MipsLabel low_label; }; void PatchJitRootUse(uint8_t* code, @@ -658,10 +657,10 @@ class CodeGeneratorMIPS : public CodeGenerator { const JitPatchInfo& info, uint64_t index_in_table) const; JitPatchInfo* NewJitRootStringPatch(const DexFile& dex_file, - dex::StringIndex dex_index, + dex::StringIndex string_index, Handle<mirror::String> handle); JitPatchInfo* NewJitRootClassPatch(const DexFile& dex_file, - dex::TypeIndex dex_index, + dex::TypeIndex type_index, Handle<mirror::Class> handle); private: @@ -670,14 +669,14 @@ class CodeGeneratorMIPS : public CodeGenerator { using Uint32ToLiteralMap = ArenaSafeMap<uint32_t, Literal*>; Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); - PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches); - template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches); + ArenaVector<linker::LinkerPatch>* linker_patches); // Labels for each block that will be compiled. MipsLabel* block_labels_; @@ -691,15 +690,17 @@ class CodeGeneratorMIPS : public CodeGenerator { // Deduplication map for 32-bit literals, used for non-patchable boot image addresses. Uint32ToLiteralMap uint32_literals_; // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). - ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; + // PC-relative String patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code. 
ArenaDeque<JitPatchInfo> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 454a2ddc14..985ac2ca55 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -16,17 +16,22 @@ #include "code_generator_mips64.h" +#include "arch/mips64/asm_support_mips64.h" #include "art_method.h" +#include "class_table.h" #include "code_generator_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_mips64.h" +#include "linker/linker_patch.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "offsets.h" +#include "stack_map_stream.h" #include "thread.h" #include "utils/assembler.h" #include "utils/mips64/assembler_mips64.h" @@ -38,28 +43,36 @@ namespace mips64 { static constexpr int kCurrentMethodStackOffset = 0; static constexpr GpuRegister kMethodRegisterArgument = A0; -Location Mips64ReturnLocation(Primitive::Type return_type) { +// Flags controlling the use of thunks for Baker read barriers. +constexpr bool kBakerReadBarrierThunksEnableForFields = true; +constexpr bool kBakerReadBarrierThunksEnableForArrays = true; +constexpr bool kBakerReadBarrierThunksEnableForGcRoots = true; + +Location Mips64ReturnLocation(DataType::Type return_type) { switch (return_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kUint32: + case DataType::Type::kInt32: + case DataType::Type::kReference: + case DataType::Type::kUint64: + case DataType::Type::kInt64: return Location::RegisterLocation(V0); - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: return Location::FpuRegisterLocation(F0); - case Primitive::kPrimVoid: + case DataType::Type::kVoid: return Location(); } UNREACHABLE(); } -Location InvokeDexCallingConventionVisitorMIPS64::GetReturnLocation(Primitive::Type type) const { +Location InvokeDexCallingConventionVisitorMIPS64::GetReturnLocation(DataType::Type type) const { return Mips64ReturnLocation(type); } @@ -67,34 +80,34 @@ Location InvokeDexCallingConventionVisitorMIPS64::GetMethodLocation() const { return Location::RegisterLocation(kMethodRegisterArgument); } -Location InvokeDexCallingConventionVisitorMIPS64::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorMIPS64::GetNextLocation(DataType::Type type) { Location next_location; - if (type == Primitive::kPrimVoid) { + if (type == DataType::Type::kVoid) { LOG(FATAL) << "Unexpected parameter type " << type; } - if (Primitive::IsFloatingPointType(type) && + if (DataType::IsFloatingPointType(type) && (float_index_ < calling_convention.GetNumberOfFpuRegisters())) { next_location = Location::FpuRegisterLocation( calling_convention.GetFpuRegisterAt(float_index_++)); gp_index_++; - } else if (!Primitive::IsFloatingPointType(type) && + } else if (!DataType::IsFloatingPointType(type) && (gp_index_ < calling_convention.GetNumberOfRegisters())) { next_location = 
Location::RegisterLocation(calling_convention.GetRegisterAt(gp_index_++)); float_index_++; } else { size_t stack_offset = calling_convention.GetStackOffsetOf(stack_index_); - next_location = Primitive::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) - : Location::StackSlot(stack_offset); + next_location = DataType::Is64BitType(type) ? Location::DoubleStackSlot(stack_offset) + : Location::StackSlot(stack_offset); } // Space on the stack is reserved for all arguments. - stack_index_ += Primitive::Is64BitType(type) ? 2 : 1; + stack_index_ += DataType::Is64BitType(type) ? 2 : 1; return next_location; } -Location InvokeRuntimeCallingConvention::GetReturnLocation(Primitive::Type type) { +Location InvokeRuntimeCallingConvention::GetReturnLocation(DataType::Type type) { return Mips64ReturnLocation(type); } @@ -119,10 +132,10 @@ class BoundsCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves(locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimInt, + DataType::Type::kInt32, locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt); + DataType::Type::kInt32); QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? kQuickThrowStringBounds : kQuickThrowArrayBounds; @@ -164,13 +177,11 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { LoadClassSlowPathMIPS64(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, - bool do_clinit, - const CodeGeneratorMIPS64::PcRelativePatchInfo* bss_info_high = nullptr) + bool do_clinit) : SlowPathCodeMIPS64(at), cls_(cls), dex_pc_(dex_pc), - do_clinit_(do_clinit), - bss_info_high_(bss_info_high) { + do_clinit_(do_clinit) { DCHECK(at->IsLoadClass() || at->IsClinitCheck()); } @@ -178,28 +189,11 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { LocationSummary* locations = instruction_->GetLocations(); Location out = locations->Out(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - const bool baker_or_no_read_barriers = (!kUseReadBarrier || kUseBakerReadBarrier); InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - const bool is_load_class_bss_entry = - (cls_ == instruction_) && (cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry); __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. - GpuRegister entry_address = kNoGpuRegister; - if (is_load_class_bss_entry && baker_or_no_read_barriers) { - GpuRegister temp = locations->GetTemp(0).AsRegister<GpuRegister>(); - bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); - // In the unlucky case that `temp` is A0, we preserve the address in `out` across the - // kSaveEverything call. - entry_address = temp_is_a0 ? out.AsRegister<GpuRegister>() : temp; - DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); - if (temp_is_a0) { - __ Move(entry_address, temp); - } - } - dex::TypeIndex type_index = cls_->GetTypeIndex(); __ LoadConst32(calling_convention.GetRegisterAt(0), type_index.index_); QuickEntrypointEnum entrypoint = do_clinit_ ? 
kQuickInitializeStaticStorage @@ -211,40 +205,16 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>(); } - // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. - if (is_load_class_bss_entry && baker_or_no_read_barriers) { - // The class entry address was preserved in `entry_address` thanks to kSaveEverything. - DCHECK(bss_info_high_); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, bss_info_high_); - __ Bind(&info_low->label); - __ StoreToOffset(kStoreWord, - calling_convention.GetRegisterAt(0), - entry_address, - /* placeholder */ 0x5678); - } - // Move the class to the desired location. if (out.IsValid()) { DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg())); - Primitive::Type type = instruction_->GetType(); + DataType::Type type = instruction_->GetType(); mips64_codegen->MoveLocation(out, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); } RestoreLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry, store the resolved class to the BSS entry. - if (is_load_class_bss_entry && !baker_or_no_read_barriers) { - // For non-Baker read barriers we need to re-calculate the address of - // the class entry. - CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - mips64_codegen->NewTypeBssEntryPatch(cls_->GetDexFile(), type_index, info_high); - mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, info_low); - __ StoreToOffset(kStoreWord, out.AsRegister<GpuRegister>(), TMP, /* placeholder */ 0x5678); - } __ Bc(GetExitLabel()); } @@ -260,46 +230,25 @@ class LoadClassSlowPathMIPS64 : public SlowPathCodeMIPS64 { // Whether to initialize the class. const bool do_clinit_; - // Pointer to the high half PC-relative patch info for HLoadClass/kBssEntry. - const CodeGeneratorMIPS64::PcRelativePatchInfo* bss_info_high_; - DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathMIPS64); }; class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { public: - explicit LoadStringSlowPathMIPS64(HLoadString* instruction, - const CodeGeneratorMIPS64::PcRelativePatchInfo* bss_info_high) - : SlowPathCodeMIPS64(instruction), bss_info_high_(bss_info_high) {} + explicit LoadStringSlowPathMIPS64(HLoadString* instruction) + : SlowPathCodeMIPS64(instruction) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { DCHECK(instruction_->IsLoadString()); DCHECK_EQ(instruction_->AsLoadString()->GetLoadKind(), HLoadString::LoadKind::kBssEntry); LocationSummary* locations = instruction_->GetLocations(); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg())); - HLoadString* load = instruction_->AsLoadString(); - const dex::StringIndex string_index = load->GetStringIndex(); - GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex(); CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); - const bool baker_or_no_read_barriers = (!kUseReadBarrier || kUseBakerReadBarrier); InvokeRuntimeCallingConvention calling_convention; __ Bind(GetEntryLabel()); SaveLiveRegisters(codegen, locations); - // For HLoadString/kBssEntry/kSaveEverything, make sure we preserve the address of the entry. 
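For readers following the calling-convention hunk above, the GetNextLocation rule can be summarized by a small standalone model. This is an illustrative sketch only: the register counts are placeholders (the real MIPS64 argument-register sets live in the calling-convention classes), and only the assignment rule itself mirrors the diff.

#include <cstddef>
#include <iostream>
#include <string>

// Illustrative model of GetNextLocation above: an FP argument takes the next FPU register
// if one is left, an integer/reference argument takes the next GPR, everything else spills
// to the stack, and every argument reserves stack space (two slots for 64-bit types)
// whether or not it was passed in a register. Register counts are assumptions.
struct ArgAssigner {
  size_t num_gprs;
  size_t num_fprs;
  size_t gp_index = 0;
  size_t float_index = 0;
  size_t stack_index = 0;

  std::string Next(bool is_fp, bool is_64bit) {
    std::string loc;
    if (is_fp && float_index < num_fprs) {
      loc = "FPR#" + std::to_string(float_index++);
      gp_index++;  // GPR and FPU indices advance in lockstep, as in the diff.
    } else if (!is_fp && gp_index < num_gprs) {
      loc = "GPR#" + std::to_string(gp_index++);
      float_index++;
    } else {
      loc = "stack-slot#" + std::to_string(stack_index);
    }
    stack_index += is_64bit ? 2 : 1;  // Space on the stack is reserved for all arguments.
    return loc;
  }
};

int main() {
  ArgAssigner a{/* num_gprs= */ 7, /* num_fprs= */ 7};  // counts are placeholders
  std::cout << a.Next(/* is_fp= */ false, /* is_64bit= */ false) << "\n";  // int    -> GPR#0
  std::cout << a.Next(/* is_fp= */ true,  /* is_64bit= */ true)  << "\n";  // double -> FPR#1
  std::cout << a.Next(/* is_fp= */ false, /* is_64bit= */ true)  << "\n";  // long   -> GPR#2
  return 0;
}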
- GpuRegister entry_address = kNoGpuRegister; - if (baker_or_no_read_barriers) { - GpuRegister temp = locations->GetTemp(0).AsRegister<GpuRegister>(); - bool temp_is_a0 = (temp == calling_convention.GetRegisterAt(0)); - // In the unlucky case that `temp` is A0, we preserve the address in `out` across the - // kSaveEverything call. - entry_address = temp_is_a0 ? out : temp; - DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0)); - if (temp_is_a0) { - __ Move(entry_address, temp); - } - } - __ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_); mips64_codegen->InvokeRuntime(kQuickResolveString, instruction_, @@ -307,47 +256,18 @@ class LoadStringSlowPathMIPS64 : public SlowPathCodeMIPS64 { this); CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>(); - // Store the resolved string to the BSS entry. - if (baker_or_no_read_barriers) { - // The string entry address was preserved in `entry_address` thanks to kSaveEverything. - DCHECK(bss_info_high_); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), - string_index, - bss_info_high_); - __ Bind(&info_low->label); - __ StoreToOffset(kStoreWord, - calling_convention.GetRegisterAt(0), - entry_address, - /* placeholder */ 0x5678); - } - - Primitive::Type type = instruction_->GetType(); + DataType::Type type = instruction_->GetType(); mips64_codegen->MoveLocation(locations->Out(), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), type); RestoreLiveRegisters(codegen, locations); - // Store the resolved string to the BSS entry. - if (!baker_or_no_read_barriers) { - // For non-Baker read barriers we need to re-calculate the address of - // the string entry. - CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index); - CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - mips64_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index, info_high); - mips64_codegen->EmitPcRelativeAddressPlaceholderHigh(info_high, TMP, info_low); - __ StoreToOffset(kStoreWord, out, TMP, /* placeholder */ 0x5678); - } __ Bc(GetExitLabel()); } const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathMIPS64"; } private: - // Pointer to the high half PC-relative patch info. - const CodeGeneratorMIPS64::PcRelativePatchInfo* bss_info_high_; - DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathMIPS64); }; @@ -404,6 +324,10 @@ class SuspendCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathMIPS64"; } + HBasicBlock* GetSuccessor() const { + return successor_; + } + private: // If not null, the block to branch to after the suspend check. 
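With the manual .bss stores gone, the string and class slow paths above share one shape. A condensed paraphrase of the new LoadStringSlowPathMIPS64 body follows; the identifiers are the ones used in the hunks above, and this is an illustrative fragment, not code that compiles on its own (the dex-pc argument is assumed to be instruction_->GetDexPc() as in the other runtime calls in this file).

// Paraphrase of the simplified slow-path body above; not standalone.
__ Bind(GetEntryLabel());
SaveLiveRegisters(codegen, locations);
__ LoadConst32(calling_convention.GetRegisterAt(0), string_index.index_);
mips64_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
mips64_codegen->MoveLocation(locations->Out(),  // move the runtime result into the output
                             Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                             instruction_->GetType());
RestoreLiveRegisters(codegen, locations);
__ Bc(GetExitLabel());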
HBasicBlock* const successor_; @@ -428,7 +352,7 @@ class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -437,14 +361,14 @@ InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves(locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot); + DataType::Type::kReference); if (instruction_->IsInstanceOf()) { mips64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); - Primitive::Type ret_type = instruction_->GetType(); + DataType::Type ret_type = instruction_->GetType(); Location ret_loc = calling_convention.GetReturnLocation(ret_type); mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type); } else { @@ -502,21 +426,21 @@ class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove( locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove( locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); parallel_move.AddMove( locations->InAt(2), Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); @@ -814,7 +738,7 @@ class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); LocationSummary* locations = instruction_->GetLocations(); - Primitive::Type type = Primitive::kPrimNot; + DataType::Type type = DataType::Type::kReference; GpuRegister reg_out = out_.AsRegister<GpuRegister>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); @@ -900,19 +824,19 @@ class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 { // We're moving two or three locations to locations that could // overlap, so we need a parallel move resolver.
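That is the reason these slow paths marshal their arguments through HParallelMove rather than emitting the moves one by one: the inputs may already live in the argument registers, so the move resolver has to untangle the cycle. A sketch of the pattern, using the identifiers from the surrounding hunks (fragment only, assumes the slow-path context):

// Collect the argument moves and let the resolver emit them atomically.
HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
parallel_move.AddMove(locations->InAt(0),
                      Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                      DataType::Type::kReference,
                      nullptr);
parallel_move.AddMove(locations->InAt(1),
                      Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                      DataType::Type::kInt32,
                      nullptr);
codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);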
InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove(ref_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove(obj_, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); if (index.IsValid()) { parallel_move.AddMove(index, Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); } else { @@ -978,7 +902,7 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { LocationSummary* locations = instruction_->GetLocations(); - Primitive::Type type = Primitive::kPrimNot; + DataType::Type type = DataType::Type::kReference; GpuRegister reg_out = out_.AsRegister<GpuRegister>(); DCHECK(locations->CanCall()); DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out)); @@ -993,7 +917,7 @@ class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 { CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen); mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_, - Primitive::kPrimNot); + DataType::Type::kReference); mips64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, instruction_->GetDexPc(), @@ -1031,22 +955,23 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this), - assembler_(graph->GetArena(), &isa_features), + move_resolver_(graph->GetAllocator(), this), + assembler_(graph->GetAllocator(), &isa_features), isa_features_(isa_features), uint32_literals_(std::less<uint32_t>(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), uint64_literals_(std::less<uint64_t>(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_string_patches_(StringReferenceValueComparator(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), jit_class_patches_(TypeReferenceValueComparator(), -
graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Save RA (containing the return address) to mimic Quick. AddAllocatedRegister(Location::RegisterLocation(RA)); } @@ -1061,12 +986,13 @@ void CodeGeneratorMIPS64::Finalize(CodeAllocator* allocator) { __ FinalizeCode(); // Adjust native pc offsets in stack maps. - for (size_t i = 0, num = stack_map_stream_.GetNumberOfStackMaps(); i != num; ++i) { + StackMapStream* stack_map_stream = GetStackMapStream(); + for (size_t i = 0, num = stack_map_stream->GetNumberOfStackMaps(); i != num; ++i) { uint32_t old_position = - stack_map_stream_.GetStackMap(i).native_pc_code_offset.Uint32Value(kMips64); + stack_map_stream->GetStackMap(i).native_pc_code_offset.Uint32Value(InstructionSet::kMips64); uint32_t new_position = __ GetAdjustedPosition(old_position); DCHECK_GE(new_position, old_position); - stack_map_stream_.SetStackMapNativePcOffset(i, new_position); + stack_map_stream->SetStackMapNativePcOffset(i, new_position); } // Adjust pc offsets for the disassembly information. @@ -1137,6 +1063,13 @@ void ParallelMoveResolverMIPS64::Exchange(int index1, int index2, bool double_sl __ StoreToOffset(store_type, TMP, SP, index1 + stack_offset); } +void ParallelMoveResolverMIPS64::ExchangeQuadSlots(int index1, int index2) { + __ LoadFpuFromOffset(kLoadQuadword, FTMP, SP, index1); + __ LoadFpuFromOffset(kLoadQuadword, FTMP2, SP, index2); + __ StoreFpuToOffset(kStoreQuadword, FTMP, SP, index2); + __ StoreFpuToOffset(kStoreQuadword, FTMP2, SP, index1); +} + static dwarf::Reg DWARFReg(GpuRegister reg) { return dwarf::Reg::Mips64Core(static_cast<int>(reg)); } @@ -1148,13 +1081,21 @@ static dwarf::Reg DWARFReg(FpuRegister reg) { void CodeGeneratorMIPS64::GenerateFrameEntry() { __ Bind(&frame_entry_label_); - bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kMips64) || !IsLeafMethod(); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ Lhu(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); + __ Addiu(TMP, TMP, 1); + __ Sh(TMP, kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()); + } + + bool do_overflow_check = + FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kMips64) || !IsLeafMethod(); if (do_overflow_check) { - __ LoadFromOffset(kLoadWord, - ZERO, - SP, - -static_cast<int32_t>(GetStackOverflowReservedBytes(kMips64))); + __ LoadFromOffset( + kLoadWord, + ZERO, + SP, + -static_cast<int32_t>(GetStackOverflowReservedBytes(InstructionSet::kMips64))); RecordPcInfo(nullptr, 0); } @@ -1163,8 +1104,9 @@ void CodeGeneratorMIPS64::GenerateFrameEntry() { } // Make sure the frame size isn't unreasonably large. - if (GetFrameSize() > GetStackOverflowReservedBytes(kMips64)) { - LOG(FATAL) << "Stack frame larger than " << GetStackOverflowReservedBytes(kMips64) << " bytes"; + if (GetFrameSize() > GetStackOverflowReservedBytes(InstructionSet::kMips64)) { + LOG(FATAL) << "Stack frame larger than " + << GetStackOverflowReservedBytes(InstructionSet::kMips64) << " bytes"; } // Spill callee-saved registers. @@ -1244,7 +1186,7 @@ void CodeGeneratorMIPS64::Bind(HBasicBlock* block) { void CodeGeneratorMIPS64::MoveLocation(Location destination, Location source, - Primitive::Type dst_type) { + DataType::Type dst_type) { if (source.Equals(destination)) { return; } @@ -1252,7 +1194,7 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, // A valid move can always be inferred from the destination and source // locations. 
When moving from and to a register, the argument type can be // used to generate 32bit instead of 64bit moves. - bool unspecified_type = (dst_type == Primitive::kPrimVoid); + bool unspecified_type = (dst_type == DataType::Type::kVoid); DCHECK_EQ(unspecified_type, false); if (destination.IsRegister() || destination.IsFpuRegister()) { @@ -1263,27 +1205,27 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, || src_cst->IsFloatConstant() || src_cst->IsNullConstant()))) { // For stack slots and 32bit constants, a 64bit type is appropriate. - dst_type = destination.IsRegister() ? Primitive::kPrimInt : Primitive::kPrimFloat; + dst_type = destination.IsRegister() ? DataType::Type::kInt32 : DataType::Type::kFloat32; } else { // If the source is a double stack slot or a 64bit constant, a 64bit // type is appropriate. Else the source is a register, and since the // type has not been specified, we chose a 64bit type to force a 64bit // move. - dst_type = destination.IsRegister() ? Primitive::kPrimLong : Primitive::kPrimDouble; + dst_type = destination.IsRegister() ? DataType::Type::kInt64 : DataType::Type::kFloat64; } } - DCHECK((destination.IsFpuRegister() && Primitive::IsFloatingPointType(dst_type)) || - (destination.IsRegister() && !Primitive::IsFloatingPointType(dst_type))); + DCHECK((destination.IsFpuRegister() && DataType::IsFloatingPointType(dst_type)) || + (destination.IsRegister() && !DataType::IsFloatingPointType(dst_type))); if (source.IsStackSlot() || source.IsDoubleStackSlot()) { // Move to GPR/FPR from stack LoadOperandType load_type = source.IsStackSlot() ? kLoadWord : kLoadDoubleword; - if (Primitive::IsFloatingPointType(dst_type)) { + if (DataType::IsFloatingPointType(dst_type)) { __ LoadFpuFromOffset(load_type, destination.AsFpuRegister<FpuRegister>(), SP, source.GetStackIndex()); } else { - // TODO: use load_type = kLoadUnsignedWord when type == Primitive::kPrimNot. + // TODO: use load_type = kLoadUnsignedWord when type == DataType::Type::kReference. 
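The type-inference rule described in the comment above is compact enough to model standalone. This sketch covers only the register-destination case and uses simplified stand-ins for ART's Location and DataType; it is illustrative, not ART code.

#include <cassert>

// Standalone sketch of MoveLocation's type inference above when the caller passes no type
// and the destination is a register: 32-bit sources (stack slots, int/float/null constants)
// become Int32/Float32, everything else becomes Int64/Float64, and the integer vs. FP
// flavour follows the destination register class.
enum class Src { kStackSlot, kDoubleStackSlot, kConst32, kConst64, kGpr, kFpr };
enum class Dst { kGpr, kFpr };
enum class Type { kInt32, kInt64, kFloat32, kFloat64 };

Type InferMoveType(Dst destination, Src source) {
  bool is_32bit_source = (source == Src::kStackSlot || source == Src::kConst32);
  if (is_32bit_source) {
    return (destination == Dst::kGpr) ? Type::kInt32 : Type::kFloat32;
  }
  return (destination == Dst::kGpr) ? Type::kInt64 : Type::kFloat64;
}

int main() {
  assert(InferMoveType(Dst::kGpr, Src::kStackSlot) == Type::kInt32);
  assert(InferMoveType(Dst::kFpr, Src::kDoubleStackSlot) == Type::kFloat64);
  assert(InferMoveType(Dst::kGpr, Src::kFpr) == Type::kInt64);  // force a full 64-bit move
  return 0;
}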
__ LoadFromOffset(load_type, destination.AsRegister<GpuRegister>(), SP, @@ -1297,27 +1239,27 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, } else if (source.IsConstant()) { // Move to GPR/FPR from constant GpuRegister gpr = AT; - if (!Primitive::IsFloatingPointType(dst_type)) { + if (!DataType::IsFloatingPointType(dst_type)) { gpr = destination.AsRegister<GpuRegister>(); } - if (dst_type == Primitive::kPrimInt || dst_type == Primitive::kPrimFloat) { + if (dst_type == DataType::Type::kInt32 || dst_type == DataType::Type::kFloat32) { int32_t value = GetInt32ValueOf(source.GetConstant()->AsConstant()); - if (Primitive::IsFloatingPointType(dst_type) && value == 0) { + if (DataType::IsFloatingPointType(dst_type) && value == 0) { gpr = ZERO; } else { __ LoadConst32(gpr, value); } } else { int64_t value = GetInt64ValueOf(source.GetConstant()->AsConstant()); - if (Primitive::IsFloatingPointType(dst_type) && value == 0) { + if (DataType::IsFloatingPointType(dst_type) && value == 0) { gpr = ZERO; } else { __ LoadConst64(gpr, value); } } - if (dst_type == Primitive::kPrimFloat) { + if (dst_type == DataType::Type::kFloat32) { __ Mtc1(gpr, destination.AsFpuRegister<FpuRegister>()); - } else if (dst_type == Primitive::kPrimDouble) { + } else if (dst_type == DataType::Type::kFloat64) { __ Dmtc1(gpr, destination.AsFpuRegister<FpuRegister>()); } } else if (source.IsRegister()) { @@ -1326,7 +1268,7 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, __ Move(destination.AsRegister<GpuRegister>(), source.AsRegister<GpuRegister>()); } else { DCHECK(destination.IsFpuRegister()); - if (Primitive::Is64BitType(dst_type)) { + if (DataType::Is64BitType(dst_type)) { __ Dmtc1(source.AsRegister<GpuRegister>(), destination.AsFpuRegister<FpuRegister>()); } else { __ Mtc1(source.AsRegister<GpuRegister>(), destination.AsFpuRegister<FpuRegister>()); @@ -1339,16 +1281,16 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, VectorRegisterFrom(source)); } else { // Move to FPR from FPR - if (dst_type == Primitive::kPrimFloat) { + if (dst_type == DataType::Type::kFloat32) { __ MovS(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>()); } else { - DCHECK_EQ(dst_type, Primitive::kPrimDouble); + DCHECK_EQ(dst_type, DataType::Type::kFloat64); __ MovD(destination.AsFpuRegister<FpuRegister>(), source.AsFpuRegister<FpuRegister>()); } } } else { DCHECK(destination.IsRegister()); - if (Primitive::Is64BitType(dst_type)) { + if (DataType::Is64BitType(dst_type)) { __ Dmfc1(destination.AsRegister<GpuRegister>(), source.AsFpuRegister<FpuRegister>()); } else { __ Mfc1(destination.AsRegister<GpuRegister>(), source.AsFpuRegister<FpuRegister>()); @@ -1377,13 +1319,14 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, if (source.IsRegister() || source.IsFpuRegister()) { if (unspecified_type) { if (source.IsRegister()) { - dst_type = destination.IsStackSlot() ? Primitive::kPrimInt : Primitive::kPrimLong; + dst_type = destination.IsStackSlot() ? DataType::Type::kInt32 : DataType::Type::kInt64; } else { - dst_type = destination.IsStackSlot() ? Primitive::kPrimFloat : Primitive::kPrimDouble; + dst_type = + destination.IsStackSlot() ? 
DataType::Type::kFloat32 : DataType::Type::kFloat64; } } - DCHECK((destination.IsDoubleStackSlot() == Primitive::Is64BitType(dst_type)) && - (source.IsFpuRegister() == Primitive::IsFloatingPointType(dst_type))); + DCHECK((destination.IsDoubleStackSlot() == DataType::Is64BitType(dst_type)) && + (source.IsFpuRegister() == DataType::IsFloatingPointType(dst_type))); // Move to stack from GPR/FPR StoreOperandType store_type = destination.IsStackSlot() ? kStoreWord : kStoreDoubleword; if (source.IsRegister()) { @@ -1432,7 +1375,7 @@ void CodeGeneratorMIPS64::MoveLocation(Location destination, } } -void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive::Type type) { +void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, DataType::Type type) { DCHECK(!loc1.IsConstant()); DCHECK(!loc2.IsConstant()); @@ -1442,6 +1385,8 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive: bool is_slot1 = loc1.IsStackSlot() || loc1.IsDoubleStackSlot(); bool is_slot2 = loc2.IsStackSlot() || loc2.IsDoubleStackSlot(); + bool is_simd1 = loc1.IsSIMDStackSlot(); + bool is_simd2 = loc2.IsSIMDStackSlot(); bool is_fp_reg1 = loc1.IsFpuRegister(); bool is_fp_reg2 = loc2.IsFpuRegister(); @@ -1454,17 +1399,23 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive: __ Move(r1, TMP); } else if (is_fp_reg2 && is_fp_reg1) { // Swap 2 FPRs - FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>(); - FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>(); - if (type == Primitive::kPrimFloat) { - __ MovS(FTMP, r1); - __ MovS(r1, r2); - __ MovS(r2, FTMP); + if (GetGraph()->HasSIMD()) { + __ MoveV(static_cast<VectorRegister>(FTMP), VectorRegisterFrom(loc1)); + __ MoveV(VectorRegisterFrom(loc1), VectorRegisterFrom(loc2)); + __ MoveV(VectorRegisterFrom(loc2), static_cast<VectorRegister>(FTMP)); } else { - DCHECK_EQ(type, Primitive::kPrimDouble); - __ MovD(FTMP, r1); - __ MovD(r1, r2); - __ MovD(r2, FTMP); + FpuRegister r1 = loc1.AsFpuRegister<FpuRegister>(); + FpuRegister r2 = loc2.AsFpuRegister<FpuRegister>(); + if (type == DataType::Type::kFloat32) { + __ MovS(FTMP, r1); + __ MovS(r1, r2); + __ MovS(r2, FTMP); + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + __ MovD(FTMP, r1); + __ MovD(r1, r2); + __ MovD(r2, FTMP); + } } } else if (is_slot1 != is_slot2) { // Swap GPR/FPR and stack slot @@ -1472,7 +1423,7 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive: Location mem_loc = is_slot1 ? loc1 : loc2; LoadOperandType load_type = mem_loc.IsStackSlot() ? kLoadWord : kLoadDoubleword; StoreOperandType store_type = mem_loc.IsStackSlot() ? kStoreWord : kStoreDoubleword; - // TODO: use load_type = kLoadUnsignedWord when type == Primitive::kPrimNot. + // TODO: use load_type = kLoadUnsignedWord when type == DataType::Type::kReference. __ LoadFromOffset(load_type, TMP, SP, mem_loc.GetStackIndex()); if (reg_loc.IsFpuRegister()) { __ StoreFpuToOffset(store_type, @@ -1493,6 +1444,17 @@ void CodeGeneratorMIPS64::SwapLocations(Location loc1, Location loc2, Primitive: move_resolver_.Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), loc1.IsDoubleStackSlot()); + } else if (is_simd1 && is_simd2) { + move_resolver_.ExchangeQuadSlots(loc1.GetStackIndex(), loc2.GetStackIndex()); + } else if ((is_fp_reg1 && is_simd2) || (is_fp_reg2 && is_simd1)) { + Location fp_reg_loc = is_fp_reg1 ? loc1 : loc2; + Location mem_loc = is_fp_reg1 ? 
loc2 : loc1; + __ LoadFpuFromOffset(kLoadQuadword, FTMP, SP, mem_loc.GetStackIndex()); + __ StoreFpuToOffset(kStoreQuadword, + fp_reg_loc.AsFpuRegister<FpuRegister>(), + SP, + mem_loc.GetStackIndex()); + __ MoveV(VectorRegisterFrom(fp_reg_loc), static_cast<VectorRegister>(FTMP)); } else { LOG(FATAL) << "Unimplemented swap between locations " << loc1 << " and " << loc2; } @@ -1532,91 +1494,99 @@ void CodeGeneratorMIPS64::MarkGCCard(GpuRegister object, } } -template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorMIPS64::EmitPcRelativeLinkerPatches( const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches) { + ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PcRelativePatchInfo& info : infos) { - const DexFile& dex_file = info.target_dex_file; + const DexFile* dex_file = info.target_dex_file; size_t offset_or_index = info.offset_or_index; DCHECK(info.label.IsBound()); uint32_t literal_offset = __ GetLabelLocation(&info.label); const PcRelativePatchInfo& info_high = info.patch_info_high ? *info.patch_info_high : info; uint32_t pc_rel_offset = __ GetLabelLocation(&info_high.label); - linker_patches->push_back(Factory(literal_offset, &dex_file, pc_rel_offset, offset_or_index)); + linker_patches->push_back(Factory(literal_offset, dex_file, pc_rel_offset, offset_or_index)); } } -void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { +void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = - pc_relative_method_patches_.size() + + boot_image_method_patches_.size() + method_bss_entry_patches_.size() + - pc_relative_type_patches_.size() + + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - pc_relative_string_patches_.size(); + boot_image_string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( + boot_image_method_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( + boot_image_string_patches_, linker_patches); } else { - DCHECK(pc_relative_method_patches_.empty()); - DCHECK(pc_relative_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_, - linker_patches); - } - EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, - linker_patches); + DCHECK(boot_image_method_patches_.empty()); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( + boot_image_string_patches_, linker_patches); + } + 
EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( + method_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( + type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( + string_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMethodPatch( +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageMethodPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.dex_method_index, - info_high, - &pc_relative_method_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &boot_image_method_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewMethodBssEntryPatch( MethodReference target_method, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(*target_method.dex_file, - target_method.dex_method_index, - info_high, - &method_bss_entry_patches_); + return NewPcRelativePatch( + target_method.dex_file, target_method.index, info_high, &method_bss_entry_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch( +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageTypePatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &pc_relative_type_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &boot_image_type_patches_); } CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewTypeBssEntryPatch( const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, type_index.index_, info_high, &type_bss_entry_patches_); + return NewPcRelativePatch(&dex_file, type_index.index_, info_high, &type_bss_entry_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch( +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewBootImageStringPatch( const DexFile& dex_file, dex::StringIndex string_index, const PcRelativePatchInfo* info_high) { - return NewPcRelativePatch(dex_file, string_index.index_, info_high, &pc_relative_string_patches_); + return NewPcRelativePatch( + &dex_file, string_index.index_, info_high, &boot_image_string_patches_); } -CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch( +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewStringBssEntryPatch( const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high) { + return NewPcRelativePatch(&dex_file, string_index.index_, info_high, &string_bss_entry_patches_); +} + +CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch( + const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches) { @@ -1649,15 +1619,16 @@ void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchIn __ Auipc(out, /* placeholder */ 0x1234); // A following instruction will add the sign-extended low half of the 32-bit // offset to `out` (e.g. ld, jialc, daddiu). 
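Putting the pieces above together, a pc-relative address is always emitted as a high/low pair of patches: the high half binds to the auipc inside EmitPcRelativeAddressPlaceholderHigh, the low half to whichever instruction consumes the sign-extended low 16 bits right after it. An illustrative fragment (assumes the CodeGeneratorMIPS64 context; `dex_file`, `type_index` and `out` are hypothetical):

// High/low patch pair for a pc-relative address; placeholders are fixed up at link time.
PcRelativePatchInfo* info_high = NewBootImageTypePatch(dex_file, type_index);
PcRelativePatchInfo* info_low = NewBootImageTypePatch(dex_file, type_index, info_high);
EmitPcRelativeAddressPlaceholderHigh(info_high, out, info_low);  // auipc out, /* placeholder */ 0x1234
__ Daddiu(out, out, /* placeholder */ 0x5678);                   // patched with the low half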
- DCHECK_EQ(info_low->patch_info_high, info_high); - __ Bind(&info_low->label); + if (info_low != nullptr) { + DCHECK_EQ(info_low->patch_info_high, info_high); + __ Bind(&info_low->label); + } } Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_file, dex::StringIndex string_index, Handle<mirror::String> handle) { - jit_string_roots_.Overwrite(StringReference(&dex_file, string_index), - reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); return jit_string_patches_.GetOrCreate( StringReference(&dex_file, string_index), [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); @@ -1666,8 +1637,7 @@ Literal* CodeGeneratorMIPS64::DeduplicateJitStringLiteral(const DexFile& dex_fil Literal* CodeGeneratorMIPS64::DeduplicateJitClassLiteral(const DexFile& dex_file, dex::TypeIndex type_index, Handle<mirror::Class> handle) { - jit_class_roots_.Overwrite(TypeReference(&dex_file, type_index), - reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); return jit_class_patches_.GetOrCreate( TypeReference(&dex_file, type_index), [this]() { return __ NewLiteral<uint32_t>(/* placeholder */ 0u); }); @@ -1687,17 +1657,13 @@ void CodeGeneratorMIPS64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots for (const auto& entry : jit_string_patches_) { const StringReference& string_reference = entry.first; Literal* table_entry_literal = entry.second; - const auto it = jit_string_roots_.find(string_reference); - DCHECK(it != jit_string_roots_.end()); - uint64_t index_in_table = it->second; + uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } for (const auto& entry : jit_class_patches_) { const TypeReference& type_reference = entry.first; Literal* table_entry_literal = entry.second; - const auto it = jit_class_roots_.find(type_reference); - DCHECK(it != jit_class_roots_.end()); - uint64_t index_in_table = it->second; + uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, table_entry_literal, index_in_table); } } @@ -1718,6 +1684,11 @@ void CodeGeneratorMIPS64::SetupBlockedRegisters() const { blocked_core_registers_[TMP2] = true; blocked_fpu_registers_[FTMP] = true; + if (GetInstructionSetFeatures().HasMsa()) { + // To be used just for MSA instructions. + blocked_fpu_registers_[FTMP2] = true; + } + // Reserve suspend and thread registers. 
blocked_core_registers_[S0] = true; blocked_core_registers_[TR] = true; @@ -1795,9 +1766,15 @@ void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) { void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path, GpuRegister class_reg) { - __ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value()); - __ LoadConst32(AT, mirror::Class::kStatusInitialized); - __ Bltc(TMP, AT, slow_path->GetEntryLabel()); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ LoadFromOffset(kLoadUnsignedByte, TMP, class_reg, status_byte_offset); + __ Sltiu(TMP, TMP, shifted_initialized_value); + __ Bnezc(TMP, slow_path->GetEntryLabel()); // Even if the initialized flag is set, we need to ensure consistent memory ordering. __ Sync(0); __ Bind(slow_path->GetExitLabel()); @@ -1810,8 +1787,19 @@ void InstructionCodeGeneratorMIPS64::GenerateMemoryBarrier(MemBarrierKind kind A void InstructionCodeGeneratorMIPS64::GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor) { SuspendCheckSlowPathMIPS64* slow_path = - new (GetGraph()->GetArena()) SuspendCheckSlowPathMIPS64(instruction, successor); - codegen_->AddSlowPath(slow_path); + down_cast<SuspendCheckSlowPathMIPS64*>(instruction->GetSlowPath()); + + if (slow_path == nullptr) { + slow_path = + new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathMIPS64(instruction, successor); + instruction->SetSlowPath(slow_path); + codegen_->AddSlowPath(slow_path); + if (successor != nullptr) { + DCHECK(successor->IsLoopHeader()); + } + } else { + DCHECK_EQ(slow_path->GetSuccessor(), successor); + } __ LoadFromOffset(kLoadUnsignedHalfword, TMP, @@ -1835,11 +1823,11 @@ InstructionCodeGeneratorMIPS64::InstructionCodeGeneratorMIPS64(HGraph* graph, void LocationsBuilderMIPS64::HandleBinaryOp(HBinaryOperation* instruction) { DCHECK_EQ(instruction->InputCount(), 2U); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); - Primitive::Type type = instruction->GetResultType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + DataType::Type type = instruction->GetResultType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); HInstruction* right = instruction->InputAt(1); bool can_use_imm = false; @@ -1847,11 +1835,19 @@ void LocationsBuilderMIPS64::HandleBinaryOp(HBinaryOperation* instruction) { int64_t imm = CodeGenerator::GetInt64ValueOf(right->AsConstant()); if (instruction->IsAnd() || instruction->IsOr() || instruction->IsXor()) { can_use_imm = IsUint<16>(imm); - } else if (instruction->IsAdd()) { - can_use_imm = IsInt<16>(imm); } else { - DCHECK(instruction->IsSub()); - can_use_imm = IsInt<16>(-imm); + DCHECK(instruction->IsAdd() || instruction->IsSub()); + bool single_use = right->GetUses().HasExactlyOneElement(); + if (instruction->IsSub()) { + if (!(type == DataType::Type::kInt32 && imm == INT32_MIN)) { + imm = -imm; + } + } + if (type == DataType::Type::kInt32) { + can_use_imm = IsInt<16>(imm) || (Low16Bits(imm) == 0) || single_use; + } else { + can_use_imm = 
IsInt<16>(imm) || (IsInt<32>(imm) && (Low16Bits(imm) == 0)) || single_use; + } } } if (can_use_imm) @@ -1862,8 +1858,8 @@ void LocationsBuilderMIPS64::HandleBinaryOp(HBinaryOperation* instruction) { } break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -1875,12 +1871,12 @@ void LocationsBuilderMIPS64::HandleBinaryOp(HBinaryOperation* instruction) { } void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = locations->InAt(1); @@ -1909,46 +1905,106 @@ void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instructio __ Xori(dst, lhs, rhs_imm); else __ Xor(dst, lhs, rhs_reg); - } else if (instruction->IsAdd()) { - if (type == Primitive::kPrimInt) { - if (use_imm) - __ Addiu(dst, lhs, rhs_imm); - else - __ Addu(dst, lhs, rhs_reg); - } else { - if (use_imm) - __ Daddiu(dst, lhs, rhs_imm); - else - __ Daddu(dst, lhs, rhs_reg); + } else if (instruction->IsAdd() || instruction->IsSub()) { + if (instruction->IsSub()) { + rhs_imm = -rhs_imm; } - } else { - DCHECK(instruction->IsSub()); - if (type == Primitive::kPrimInt) { - if (use_imm) - __ Addiu(dst, lhs, -rhs_imm); - else - __ Subu(dst, lhs, rhs_reg); + if (type == DataType::Type::kInt32) { + if (use_imm) { + if (IsInt<16>(rhs_imm)) { + __ Addiu(dst, lhs, rhs_imm); + } else { + int16_t rhs_imm_high = High16Bits(rhs_imm); + int16_t rhs_imm_low = Low16Bits(rhs_imm); + if (rhs_imm_low < 0) { + rhs_imm_high += 1; + } + __ Aui(dst, lhs, rhs_imm_high); + if (rhs_imm_low != 0) { + __ Addiu(dst, dst, rhs_imm_low); + } + } + } else { + if (instruction->IsAdd()) { + __ Addu(dst, lhs, rhs_reg); + } else { + DCHECK(instruction->IsSub()); + __ Subu(dst, lhs, rhs_reg); + } + } } else { - if (use_imm) - __ Daddiu(dst, lhs, -rhs_imm); - else + if (use_imm) { + if (IsInt<16>(rhs_imm)) { + __ Daddiu(dst, lhs, rhs_imm); + } else if (IsInt<32>(rhs_imm)) { + int16_t rhs_imm_high = High16Bits(rhs_imm); + int16_t rhs_imm_low = Low16Bits(rhs_imm); + bool overflow_hi16 = false; + if (rhs_imm_low < 0) { + rhs_imm_high += 1; + overflow_hi16 = (rhs_imm_high == -32768); + } + __ Daui(dst, lhs, rhs_imm_high); + if (rhs_imm_low != 0) { + __ Daddiu(dst, dst, rhs_imm_low); + } + if (overflow_hi16) { + __ Dahi(dst, 1); + } + } else { + int16_t rhs_imm_low = Low16Bits(Low32Bits(rhs_imm)); + if (rhs_imm_low < 0) { + rhs_imm += (INT64_C(1) << 16); + } + int16_t rhs_imm_upper = High16Bits(Low32Bits(rhs_imm)); + if (rhs_imm_upper < 0) { + rhs_imm += (INT64_C(1) << 32); + } + int16_t rhs_imm_high = Low16Bits(High32Bits(rhs_imm)); + if (rhs_imm_high < 0) { + rhs_imm += (INT64_C(1) << 48); + } + int16_t rhs_imm_top = High16Bits(High32Bits(rhs_imm)); + GpuRegister tmp = lhs; + if (rhs_imm_low != 0) { + __ Daddiu(dst, tmp, rhs_imm_low); + tmp = dst; + } + // Dahi and Dati must use the same input and output register, so we have to 
initialize + // the dst register using Daddiu or Daui, even when the intermediate value is zero: + // Daui(dst, lhs, 0). + if ((rhs_imm_upper != 0) || (rhs_imm_low == 0)) { + __ Daui(dst, tmp, rhs_imm_upper); + } + if (rhs_imm_high != 0) { + __ Dahi(dst, rhs_imm_high); + } + if (rhs_imm_top != 0) { + __ Dati(dst, rhs_imm_top); + } + } + } else if (instruction->IsAdd()) { + __ Daddu(dst, lhs, rhs_reg); + } else { + DCHECK(instruction->IsSub()); __ Dsubu(dst, lhs, rhs_reg); + } } } break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); if (instruction->IsAdd()) { - if (type == Primitive::kPrimFloat) + if (type == DataType::Type::kFloat32) __ AddS(dst, lhs, rhs); else __ AddD(dst, lhs, rhs); } else if (instruction->IsSub()) { - if (type == Primitive::kPrimFloat) + if (type == DataType::Type::kFloat32) __ SubS(dst, lhs, rhs); else __ SubD(dst, lhs, rhs); @@ -1965,11 +2021,11 @@ void InstructionCodeGeneratorMIPS64::HandleBinaryOp(HBinaryOperation* instructio void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); - Primitive::Type type = instr->GetResultType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instr); + DataType::Type type = instr->GetResultType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -1983,11 +2039,11 @@ void LocationsBuilderMIPS64::HandleShift(HBinaryOperation* instr) { void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { DCHECK(instr->IsShl() || instr->IsShr() || instr->IsUShr() || instr->IsRor()); LocationSummary* locations = instr->GetLocations(); - Primitive::Type type = instr->GetType(); + DataType::Type type = instr->GetType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = locations->InAt(1); @@ -2003,13 +2059,13 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { if (use_imm) { uint32_t shift_value = rhs_imm & - (type == Primitive::kPrimInt ? kMaxIntShiftDistance : kMaxLongShiftDistance); + (type == DataType::Type::kInt32 ? 
kMaxIntShiftDistance : kMaxLongShiftDistance); if (shift_value == 0) { if (dst != lhs) { __ Move(dst, lhs); } - } else if (type == Primitive::kPrimInt) { + } else if (type == DataType::Type::kInt32) { if (instr->IsShl()) { __ Sll(dst, lhs, shift_value); } else if (instr->IsShr()) { @@ -2044,7 +2100,7 @@ void InstructionCodeGeneratorMIPS64::HandleShift(HBinaryOperation* instr) { } } } else { - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { if (instr->IsShl()) { __ Sllv(dst, lhs, rhs_reg); } else if (instr->IsShr()) { @@ -2090,20 +2146,20 @@ void InstructionCodeGeneratorMIPS64::VisitAnd(HAnd* instruction) { } void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (type == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (type == DataType::Type::kReference); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, - object_array_get_with_read_barrier - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(type)) { + if (DataType::IsFloatingPointType(type)) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { // The output overlaps in the case of an object array get with @@ -2117,7 +2173,12 @@ void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier. if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { - locations->AddTemp(Location::RequiresRegister()); + bool temp_needed = instruction->GetIndex()->IsConstant() + ? 
!kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + if (temp_needed) { + locations->AddTemp(Location::RequiresRegister()); + } } } @@ -2137,11 +2198,12 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); auto null_checker = GetImplicitNullChecker(instruction, codegen_); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); const bool maybe_compressed_char_at = mirror::kUseStringCompression && instruction->IsStringCharAt(); switch (type) { - case Primitive::kPrimBoolean: { + case DataType::Type::kBool: + case DataType::Type::kUint8: { GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -2154,7 +2216,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimByte: { + case DataType::Type::kInt8: { GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -2167,20 +2229,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimShort: { - GpuRegister out = out_loc.AsRegister<GpuRegister>(); - if (index.IsConstant()) { - size_t offset = - (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; - __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); - } else { - __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_2); - __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); - } - break; - } - - case Primitive::kPrimChar: { + case DataType::Type::kUint16: { GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (maybe_compressed_char_at) { uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -2232,10 +2281,24 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt16: { + GpuRegister out = out_loc.AsRegister<GpuRegister>(); + if (index.IsConstant()) { + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset; + __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker); + } else { + __ Dlsa(TMP, index.AsRegister<GpuRegister>(), obj, TIMES_2); + __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker); + } + break; + } + + case DataType::Type::kInt32: { DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t)); GpuRegister out = out_loc.AsRegister<GpuRegister>(); - LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord; + LoadOperandType load_type = + (type == DataType::Type::kReference) ? kLoadUnsignedWord : kLoadWord; if (index.IsConstant()) { size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; @@ -2247,23 +2310,39 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); // /* HeapReference<Object> */ out = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp = locations->GetTemp(0); + bool temp_needed = index.IsConstant() + ? 
!kBakerReadBarrierThunksEnableForFields + : !kBakerReadBarrierThunksEnableForArrays; + Location temp = temp_needed ? locations->GetTemp(0) : Location::NoLocation(); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call. - codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, - out_loc, - obj, - data_offset, - index, - temp, - /* needs_null_check */ true); + DCHECK(!instruction->CanDoImplicitNullCheckOn(instruction->InputAt(0))); + if (index.IsConstant()) { + // Array load with a constant index can be treated as a field load. + size_t offset = + (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset; + codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + offset, + temp, + /* needs_null_check */ false); + } else { + codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction, + out_loc, + obj, + data_offset, + index, + temp, + /* needs_null_check */ false); + } } else { GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { @@ -2291,7 +2370,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { GpuRegister out = out_loc.AsRegister<GpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -2304,7 +2383,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { FpuRegister out = out_loc.AsFpuRegister<FpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -2317,7 +2396,7 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { FpuRegister out = out_loc.AsFpuRegister<FpuRegister>(); if (index.IsConstant()) { size_t offset = @@ -2330,14 +2409,16 @@ void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } } void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -2375,13 +2456,13 @@ Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* ins } void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, may_need_runtime_call_for_type_check ? 
LocationSummary::kCallOnSlowPath : @@ -2389,7 +2470,7 @@ void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) { + if (DataType::IsFloatingPointType(instruction->InputAt(2)->GetType())) { locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2))); } else { locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2))); @@ -2405,7 +2486,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); Location index = locations->InAt(1); Location value_location = locations->InAt(2); - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -2413,8 +2494,9 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { GpuRegister base_reg = index.IsConstant() ? obj : TMP; switch (value_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1; @@ -2431,8 +2513,8 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimShort: - case Primitive::kPrimChar: { + case DataType::Type::kUint16: + case DataType::Type::kInt16: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2; @@ -2449,7 +2531,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; @@ -2466,7 +2548,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { if (value_location.IsConstant()) { // Just setting null. 
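Before moving on to the rest of VisitArraySet, the add/sub-immediate splitting introduced in the HandleBinaryOp hunk further above (the Daddiu/Daui/Dahi/Dati sequence) deserves a worked example, since the borrow adjustments are easy to misread. A standalone model of that decomposition, checked by reconstructing the sum; it mirrors the diff's logic but is not ART code.

#include <cassert>
#include <cstdint>

// Each instruction adds a sign-extended 16-bit piece shifted by 0/16/32/48 bits, so whenever
// a piece turns out negative the next higher piece is bumped to compensate for the borrow.
struct Imm64Pieces {
  int16_t low;    // consumed by Daddiu
  int16_t upper;  // consumed by Daui  (<< 16)
  int16_t high;   // consumed by Dahi  (<< 32)
  int16_t top;    // consumed by Dati  (<< 48)
};

Imm64Pieces SplitAddImmediate(int64_t value) {
  uint64_t imm = static_cast<uint64_t>(value);  // unsigned keeps the adjustments well-defined
  Imm64Pieces p;
  p.low = static_cast<int16_t>(imm);
  if (p.low < 0) imm += (UINT64_C(1) << 16);
  p.upper = static_cast<int16_t>(imm >> 16);
  if (p.upper < 0) imm += (UINT64_C(1) << 32);
  p.high = static_cast<int16_t>(imm >> 32);
  if (p.high < 0) imm += (UINT64_C(1) << 48);
  p.top = static_cast<int16_t>(imm >> 48);
  return p;
}

int main() {
  for (int64_t value : {INT64_C(0x123456789abc7def), INT64_C(-42), INT64_C(0x7fff8000)}) {
    Imm64Pieces p = SplitAddImmediate(value);
    // What the emitted sequence adds to the register, piece by piece.
    uint64_t sum = static_cast<uint64_t>(static_cast<int64_t>(p.low)) +
                   (static_cast<uint64_t>(static_cast<int64_t>(p.upper)) << 16) +
                   (static_cast<uint64_t>(static_cast<int64_t>(p.high)) << 32) +
                   (static_cast<uint64_t>(static_cast<int64_t>(p.top)) << 48);
    assert(sum == static_cast<uint64_t>(value));
  }
  return 0;
}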
uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); @@ -2494,7 +2576,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { SlowPathCodeMIPS64* slow_path = nullptr; if (may_need_runtime_call_for_type_check) { - slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS64(instruction); + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathMIPS64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { Mips64Label non_zero; @@ -2581,7 +2663,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; @@ -2598,7 +2680,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4; @@ -2615,7 +2697,7 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); if (index.IsConstant()) { data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8; @@ -2632,7 +2714,9 @@ void InstructionCodeGeneratorMIPS64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } @@ -2644,28 +2728,98 @@ void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) { caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1))); LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); + + HInstruction* index = instruction->InputAt(0); + HInstruction* length = instruction->InputAt(1); + + bool const_index = false; + bool const_length = false; + + if (index->IsConstant()) { + if (length->IsConstant()) { + const_index = true; + const_length = true; + } else { + int32_t index_value = index->AsIntConstant()->GetValue(); + if (index_value < 0 || IsInt<16>(index_value + 1)) { + const_index = true; + } + } + } else if (length->IsConstant()) { + int32_t length_value = length->AsIntConstant()->GetValue(); + if (IsUint<15>(length_value)) { + const_length = true; + } + } + + locations->SetInAt(0, const_index + ? Location::ConstantLocation(index->AsConstant()) + : Location::RequiresRegister()); + locations->SetInAt(1, const_length + ? 
Location::ConstantLocation(length->AsConstant()) + : Location::RequiresRegister()); } void InstructionCodeGeneratorMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) { LocationSummary* locations = instruction->GetLocations(); - BoundsCheckSlowPathMIPS64* slow_path = - new (GetGraph()->GetArena()) BoundsCheckSlowPathMIPS64(instruction); - codegen_->AddSlowPath(slow_path); - - GpuRegister index = locations->InAt(0).AsRegister<GpuRegister>(); - GpuRegister length = locations->InAt(1).AsRegister<GpuRegister>(); + Location index_loc = locations->InAt(0); + Location length_loc = locations->InAt(1); + + if (length_loc.IsConstant()) { + int32_t length = length_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0 || index >= length) { + BoundsCheckSlowPathMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction); + codegen_->AddSlowPath(slow_path); + __ Bc(slow_path->GetEntryLabel()); + } else { + // Nothing to be done. + } + return; + } - // length is limited by the maximum positive signed 32-bit integer. - // Unsigned comparison of length and index checks for index < 0 - // and for length <= index simultaneously. - __ Bgeuc(index, length, slow_path->GetEntryLabel()); + BoundsCheckSlowPathMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction); + codegen_->AddSlowPath(slow_path); + GpuRegister index = index_loc.AsRegister<GpuRegister>(); + if (length == 0) { + __ Bc(slow_path->GetEntryLabel()); + } else if (length == 1) { + __ Bnezc(index, slow_path->GetEntryLabel()); + } else { + DCHECK(IsUint<15>(length)) << length; + __ Sltiu(TMP, index, length); + __ Beqzc(TMP, slow_path->GetEntryLabel()); + } + } else { + GpuRegister length = length_loc.AsRegister<GpuRegister>(); + BoundsCheckSlowPathMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathMIPS64(instruction); + codegen_->AddSlowPath(slow_path); + if (index_loc.IsConstant()) { + int32_t index = index_loc.GetConstant()->AsIntConstant()->GetValue(); + if (index < 0) { + __ Bc(slow_path->GetEntryLabel()); + } else if (index == 0) { + __ Blezc(length, slow_path->GetEntryLabel()); + } else { + DCHECK(IsInt<16>(index + 1)) << index; + __ Sltiu(TMP, length, index + 1); + __ Bnezc(TMP, slow_path->GetEntryLabel()); + } + } else { + GpuRegister index = index_loc.AsRegister<GpuRegister>(); + __ Bgeuc(index, length, slow_path->GetEntryLabel()); + } + } } // Temp is used for read barrier. static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { if (kEmitCompilerReadBarrier && + !(kUseBakerReadBarrier && kBakerReadBarrierThunksEnableForFields) && (kUseBakerReadBarrier || type_check_kind == TypeCheckKind::kAbstractClassCheck || type_check_kind == TypeCheckKind::kClassHierarchyCheck || @@ -2681,27 +2835,10 @@ static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { } void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) { - LocationSummary::CallKind call_kind = LocationSummary::kNoCall; - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); - TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = (throws_into_catch || kEmitCompilerReadBarrier) - ? 
LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall; // In fact, call on a fatal (non-returning) slow path. - break; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - case TypeCheckKind::kInterfaceCheck: - call_kind = LocationSummary::kCallOnSlowPath; - break; - } - - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind)); @@ -2728,21 +2865,10 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); Mips64Label done; - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. - bool is_type_check_slow_path_fatal = false; - if (!kEmitCompilerReadBarrier) { - is_type_check_slow_path_fatal = - (type_check_kind == TypeCheckKind::kExactCheck || - type_check_kind == TypeCheckKind::kAbstractClassCheck || - type_check_kind == TypeCheckKind::kClassHierarchyCheck || - type_check_kind == TypeCheckKind::kArrayObjectCheck) && - !instruction->CanThrowIntoCatchBlock(); - } + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCodeMIPS64* slow_path = - new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction, - is_type_check_slow_path_fatal); + new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( + instruction, is_type_check_slow_path_fatal); codegen_->AddSlowPath(slow_path); // Avoid this check if we know `obj` is not null. @@ -2896,7 +3022,7 @@ void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) { void LocationsBuilderMIPS64::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, Location::RequiresRegister()); if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); @@ -2905,7 +3031,7 @@ void LocationsBuilderMIPS64::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { // We assume the class is not null. 
- SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( + SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64( check->GetLoadClass(), check, check->GetDexPc(), @@ -2916,24 +3042,25 @@ void InstructionCodeGeneratorMIPS64::VisitClinitCheck(HClinitCheck* check) { } void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { - Primitive::Type in_type = compare->InputAt(0)->GetType(); + DataType::Type in_type = compare->InputAt(0)->GetType(); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(compare); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(compare); switch (in_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(compare->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -2947,24 +3074,25 @@ void LocationsBuilderMIPS64::VisitCompare(HCompare* compare) { void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { LocationSummary* locations = instruction->GetLocations(); GpuRegister res = locations->Out().AsRegister<GpuRegister>(); - Primitive::Type in_type = instruction->InputAt(0)->GetType(); + DataType::Type in_type = instruction->InputAt(0)->GetType(); // 0 if: left == right // 1 if: left > right // -1 if: left < right switch (in_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); Location rhs_location = locations->InAt(1); bool use_imm = rhs_location.IsConstant(); GpuRegister rhs = ZERO; if (use_imm) { - if (in_type == Primitive::kPrimLong) { + if (in_type == DataType::Type::kInt64) { int64_t value = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()->AsConstant()); if (value != 0) { rhs = AT; @@ -2986,7 +3114,7 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); Mips64Label done; @@ -3008,7 +3136,7 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); 
Mips64Label done; @@ -3036,16 +3164,16 @@ void InstructionCodeGeneratorMIPS64::VisitCompare(HCompare* instruction) { } void LocationsBuilderMIPS64::HandleCondition(HCondition* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); switch (instruction->InputAt(0)->GetType()) { default: - case Primitive::kPrimLong: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); break; @@ -3060,18 +3188,18 @@ void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { return; } - Primitive::Type type = instruction->InputAt(0)->GetType(); + DataType::Type type = instruction->InputAt(0)->GetType(); LocationSummary* locations = instruction->GetLocations(); switch (type) { default: // Integer case. GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ false, locations); return; - case Primitive::kPrimLong: + case DataType::Type::kInt64: GenerateIntLongCompare(instruction->GetCondition(), /* is64bit */ true, locations); return; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations); return; } @@ -3079,7 +3207,7 @@ void InstructionCodeGeneratorMIPS64::HandleCondition(HCondition* instruction) { void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - Primitive::Type type = instruction->GetResultType(); + DataType::Type type = instruction->GetResultType(); LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); @@ -3094,10 +3222,10 @@ void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instr __ Move(out, ZERO); } else { if (imm == -1) { - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { __ Subu(out, ZERO, dividend); } else { - DCHECK_EQ(type, Primitive::kPrimLong); + DCHECK_EQ(type, DataType::Type::kInt64); __ Dsubu(out, ZERO, dividend); } } else if (out != dividend) { @@ -3108,7 +3236,7 @@ void InstructionCodeGeneratorMIPS64::DivRemOneOrMinusOne(HBinaryOperation* instr void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - Primitive::Type type = instruction->GetResultType(); + DataType::Type type = instruction->GetResultType(); LocationSummary* locations = instruction->GetLocations(); Location second = locations->InAt(1); @@ -3121,7 +3249,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru int ctz_imm = CTZ(abs_imm); if (instruction->IsDiv()) { - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { if (ctz_imm == 1) { // Fast path for division by +/-2, which is very common. 
__ Srl(TMP, dividend, 31); @@ -3135,7 +3263,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru __ Subu(out, ZERO, out); } } else { - DCHECK_EQ(type, Primitive::kPrimLong); + DCHECK_EQ(type, DataType::Type::kInt64); if (ctz_imm == 1) { // Fast path for division by +/-2, which is very common. __ Dsrl32(TMP, dividend, 31); @@ -3158,7 +3286,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru } } } else { - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { if (ctz_imm == 1) { // Fast path for modulo +/-2, which is very common. __ Sra(TMP, dividend, 31); @@ -3169,16 +3297,11 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru __ Sra(TMP, dividend, 31); __ Srl(TMP, TMP, 32 - ctz_imm); __ Addu(out, dividend, TMP); - if (IsUint<16>(abs_imm - 1)) { - __ Andi(out, out, abs_imm - 1); - } else { - __ Sll(out, out, 32 - ctz_imm); - __ Srl(out, out, 32 - ctz_imm); - } + __ Ins(out, ZERO, ctz_imm, 32 - ctz_imm); __ Subu(out, out, TMP); } } else { - DCHECK_EQ(type, Primitive::kPrimLong); + DCHECK_EQ(type, DataType::Type::kInt64); if (ctz_imm == 1) { // Fast path for modulo +/-2, which is very common. __ Dsra32(TMP, dividend, 31); @@ -3193,17 +3316,7 @@ void InstructionCodeGeneratorMIPS64::DivRemByPowerOfTwo(HBinaryOperation* instru __ Dsrl32(TMP, TMP, 32 - ctz_imm); } __ Daddu(out, dividend, TMP); - if (IsUint<16>(abs_imm - 1)) { - __ Andi(out, out, abs_imm - 1); - } else { - if (ctz_imm > 32) { - __ Dsll(out, out, 64 - ctz_imm); - __ Dsrl(out, out, 64 - ctz_imm); - } else { - __ Dsll32(out, out, 32 - ctz_imm); - __ Dsrl32(out, out, 32 - ctz_imm); - } - } + __ DblIns(out, ZERO, ctz_imm, 64 - ctz_imm); __ Dsubu(out, out, TMP); } } @@ -3221,17 +3334,17 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperat GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); int64_t imm = Int64FromConstant(second.GetConstant()); - Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type; + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; int64_t magic; int shift; CalculateMagicAndShiftForDivRem(imm, - (type == Primitive::kPrimLong), + (type == DataType::Type::kInt64), &magic, &shift); - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { __ LoadConst32(TMP, magic); __ MuhR6(TMP, dividend, TMP); @@ -3286,8 +3399,8 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemWithAnyConstant(HBinaryOperat void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << type; + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; LocationSummary* locations = instruction->GetLocations(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); @@ -3309,12 +3422,12 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* in GpuRegister dividend = locations->InAt(0).AsRegister<GpuRegister>(); GpuRegister divisor = second.AsRegister<GpuRegister>(); if (instruction->IsDiv()) { - if (type == Primitive::kPrimInt) + if (type == DataType::Type::kInt32) __ DivR6(out, dividend, 
divisor); else __ Ddiv(out, dividend, divisor); } else { - if (type == Primitive::kPrimInt) + if (type == DataType::Type::kInt32) __ ModR6(out, dividend, divisor); else __ Dmod(out, dividend, divisor); @@ -3324,17 +3437,17 @@ void InstructionCodeGeneratorMIPS64::GenerateDivRemIntegral(HBinaryOperation* in void LocationsBuilderMIPS64::VisitDiv(HDiv* div) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall); switch (div->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -3346,20 +3459,20 @@ void LocationsBuilderMIPS64::VisitDiv(HDiv* div) { } void InstructionCodeGeneratorMIPS64::VisitDiv(HDiv* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: GenerateDivRemIntegral(instruction); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (type == Primitive::kPrimFloat) + if (type == DataType::Type::kFloat32) __ DivS(dst, lhs, rhs); else __ DivD(dst, lhs, rhs); @@ -3377,13 +3490,13 @@ void LocationsBuilderMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) { void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instruction) { SlowPathCodeMIPS64* slow_path = - new (GetGraph()->GetArena()) DivZeroCheckSlowPathMIPS64(instruction); + new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathMIPS64(instruction); codegen_->AddSlowPath(slow_path); Location value = instruction->GetLocations()->InAt(0); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); - if (!Primitive::IsIntegralType(type)) { + if (!DataType::IsIntegralType(type)) { LOG(FATAL) << "Unexpected type " << type << " for DivZeroCheck."; return; } @@ -3403,7 +3516,7 @@ void InstructionCodeGeneratorMIPS64::VisitDivZeroCheck(HDivZeroCheck* instructio void LocationsBuilderMIPS64::VisitDoubleConstant(HDoubleConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3420,7 +3533,7 @@ void InstructionCodeGeneratorMIPS64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) { void LocationsBuilderMIPS64::VisitFloatConstant(HFloatConstant* constant) { LocationSummary* locations = - new 
(GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -3429,13 +3542,22 @@ void InstructionCodeGeneratorMIPS64::VisitFloatConstant(HFloatConstant* constant } void InstructionCodeGeneratorMIPS64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } + HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(info->GetSuspendCheck()); + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ Ld(AT, SP, kCurrentMethodStackOffset); + __ Lhu(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); + __ Addiu(TMP, TMP, 1); + __ Sh(TMP, AT, ArtMethod::HotnessCountOffset().Int32Value()); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -3622,6 +3744,114 @@ void InstructionCodeGeneratorMIPS64::GenerateIntLongCompare(IfCondition cond, } } +bool InstructionCodeGeneratorMIPS64::MaterializeIntLongCompare(IfCondition cond, + bool is64bit, + LocationSummary* input_locations, + GpuRegister dst) { + GpuRegister lhs = input_locations->InAt(0).AsRegister<GpuRegister>(); + Location rhs_location = input_locations->InAt(1); + GpuRegister rhs_reg = ZERO; + int64_t rhs_imm = 0; + bool use_imm = rhs_location.IsConstant(); + if (use_imm) { + if (is64bit) { + rhs_imm = CodeGenerator::GetInt64ValueOf(rhs_location.GetConstant()); + } else { + rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant()); + } + } else { + rhs_reg = rhs_location.AsRegister<GpuRegister>(); + } + int64_t rhs_imm_plus_one = rhs_imm + UINT64_C(1); + + switch (cond) { + case kCondEQ: + case kCondNE: + if (use_imm && IsInt<16>(-rhs_imm)) { + if (is64bit) { + __ Daddiu(dst, lhs, -rhs_imm); + } else { + __ Addiu(dst, lhs, -rhs_imm); + } + } else if (use_imm && IsUint<16>(rhs_imm)) { + __ Xori(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Xor(dst, lhs, rhs_reg); + } + return (cond == kCondEQ); + + case kCondLT: + case kCondGE: + if (use_imm && IsInt<16>(rhs_imm)) { + __ Slti(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, lhs, rhs_reg); + } + return (cond == kCondGE); + + case kCondLE: + case kCondGT: + if (use_imm && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + __ Slti(dst, lhs, rhs_imm_plus_one); + return (cond == kCondGT); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Slt(dst, rhs_reg, lhs); + return (cond == kCondLE); + } + + case kCondB: + case kCondAE: + if (use_imm && IsInt<16>(rhs_imm)) { + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. 
+ __ Sltiu(dst, lhs, rhs_imm); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, lhs, rhs_reg); + } + return (cond == kCondAE); + + case kCondBE: + case kCondA: + if (use_imm && (rhs_imm_plus_one != 0) && IsInt<16>(rhs_imm_plus_one)) { + // Simulate lhs <= rhs via lhs < rhs + 1. + // Note that this only works if rhs + 1 does not overflow + // to 0, hence the check above. + // Sltiu sign-extends its 16-bit immediate operand before + // the comparison and thus lets us compare directly with + // unsigned values in the ranges [0, 0x7fff] and + // [0x[ffffffff]ffff8000, 0x[ffffffff]ffffffff]. + __ Sltiu(dst, lhs, rhs_imm_plus_one); + return (cond == kCondA); + } else { + if (use_imm) { + rhs_reg = TMP; + __ LoadConst64(rhs_reg, rhs_imm); + } + __ Sltu(dst, rhs_reg, lhs); + return (cond == kCondBE); + } + } +} + void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition cond, bool is64bit, LocationSummary* locations, @@ -3711,12 +3941,12 @@ void InstructionCodeGeneratorMIPS64::GenerateIntLongCompareAndBranch(IfCondition void InstructionCodeGeneratorMIPS64::GenerateFpCompare(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* locations) { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { switch (cond) { case kCondEQ: __ CmpEqS(FTMP, lhs, rhs); @@ -3769,7 +3999,7 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompare(IfCondition cond, UNREACHABLE(); } } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); switch (cond) { case kCondEQ: __ CmpEqD(FTMP, lhs, rhs); @@ -3824,14 +4054,105 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompare(IfCondition cond, } } +bool InstructionCodeGeneratorMIPS64::MaterializeFpCompare(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* input_locations, + FpuRegister dst) { + FpuRegister lhs = input_locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = input_locations->InAt(1).AsFpuRegister<FpuRegister>(); + if (type == DataType::Type::kFloat32) { + switch (cond) { + case kCondEQ: + __ CmpEqS(dst, lhs, rhs); + return false; + case kCondNE: + __ CmpEqS(dst, lhs, rhs); + return true; + case kCondLT: + if (gt_bias) { + __ CmpLtS(dst, lhs, rhs); + } else { + __ CmpUltS(dst, lhs, rhs); + } + return false; + case kCondLE: + if (gt_bias) { + __ CmpLeS(dst, lhs, rhs); + } else { + __ CmpUleS(dst, lhs, rhs); + } + return false; + case kCondGT: + if (gt_bias) { + __ CmpUltS(dst, rhs, lhs); + } else { + __ CmpLtS(dst, rhs, lhs); + } + return false; + case kCondGE: + if (gt_bias) { + __ CmpUleS(dst, rhs, lhs); + } else { + __ CmpLeS(dst, rhs, lhs); + } + return false; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } else { + DCHECK_EQ(type, DataType::Type::kFloat64); + switch (cond) { + case kCondEQ: + __ CmpEqD(dst, lhs, rhs); + return false; + case kCondNE: + __ CmpEqD(dst, lhs, rhs); + return true; + case kCondLT: + if (gt_bias) { + __ CmpLtD(dst, lhs, rhs); + } else { + __ CmpUltD(dst, lhs, rhs); + } + return false; + case kCondLE: + if (gt_bias) { + __ CmpLeD(dst, lhs, rhs); + } else { + __ CmpUleD(dst, lhs, rhs); + } + return false; + case kCondGT: + if (gt_bias) { + __ CmpUltD(dst, rhs, lhs); + } 
else { + __ CmpLtD(dst, rhs, lhs); + } + return false; + case kCondGE: + if (gt_bias) { + __ CmpUleD(dst, rhs, lhs); + } else { + __ CmpLeD(dst, rhs, lhs); + } + return false; + default: + LOG(FATAL) << "Unexpected non-floating-point condition " << cond; + UNREACHABLE(); + } + } +} + void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* locations, Mips64Label* label) { FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { switch (cond) { case kCondEQ: __ CmpEqS(FTMP, lhs, rhs); @@ -3875,9 +4196,10 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond break; default: LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); } } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); switch (cond) { case kCondEQ: __ CmpEqD(FTMP, lhs, rhs); @@ -3921,6 +4243,7 @@ void InstructionCodeGeneratorMIPS64::GenerateFpCompareAndBranch(IfCondition cond break; default: LOG(FATAL) << "Unexpected non-floating-point condition"; + UNREACHABLE(); } } } @@ -3970,7 +4293,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc // The condition instruction has not been materialized, use its inputs as // the comparison and its condition as the branch condition. HCondition* condition = cond->AsCondition(); - Primitive::Type type = condition->InputAt(0)->GetType(); + DataType::Type type = condition->InputAt(0)->GetType(); LocationSummary* locations = cond->GetLocations(); IfCondition if_cond = condition->GetCondition(); Mips64Label* branch_target = true_target; @@ -3984,11 +4307,11 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc default: GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ false, locations, branch_target); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: GenerateIntLongCompareAndBranch(if_cond, /* is64bit */ true, locations, branch_target); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: GenerateFpCompareAndBranch(if_cond, condition->IsGtBias(), type, locations, branch_target); break; } @@ -4002,7 +4325,7 @@ void InstructionCodeGeneratorMIPS64::GenerateTestAndBranch(HInstruction* instruc } void LocationsBuilderMIPS64::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -4019,7 +4342,7 @@ void InstructionCodeGeneratorMIPS64::VisitIf(HIf* if_instr) { } void LocationsBuilderMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); InvokeRuntimeCallingConvention calling_convention; RegisterSet caller_saves = RegisterSet::Empty(); @@ -4039,8 +4362,309 @@ void InstructionCodeGeneratorMIPS64::VisitDeoptimize(HDeoptimize* deoptimize) { /* false_target */ nullptr); } +// This function returns true if a conditional move can be generated for 
HSelect. +// Otherwise it returns false and HSelect must be implemented in terms of conditional +// branches and regular moves. +// +// If `locations_to_set` isn't nullptr, its inputs and outputs are set for HSelect. +// +// While determining feasibility of a conditional move and setting inputs/outputs +// are two distinct tasks, this function does both because they share quite a bit +// of common logic. +static bool CanMoveConditionally(HSelect* select, LocationSummary* locations_to_set) { + bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition()); + HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + HCondition* condition = cond->AsCondition(); + + DataType::Type cond_type = + materialized ? DataType::Type::kInt32 : condition->InputAt(0)->GetType(); + DataType::Type dst_type = select->GetType(); + + HConstant* cst_true_value = select->GetTrueValue()->AsConstant(); + HConstant* cst_false_value = select->GetFalseValue()->AsConstant(); + bool is_true_value_zero_constant = + (cst_true_value != nullptr && cst_true_value->IsZeroBitPattern()); + bool is_false_value_zero_constant = + (cst_false_value != nullptr && cst_false_value->IsZeroBitPattern()); + + bool can_move_conditionally = false; + bool use_const_for_false_in = false; + bool use_const_for_true_in = false; + + if (!cond->IsConstant()) { + if (!DataType::IsFloatingPointType(cond_type)) { + if (!DataType::IsFloatingPointType(dst_type)) { + // Moving int/long on int/long condition. + if (is_true_value_zero_constant) { + // seleqz out_reg, false_reg, cond_reg + can_move_conditionally = true; + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // selnez out_reg, true_reg, cond_reg + can_move_conditionally = true; + use_const_for_false_in = true; + } else if (materialized) { + // Not materializing unmaterialized int conditions + // to keep the instruction count low. + // selnez AT, true_reg, cond_reg + // seleqz TMP, false_reg, cond_reg + // or out_reg, AT, TMP + can_move_conditionally = true; + } + } else { + // Moving float/double on int/long condition. + if (materialized) { + // Not materializing unmaterialized int conditions + // to keep the instruction count low. + can_move_conditionally = true; + if (is_true_value_zero_constant) { + // sltu TMP, ZERO, cond_reg + // mtc1 TMP, temp_cond_reg + // seleqz.fmt out_reg, false_reg, temp_cond_reg + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // sltu TMP, ZERO, cond_reg + // mtc1 TMP, temp_cond_reg + // selnez.fmt out_reg, true_reg, temp_cond_reg + use_const_for_false_in = true; + } else { + // sltu TMP, ZERO, cond_reg + // mtc1 TMP, temp_cond_reg + // sel.fmt temp_cond_reg, false_reg, true_reg + // mov.fmt out_reg, temp_cond_reg + } + } + } + } else { + if (!DataType::IsFloatingPointType(dst_type)) { + // Moving int/long on float/double condition. + can_move_conditionally = true; + if (is_true_value_zero_constant) { + // mfc1 TMP, temp_cond_reg + // seleqz out_reg, false_reg, TMP + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // mfc1 TMP, temp_cond_reg + // selnez out_reg, true_reg, TMP + use_const_for_false_in = true; + } else { + // mfc1 TMP, temp_cond_reg + // selnez AT, true_reg, TMP + // seleqz TMP, false_reg, TMP + // or out_reg, AT, TMP + } + } else { + // Moving float/double on float/double condition.
+ can_move_conditionally = true; + if (is_true_value_zero_constant) { + // seleqz.fmt out_reg, false_reg, temp_cond_reg + use_const_for_true_in = true; + } else if (is_false_value_zero_constant) { + // selnez.fmt out_reg, true_reg, temp_cond_reg + use_const_for_false_in = true; + } else { + // sel.fmt temp_cond_reg, false_reg, true_reg + // mov.fmt out_reg, temp_cond_reg + } + } + } + } + + if (can_move_conditionally) { + DCHECK(!use_const_for_false_in || !use_const_for_true_in); + } else { + DCHECK(!use_const_for_false_in); + DCHECK(!use_const_for_true_in); + } + + if (locations_to_set != nullptr) { + if (use_const_for_false_in) { + locations_to_set->SetInAt(0, Location::ConstantLocation(cst_false_value)); + } else { + locations_to_set->SetInAt(0, + DataType::IsFloatingPointType(dst_type) + ? Location::RequiresFpuRegister() + : Location::RequiresRegister()); + } + if (use_const_for_true_in) { + locations_to_set->SetInAt(1, Location::ConstantLocation(cst_true_value)); + } else { + locations_to_set->SetInAt(1, + DataType::IsFloatingPointType(dst_type) + ? Location::RequiresFpuRegister() + : Location::RequiresRegister()); + } + if (materialized) { + locations_to_set->SetInAt(2, Location::RequiresRegister()); + } + + if (can_move_conditionally) { + locations_to_set->SetOut(DataType::IsFloatingPointType(dst_type) + ? Location::RequiresFpuRegister() + : Location::RequiresRegister()); + } else { + locations_to_set->SetOut(Location::SameAsFirstInput()); + } + } + + return can_move_conditionally; +} + + +void InstructionCodeGeneratorMIPS64::GenConditionalMove(HSelect* select) { + LocationSummary* locations = select->GetLocations(); + Location dst = locations->Out(); + Location false_src = locations->InAt(0); + Location true_src = locations->InAt(1); + HInstruction* cond = select->InputAt(/* condition_input_index */ 2); + GpuRegister cond_reg = TMP; + FpuRegister fcond_reg = FTMP; + DataType::Type cond_type = DataType::Type::kInt32; + bool cond_inverted = false; + DataType::Type dst_type = select->GetType(); + + if (IsBooleanValueOrMaterializedCondition(cond)) { + cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<GpuRegister>(); + } else { + HCondition* condition = cond->AsCondition(); + LocationSummary* cond_locations = cond->GetLocations(); + IfCondition if_cond = condition->GetCondition(); + cond_type = condition->InputAt(0)->GetType(); + switch (cond_type) { + default: + cond_inverted = MaterializeIntLongCompare(if_cond, + /* is64bit */ false, + cond_locations, + cond_reg); + break; + case DataType::Type::kInt64: + cond_inverted = MaterializeIntLongCompare(if_cond, + /* is64bit */ true, + cond_locations, + cond_reg); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + cond_inverted = MaterializeFpCompare(if_cond, + condition->IsGtBias(), + cond_type, + cond_locations, + fcond_reg); + break; + } + } + + if (true_src.IsConstant()) { + DCHECK(true_src.GetConstant()->IsZeroBitPattern()); + } + if (false_src.IsConstant()) { + DCHECK(false_src.GetConstant()->IsZeroBitPattern()); + } + + switch (dst_type) { + default: + if (DataType::IsFloatingPointType(cond_type)) { + __ Mfc1(cond_reg, fcond_reg); + } + if (true_src.IsConstant()) { + if (cond_inverted) { + __ Selnez(dst.AsRegister<GpuRegister>(), false_src.AsRegister<GpuRegister>(), cond_reg); + } else { + __ Seleqz(dst.AsRegister<GpuRegister>(), false_src.AsRegister<GpuRegister>(), cond_reg); + } + } else if (false_src.IsConstant()) { + if (cond_inverted) { + __ Seleqz(dst.AsRegister<GpuRegister>(), 
true_src.AsRegister<GpuRegister>(), cond_reg); + } else { + __ Selnez(dst.AsRegister<GpuRegister>(), true_src.AsRegister<GpuRegister>(), cond_reg); + } + } else { + DCHECK_NE(cond_reg, AT); + if (cond_inverted) { + __ Seleqz(AT, true_src.AsRegister<GpuRegister>(), cond_reg); + __ Selnez(TMP, false_src.AsRegister<GpuRegister>(), cond_reg); + } else { + __ Selnez(AT, true_src.AsRegister<GpuRegister>(), cond_reg); + __ Seleqz(TMP, false_src.AsRegister<GpuRegister>(), cond_reg); + } + __ Or(dst.AsRegister<GpuRegister>(), AT, TMP); + } + break; + case DataType::Type::kFloat32: { + if (!DataType::IsFloatingPointType(cond_type)) { + // sel*.fmt tests bit 0 of the condition register, account for that. + __ Sltu(TMP, ZERO, cond_reg); + __ Mtc1(TMP, fcond_reg); + } + FpuRegister dst_reg = dst.AsFpuRegister<FpuRegister>(); + if (true_src.IsConstant()) { + FpuRegister src_reg = false_src.AsFpuRegister<FpuRegister>(); + if (cond_inverted) { + __ SelnezS(dst_reg, src_reg, fcond_reg); + } else { + __ SeleqzS(dst_reg, src_reg, fcond_reg); + } + } else if (false_src.IsConstant()) { + FpuRegister src_reg = true_src.AsFpuRegister<FpuRegister>(); + if (cond_inverted) { + __ SeleqzS(dst_reg, src_reg, fcond_reg); + } else { + __ SelnezS(dst_reg, src_reg, fcond_reg); + } + } else { + if (cond_inverted) { + __ SelS(fcond_reg, + true_src.AsFpuRegister<FpuRegister>(), + false_src.AsFpuRegister<FpuRegister>()); + } else { + __ SelS(fcond_reg, + false_src.AsFpuRegister<FpuRegister>(), + true_src.AsFpuRegister<FpuRegister>()); + } + __ MovS(dst_reg, fcond_reg); + } + break; + } + case DataType::Type::kFloat64: { + if (!DataType::IsFloatingPointType(cond_type)) { + // sel*.fmt tests bit 0 of the condition register, account for that. + __ Sltu(TMP, ZERO, cond_reg); + __ Mtc1(TMP, fcond_reg); + } + FpuRegister dst_reg = dst.AsFpuRegister<FpuRegister>(); + if (true_src.IsConstant()) { + FpuRegister src_reg = false_src.AsFpuRegister<FpuRegister>(); + if (cond_inverted) { + __ SelnezD(dst_reg, src_reg, fcond_reg); + } else { + __ SeleqzD(dst_reg, src_reg, fcond_reg); + } + } else if (false_src.IsConstant()) { + FpuRegister src_reg = true_src.AsFpuRegister<FpuRegister>(); + if (cond_inverted) { + __ SeleqzD(dst_reg, src_reg, fcond_reg); + } else { + __ SelnezD(dst_reg, src_reg, fcond_reg); + } + } else { + if (cond_inverted) { + __ SelD(fcond_reg, + true_src.AsFpuRegister<FpuRegister>(), + false_src.AsFpuRegister<FpuRegister>()); + } else { + __ SelD(fcond_reg, + false_src.AsFpuRegister<FpuRegister>(), + true_src.AsFpuRegister<FpuRegister>()); + } + __ MovD(dst_reg, fcond_reg); + } + break; + } + } +} + void LocationsBuilderMIPS64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(flag, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -4053,33 +4677,27 @@ void InstructionCodeGeneratorMIPS64::VisitShouldDeoptimizeFlag(HShouldDeoptimize } void LocationsBuilderMIPS64::VisitSelect(HSelect* select) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); - if (Primitive::IsFloatingPointType(select->GetType())) { - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetInAt(1, Location::RequiresFpuRegister()); - } else { - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetInAt(1, Location::RequiresRegister()); - } - if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) { - 
locations->SetInAt(2, Location::RequiresRegister()); - } - locations->SetOut(Location::SameAsFirstInput()); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); + CanMoveConditionally(select, locations); } void InstructionCodeGeneratorMIPS64::VisitSelect(HSelect* select) { - LocationSummary* locations = select->GetLocations(); - Mips64Label false_target; - GenerateTestAndBranch(select, - /* condition_input_index */ 2, - /* true_target */ nullptr, - &false_target); - codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); - __ Bind(&false_target); + if (CanMoveConditionally(select, /* locations_to_set */ nullptr)) { + GenConditionalMove(select); + } else { + LocationSummary* locations = select->GetLocations(); + Mips64Label false_target; + GenerateTestAndBranch(select, + /* condition_input_index */ 2, + /* true_target */ nullptr, + &false_target); + codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType()); + __ Bind(&false_target); + } } void LocationsBuilderMIPS64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetArena()) LocationSummary(info); + new (GetGraph()->GetAllocator()) LocationSummary(info); } void InstructionCodeGeneratorMIPS64::VisitNativeDebugInfo(HNativeDebugInfo*) { @@ -4092,10 +4710,10 @@ void CodeGeneratorMIPS64::GenerateNop() { void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - Primitive::Type field_type = field_info.GetFieldType(); + DataType::Type field_type = field_info.GetFieldType(); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + kEmitCompilerReadBarrier && (field_type == DataType::Type::kReference); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, object_field_get_with_read_barrier ? LocationSummary::kCallOnSlowPath @@ -4104,7 +4722,7 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { // The output overlaps in the case of an object field get with @@ -4118,13 +4736,16 @@ void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction, if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier. 
- locations->AddTemp(Location::RequiresRegister()); + if (!kBakerReadBarrierThunksEnableForFields) { + locations->AddTemp(Location::RequiresRegister()); + } } } void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info) { - Primitive::Type type = field_info.GetFieldType(); + DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); Location obj_loc = locations->InAt(0); GpuRegister obj = obj_loc.AsRegister<GpuRegister>(); @@ -4135,40 +4756,44 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, auto null_checker = GetImplicitNullChecker(instruction, codegen_); switch (type) { - case Primitive::kPrimBoolean: + case DataType::Type::kBool: + case DataType::Type::kUint8: load_type = kLoadUnsignedByte; break; - case Primitive::kPrimByte: + case DataType::Type::kInt8: load_type = kLoadSignedByte; break; - case Primitive::kPrimShort: - load_type = kLoadSignedHalfword; - break; - case Primitive::kPrimChar: + case DataType::Type::kUint16: load_type = kLoadUnsignedHalfword; break; - case Primitive::kPrimInt: - case Primitive::kPrimFloat: + case DataType::Type::kInt16: + load_type = kLoadSignedHalfword; + break; + case DataType::Type::kInt32: + case DataType::Type::kFloat32: load_type = kLoadWord; break; - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: load_type = kLoadDoubleword; break; - case Primitive::kPrimNot: + case DataType::Type::kReference: load_type = kLoadUnsignedWord; break; - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - if (!Primitive::IsFloatingPointType(type)) { + if (!DataType::IsFloatingPointType(type)) { DCHECK(dst_loc.IsRegister()); GpuRegister dst = dst_loc.AsRegister<GpuRegister>(); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // /* HeapReference<Object> */ dst = *(obj + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { - Location temp_loc = locations->GetTemp(0); + Location temp_loc = + kBakerReadBarrierThunksEnableForFields ? Location::NoLocation() : locations->GetTemp(0); // Note that a potential implicit null check is handled in this // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call. codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -4201,7 +4826,7 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, // Memory barriers, in the case of references, are handled in the // previous switch statement. 
- if (is_volatile && (type != Primitive::kPrimNot)) { + if (is_volatile && (type != DataType::Type::kReference)) { GenerateMemoryBarrier(MemBarrierKind::kLoadAny); } } @@ -4209,9 +4834,9 @@ void InstructionCodeGeneratorMIPS64::HandleFieldGet(HInstruction* instruction, void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info ATTRIBUTE_UNUSED) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { + if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1))); } else { locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1))); @@ -4221,7 +4846,7 @@ void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction, void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info, bool value_can_be_null) { - Primitive::Type type = field_info.GetFieldType(); + DataType::Type type = field_info.GetFieldType(); LocationSummary* locations = instruction->GetLocations(); GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>(); Location value_location = locations->InAt(1); @@ -4232,24 +4857,27 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, auto null_checker = GetImplicitNullChecker(instruction, codegen_); switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: store_type = kStoreByte; break; - case Primitive::kPrimShort: - case Primitive::kPrimChar: + case DataType::Type::kUint16: + case DataType::Type::kInt16: store_type = kStoreHalfword; break; - case Primitive::kPrimInt: - case Primitive::kPrimFloat: - case Primitive::kPrimNot: + case DataType::Type::kInt32: + case DataType::Type::kFloat32: + case DataType::Type::kReference: store_type = kStoreWord; break; - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: store_type = kStoreDoubleword; break; - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } @@ -4262,14 +4890,14 @@ void InstructionCodeGeneratorMIPS64::HandleFieldSet(HInstruction* instruction, int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant()); __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker); } else { - if (!Primitive::IsFloatingPointType(type)) { + if (!DataType::IsFloatingPointType(type)) { DCHECK(value_location.IsRegister()); GpuRegister src = value_location.AsRegister<GpuRegister>(); if (kPoisonHeapReferences && needs_write_barrier) { // Note that in the case where `value` is a null reference, // we do not enter this block, as a null reference does not // need poisoning. 
- DCHECK_EQ(type, Primitive::kPrimNot); + DCHECK_EQ(type, DataType::Type::kReference); __ PoisonHeapReference(TMP, src); __ StoreToOffset(store_type, TMP, obj, offset, null_checker); } else { @@ -4318,7 +4946,9 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister( GpuRegister out_reg = out.AsRegister<GpuRegister>(); if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kUseBakerReadBarrier || !kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } if (kUseBakerReadBarrier) { // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(out + offset) @@ -4358,7 +4988,9 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( if (read_barrier_option == kWithReadBarrier) { CHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { - DCHECK(maybe_temp.IsRegister()) << maybe_temp; + if (!kBakerReadBarrierThunksEnableForFields) { + DCHECK(maybe_temp.IsRegister()) << maybe_temp; + } // Load with fast path based Baker's read barrier. // /* HeapReference<Object> */ out = *(obj + offset) codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction, @@ -4381,55 +5013,136 @@ void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters( } } -void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( - HInstruction* instruction, - Location root, - GpuRegister obj, - uint32_t offset, - ReadBarrierOption read_barrier_option) { +static inline int GetBakerMarkThunkNumber(GpuRegister reg) { + static_assert(BAKER_MARK_INTROSPECTION_REGISTER_COUNT == 20, "Expecting equal"); + if (reg >= V0 && reg <= T2) { // 13 consecutive regs. + return reg - V0; + } else if (reg >= S2 && reg <= S7) { // 6 consecutive regs. + return 13 + (reg - S2); + } else if (reg == S8) { // One more. + return 19; + } + LOG(FATAL) << "Unexpected register " << reg; + UNREACHABLE(); +} + +static inline int GetBakerMarkFieldArrayThunkDisplacement(GpuRegister reg, bool short_offset) { + int num = GetBakerMarkThunkNumber(reg) + + (short_offset ?
BAKER_MARK_INTROSPECTION_REGISTER_COUNT : 0); + return num * BAKER_MARK_INTROSPECTION_FIELD_ARRAY_ENTRY_SIZE; +} + +static inline int GetBakerMarkGcRootThunkDisplacement(GpuRegister reg) { + return GetBakerMarkThunkNumber(reg) * BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRY_SIZE + + BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRIES_OFFSET; +} + +void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(HInstruction* instruction, + Location root, + GpuRegister obj, + uint32_t offset, + ReadBarrierOption read_barrier_option, + Mips64Label* label_low) { + if (label_low != nullptr) { + DCHECK_EQ(offset, 0x5678u); + } GpuRegister root_reg = root.AsRegister<GpuRegister>(); if (read_barrier_option == kWithReadBarrier) { DCHECK(kEmitCompilerReadBarrier); if (kUseBakerReadBarrier) { // Fast path implementation of art::ReadBarrier::BarrierForRoot when // Baker's read barrier are used: - // - // root = obj.field; - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - // if (temp != null) { - // root = temp(root) - // } - - // /* GcRoot<mirror::Object> */ root = *(obj + offset) - __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); - static_assert( - sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), - "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " - "have different sizes."); - static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), - "art::mirror::CompressedReference<mirror::Object> and int32_t " - "have different sizes."); - - // Slow path marking the GC root `root`. - Location temp = Location::RegisterLocation(T9); - SlowPathCodeMIPS64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64( - instruction, - root, - /*entrypoint*/ temp); - codegen_->AddSlowPath(slow_path); + if (kBakerReadBarrierThunksEnableForGcRoots) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // if (temp != nullptr) { + // temp = &gc_root_thunk<root_reg> + // root = temp(root) + // } + + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); + const int thunk_disp = GetBakerMarkGcRootThunkDisplacement(root_reg); + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign + // extension in lwu. + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + GpuRegister base = short_offset ? obj : TMP; + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. 
+ __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + if (!short_offset) { + DCHECK(!label_low); + __ Daui(base, obj, offset_high); + } + Mips64Label skip_call; + __ Beqz(T9, &skip_call, /* is_bare */ true); + if (label_low != nullptr) { + DCHECK(short_offset); + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, root_reg, base, offset_low); // Single instruction + // in delay slot. + __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); + } else { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded GC root or not. Instead, we + // load into `temp` (T9) the read barrier mark entry point corresponding + // to register `root`. If `temp` is null, it means that `GetIsGcMarking()` + // is false, and vice versa. + // + // GcRoot<mirror::Object> root = *(obj+offset); // Original reference load. + // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() + // if (temp != null) { + // root = temp(root) + // } + + if (label_low != nullptr) { + __ Bind(label_low); + } + // /* GcRoot<mirror::Object> */ root = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); + static_assert( + sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>), + "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> " + "have different sizes."); + static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t), + "art::mirror::CompressedReference<mirror::Object> and int32_t " + "have different sizes."); + + // Slow path marking the GC root `root`. + Location temp = Location::RegisterLocation(T9); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS64( + instruction, + root, + /*entrypoint*/ temp); + codegen_->AddSlowPath(slow_path); - // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg() - const int32_t entry_point_offset = - Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); - // Loading the entrypoint does not require a load acquire since it is only changed when - // threads are suspended or running a checkpoint. - __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset); - // The entrypoint is null when the GC is not marking, this prevents one load compared to - // checking GetIsGcMarking. - __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); - __ Bind(slow_path->GetExitLabel()); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset); + __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel()); + __ Bind(slow_path->GetExitLabel()); + } } else { + if (label_low != nullptr) { + __ Bind(label_low); + } // GC root loaded through a slow path for read barriers other // than Baker's. // /* GcRoot<mirror::Object>* */ root = obj + offset @@ -4438,6 +5151,9 @@ void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad( codegen_->GenerateReadBarrierForRootSlow(instruction, root, root); } } else { + if (label_low != nullptr) { + __ Bind(label_low); + } // Plain GC root load with no read barrier. 
// /* GcRoot<mirror::Object> */ root = *(obj + offset) __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset); @@ -4455,6 +5171,74 @@ void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* in DCHECK(kEmitCompilerReadBarrier); DCHECK(kUseBakerReadBarrier); + if (kBakerReadBarrierThunksEnableForFields) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // If the offset is too large to fit into the lw instruction, we + // // use an adjusted base register (TMP) here. This register + // // receives bits 16 ... 31 of the offset before the thunk invocation + // // and the thunk benefits from it. + // HeapReference<mirror::Object> reference = *(obj+offset); // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + bool short_offset = IsInt<16>(static_cast<int32_t>(offset)); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); + // There may have or may have not been a null check if the field offset is smaller than + // the page size. + // There must've been a null check in case it's actually a load from an array. + // We will, however, perform an explicit null check in the thunk as it's easier to + // do it than not. + if (instruction->IsArrayGet()) { + DCHECK(!needs_null_check); + } + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, short_offset); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); + Mips64Label skip_call; + if (short_offset) { + __ Beqzc(T9, &skip_call, /* is_bare */ true); + __ Nop(); // In forbidden slot. + __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset); // Single instruction. + } else { + int16_t offset_low = Low16Bits(offset); + int16_t offset_high = High16Bits(offset - offset_low); // Accounts for sign extension in lwu. + __ Beqz(T9, &skip_call, /* is_bare */ true); + __ Daui(TMP, obj, offset_high); // In delay slot. + __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); + // /* HeapReference<Object> */ ref = *(obj + offset) + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset_low); // Single instruction. 
+ } + if (needs_null_check) { + MaybeRecordImplicitNullCheck(instruction); + } + __ MaybeUnpoisonHeapReference(ref_reg); + return; + } + // /* HeapReference<Object> */ ref = *(obj + offset) Location no_index = Location::NoLocation(); ScaleFactor no_scale_factor = TIMES_1; @@ -4481,9 +5265,59 @@ void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* in static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); + ScaleFactor scale_factor = TIMES_4; + + if (kBakerReadBarrierThunksEnableForArrays) { + // Note that we do not actually check the value of `GetIsGcMarking()` + // to decide whether to mark the loaded reference or not. Instead, we + // load into `temp` (T9) the read barrier mark introspection entrypoint. + // If `temp` is null, it means that `GetIsGcMarking()` is false, and + // vice versa. + // + // We use thunks for the slow path. That thunk checks the reference + // and jumps to the entrypoint if needed. If the holder is not gray, + // it issues a load-load memory barrier and returns to the original + // reference load. + // + // temp = Thread::Current()->pReadBarrierMarkReg00 + // // AKA &art_quick_read_barrier_mark_introspection. + // if (temp != nullptr) { + // temp = &field_array_thunk<holder_reg> + // temp() + // } + // not_gray_return_address: + // // The element address is pre-calculated in the TMP register before the + // // thunk invocation and the thunk benefits from it. + // HeapReference<mirror::Object> reference = data[index]; // Original reference load. + // gray_return_address: + + DCHECK(temp.IsInvalid()); + DCHECK(index.IsValid()); + const int32_t entry_point_offset = + Thread::ReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(0); + // We will not do the explicit null check in the thunk as some form of a null check + // must've been done earlier. + DCHECK(!needs_null_check); + const int thunk_disp = GetBakerMarkFieldArrayThunkDisplacement(obj, /* short_offset */ false); + // Loading the entrypoint does not require a load acquire since it is only changed when + // threads are suspended or running a checkpoint. + __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset); + Mips64Label skip_call; + __ Beqz(T9, &skip_call, /* is_bare */ true); + GpuRegister ref_reg = ref.AsRegister<GpuRegister>(); + GpuRegister index_reg = index.AsRegister<GpuRegister>(); + __ Dlsa(TMP, index_reg, obj, scale_factor); // In delay slot. + __ Jialc(T9, thunk_disp); + __ Bind(&skip_call); + // /* HeapReference<Object> */ ref = *(obj + data_offset + (index << scale_factor)) + DCHECK(IsInt<16>(static_cast<int32_t>(data_offset))) << data_offset; + __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, data_offset); // Single instruction. + __ MaybeUnpoisonHeapReference(ref_reg); + return; + } + // /* HeapReference<Object> */ ref = // *(obj + data_offset + index * sizeof(HeapReference<Object>)) - ScaleFactor scale_factor = TIMES_4; GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, @@ -4576,14 +5410,14 @@ void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction // above are expected to be null in this code path. 
DCHECK_EQ(offset, 0u); DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1); - slow_path = new (GetGraph()->GetArena()) + slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction, ref, obj, /* field_offset */ index, temp_reg); } else { - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(instruction, ref); + slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathMIPS64(instruction, ref); } AddSlowPath(slow_path); @@ -4619,7 +5453,7 @@ void CodeGeneratorMIPS64::GenerateReadBarrierSlow(HInstruction* instruction, // not used by the artReadBarrierSlow entry point. // // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) + SlowPathCodeMIPS64* slow_path = new (GetScopedAllocator()) ReadBarrierForHeapReferenceSlowPathMIPS64(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); @@ -4655,7 +5489,7 @@ void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instructi // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCodeMIPS64* slow_path = - new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS64(instruction, out, root); + new (GetScopedAllocator()) ReadBarrierForRootSlowPathMIPS64(instruction, out, root); AddSlowPath(slow_path); __ Bc(slow_path->GetEntryLabel()); @@ -4670,11 +5504,12 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -4682,7 +5517,8 @@ void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) { break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); if (baker_read_barrier_slow_path) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -4721,13 +5557,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Classes must be equal for the instanceof to succeed. 
__ Xor(out, out, cls); __ Sltiu(out, out, 1); @@ -4735,13 +5573,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. Mips64Label loop; @@ -4751,7 +5591,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); __ Bnec(out, cls, &loop); @@ -4760,13 +5600,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. Mips64Label loop, success; __ Bind(&loop); @@ -4776,7 +5618,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ Bnezc(out, &loop); // If `out` is null, we use it for the result, and jump to `done`. __ Bc(&done); @@ -4786,13 +5628,15 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. Mips64Label success; __ Beqc(out, cls, &success); @@ -4802,7 +5646,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); // If `out` is null, we use it for the result, and jump to `done`. __ Beqzc(out, &done); __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset); @@ -4824,8 +5668,8 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { maybe_temp_loc, kWithoutReadBarrier); DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ Bnec(out, cls, slow_path->GetEntryLabel()); __ LoadConst32(out, 1); @@ -4853,8 +5697,8 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { // call to the runtime not using a type checking slow path). // This should also be beneficial for the other cases above. 
DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathMIPS64( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ Bc(slow_path->GetEntryLabel()); break; @@ -4869,7 +5713,7 @@ void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) { } void LocationsBuilderMIPS64::VisitIntConstant(HIntConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); locations->SetOut(Location::ConstantLocation(constant)); } @@ -4878,7 +5722,7 @@ void InstructionCodeGeneratorMIPS64::VisitIntConstant(HIntConstant* constant ATT } void LocationsBuilderMIPS64::VisitNullConstant(HNullConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); locations->SetOut(Location::ConstantLocation(constant)); } @@ -4995,6 +5839,7 @@ HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind( bool fallback_load = false; switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5021,6 +5866,7 @@ HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5068,9 +5914,9 @@ void CodeGeneratorMIPS64::GenerateStaticOrDirectCall( case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: { DCHECK(GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - NewPcRelativeMethodPatch(invoke->GetTargetMethod()); + NewBootImageMethodPatch(invoke->GetTargetMethod()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - NewPcRelativeMethodPatch(invoke->GetTargetMethod(), info_high); + NewBootImageMethodPatch(invoke->GetTargetMethod(), info_high); EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678); break; @@ -5191,7 +6037,7 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -5202,8 +6048,6 @@ void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) { if (load_kind == HLoadClass::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the type resolution or initialization and marking to save everything we need. - // Request a temp to hold the BSS entry location for the slow path. 
- locations->AddTemp(Location::RequiresRegister()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -5237,7 +6081,6 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S ? kWithoutReadBarrier : kCompilerReadBarrierOption; bool generate_null_check = false; - CodeGeneratorMIPS64::PcRelativePatchInfo* bss_info_high = nullptr; switch (load_kind) { case HLoadClass::LoadKind::kReferrersClass: DCHECK(!cls->CanCallRuntime()); @@ -5253,9 +6096,9 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(out, AT, /* placeholder */ 0x5678); break; @@ -5270,16 +6113,34 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S codegen_->DeduplicateBootImageAddressLiteral(address)); break; } + case HLoadClass::LoadKind::kBootImageClassTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = + codegen_->NewBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex(), info_high); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + __ Lwu(out, AT, /* placeholder */ 0x5678); + // Extract the reference from the slot data, i.e. clear the hash bits. + int32_t masked_hash = ClassTable::TableSlot::MaskHash( + ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); + if (masked_hash != 0) { + __ Daddiu(out, out, -masked_hash); + } + break; + } case HLoadClass::LoadKind::kBssEntry: { - bss_info_high = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); + CodeGeneratorMIPS64::PcRelativePatchInfo* bss_info_high = + codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex(), bss_info_high); - constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; - GpuRegister temp = non_baker_read_barrier - ? 
out - : locations->GetTemp(0).AsRegister<GpuRegister>(); - codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, temp, info_low); - GenerateGcRootFieldLoad(cls, out_loc, temp, /* placeholder */ 0x5678, read_barrier_option); + codegen_->EmitPcRelativeAddressPlaceholderHigh(bss_info_high, out); + GenerateGcRootFieldLoad(cls, + out_loc, + out, + /* placeholder */ 0x5678, + read_barrier_option, + &info_low->label); generate_null_check = true; break; } @@ -5299,8 +6160,8 @@ void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathMIPS64( - cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck(), bss_info_high); + SlowPathCodeMIPS64* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathMIPS64( + cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); codegen_->AddSlowPath(slow_path); if (generate_null_check) { __ Beqzc(out, slow_path->GetEntryLabel()); @@ -5319,7 +6180,7 @@ static int32_t GetExceptionTlsOffset() { void LocationsBuilderMIPS64::VisitLoadException(HLoadException* load) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -5329,7 +6190,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadException(HLoadException* load) { } void LocationsBuilderMIPS64::VisitClearException(HClearException* clear) { - new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } void InstructionCodeGeneratorMIPS64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { @@ -5339,7 +6200,7 @@ void InstructionCodeGeneratorMIPS64::VisitClearException(HClearException* clear void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { HLoadString::LoadKind load_kind = load->GetLoadKind(); LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); if (load_kind == HLoadString::LoadKind::kRuntimeCall) { InvokeRuntimeCallingConvention calling_convention; locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -5348,8 +6209,6 @@ void LocationsBuilderMIPS64::VisitLoadString(HLoadString* load) { if (load_kind == HLoadString::LoadKind::kBssEntry) { if (!kUseReadBarrier || kUseBakerReadBarrier) { // Rely on the pResolveString and marking to save everything we need. - // Request a temp to hold the BSS entry location for the slow path. 
- locations->AddTemp(Location::RequiresRegister()); RegisterSet caller_saves = RegisterSet::Empty(); InvokeRuntimeCallingConvention calling_convention; caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -5373,12 +6232,12 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); __ Daddiu(out, AT, /* placeholder */ 0x5678); - return; // No dex cache slow path. + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( @@ -5387,26 +6246,33 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA __ LoadLiteral(out, kLoadUnsignedWord, codegen_->DeduplicateBootImageAddressLiteral(address)); - return; // No dex cache slow path. + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex()); + CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = + codegen_->NewBootImageStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, AT, info_low); + __ Lwu(out, AT, /* placeholder */ 0x5678); + return; } case HLoadString::LoadKind::kBssEntry: { DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_high = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex()); + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex()); CodeGeneratorMIPS64::PcRelativePatchInfo* info_low = - codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex(), info_high); - constexpr bool non_baker_read_barrier = kUseReadBarrier && !kUseBakerReadBarrier; - GpuRegister temp = non_baker_read_barrier - ? 
out - : locations->GetTemp(0).AsRegister<GpuRegister>(); - codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, temp, info_low); + codegen_->NewStringBssEntryPatch(load->GetDexFile(), load->GetStringIndex(), info_high); + codegen_->EmitPcRelativeAddressPlaceholderHigh(info_high, out); GenerateGcRootFieldLoad(load, out_loc, - temp, + out, /* placeholder */ 0x5678, - kCompilerReadBarrierOption); + kCompilerReadBarrierOption, + &info_low->label); SlowPathCodeMIPS64* slow_path = - new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load, info_high); + new (codegen_->GetScopedAllocator()) LoadStringSlowPathMIPS64(load); codegen_->AddSlowPath(slow_path); __ Beqzc(out, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); @@ -5434,7 +6300,7 @@ void InstructionCodeGeneratorMIPS64::VisitLoadString(HLoadString* load) NO_THREA } void LocationsBuilderMIPS64::VisitLongConstant(HLongConstant* constant) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(constant); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(constant); locations->SetOut(Location::ConstantLocation(constant)); } @@ -5443,8 +6309,8 @@ void InstructionCodeGeneratorMIPS64::VisitLongConstant(HLongConstant* constant A } void LocationsBuilderMIPS64::VisitMonitorOperation(HMonitorOperation* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -5462,17 +6328,17 @@ void InstructionCodeGeneratorMIPS64::VisitMonitorOperation(HMonitorOperation* in void LocationsBuilderMIPS64::VisitMul(HMul* mul) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); switch (mul->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -5484,27 +6350,27 @@ void LocationsBuilderMIPS64::VisitMul(HMul* mul) { } void InstructionCodeGeneratorMIPS64::VisitMul(HMul* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); - if (type == Primitive::kPrimInt) + if (type == DataType::Type::kInt32) __ MulR6(dst, lhs, rhs); else __ Dmul(dst, lhs, rhs); break; } - case Primitive::kPrimFloat: - case 
Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); - if (type == Primitive::kPrimFloat) + if (type == DataType::Type::kFloat32) __ MulS(dst, lhs, rhs); else __ MulD(dst, lhs, rhs); @@ -5517,16 +6383,16 @@ void InstructionCodeGeneratorMIPS64::VisitMul(HMul* instruction) { void LocationsBuilderMIPS64::VisitNeg(HNeg* neg) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); switch (neg->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -5537,25 +6403,25 @@ void LocationsBuilderMIPS64::VisitNeg(HNeg* neg) { } void InstructionCodeGeneratorMIPS64::VisitNeg(HNeg* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - if (type == Primitive::kPrimInt) + if (type == DataType::Type::kInt32) __ Subu(dst, ZERO, src); else __ Dsubu(dst, ZERO, src); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); - if (type == Primitive::kPrimFloat) + if (type == DataType::Type::kFloat32) __ NegS(dst, src); else __ NegD(dst, src); @@ -5567,10 +6433,10 @@ void InstructionCodeGeneratorMIPS64::VisitNeg(HNeg* instruction) { } void LocationsBuilderMIPS64::VisitNewArray(HNewArray* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); } @@ -5586,15 +6452,15 @@ void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) { } void LocationsBuilderMIPS64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new 
(GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; if (instruction->IsStringAlloc()) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); } else { locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) { @@ -5617,18 +6483,18 @@ void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) } void LocationsBuilderMIPS64::VisitNot(HNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void InstructionCodeGeneratorMIPS64::VisitNot(HNot* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); LocationSummary* locations = instruction->GetLocations(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); __ Nor(dst, src, ZERO); @@ -5641,7 +6507,7 @@ void InstructionCodeGeneratorMIPS64::VisitNot(HNot* instruction) { } void LocationsBuilderMIPS64::VisitBooleanNot(HBooleanNot* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -5669,7 +6535,8 @@ void CodeGeneratorMIPS64::GenerateImplicitNullCheck(HNullCheck* instruction) { } void CodeGeneratorMIPS64::GenerateExplicitNullCheck(HNullCheck* instruction) { - SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathMIPS64(instruction); + SlowPathCodeMIPS64* slow_path = + new (GetScopedAllocator()) NullCheckSlowPathMIPS64(instruction); AddSlowPath(slow_path); Location obj = instruction->GetLocations()->InAt(0); @@ -5694,11 +6561,18 @@ void LocationsBuilderMIPS64::VisitParallelMove(HParallelMove* instruction ATTRIB } void InstructionCodeGeneratorMIPS64::VisitParallelMove(HParallelMove* instruction) { + if (instruction->GetNext()->IsSuspendCheck() && + instruction->GetBlock()->GetLoopInformation() != nullptr) { + HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); + // The back edge will generate the suspend check. 
+ codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); + } + codegen_->GetMoveResolver()->EmitNativeCode(instruction); } void LocationsBuilderMIPS64::VisitParameterValue(HParameterValue* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); if (location.IsStackSlot()) { location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); @@ -5715,7 +6589,7 @@ void InstructionCodeGeneratorMIPS64::VisitParameterValue(HParameterValue* instru void LocationsBuilderMIPS64::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); } @@ -5725,7 +6599,7 @@ void InstructionCodeGeneratorMIPS64::VisitCurrentMethod(HCurrentMethod* instruct } void LocationsBuilderMIPS64::VisitPhi(HPhi* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { locations->SetInAt(i, Location::Any()); } @@ -5737,22 +6611,22 @@ void InstructionCodeGeneratorMIPS64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED } void LocationsBuilderMIPS64::VisitRem(HRem* rem) { - Primitive::Type type = rem->GetResultType(); + DataType::Type type = rem->GetResultType(); LocationSummary::CallKind call_kind = - Primitive::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly - : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + DataType::IsFloatingPointType(type) ? LocationSummary::kCallOnMainOnly + : LocationSummary::kNoCall; + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); @@ -5766,19 +6640,20 @@ void LocationsBuilderMIPS64::VisitRem(HRem* rem) { } void InstructionCodeGeneratorMIPS64::VisitRem(HRem* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: GenerateDivRemIntegral(instruction); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - QuickEntrypointEnum entrypoint = (type == Primitive::kPrimFloat) ? 
kQuickFmodf : kQuickFmod; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { + QuickEntrypointEnum entrypoint = + (type == DataType::Type::kFloat32) ? kQuickFmodf : kQuickFmod; codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc()); - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { CheckEntrypointTypes<kQuickFmodf, float, float, float>(); } else { CheckEntrypointTypes<kQuickFmod, double, double, double>(); @@ -5808,8 +6683,8 @@ void InstructionCodeGeneratorMIPS64::VisitMemoryBarrier(HMemoryBarrier* memory_b } void LocationsBuilderMIPS64::VisitReturn(HReturn* ret) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(ret); - Primitive::Type return_type = ret->InputAt(0)->GetType(); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(ret); + DataType::Type return_type = ret->InputAt(0)->GetType(); locations->SetInAt(0, Mips64ReturnLocation(return_type)); } @@ -5942,8 +6817,8 @@ void InstructionCodeGeneratorMIPS64::VisitUnresolvedStaticFieldSet( } void LocationsBuilderMIPS64::VisitSuspendCheck(HSuspendCheck* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnSlowPath); // In suspend check slow path, usually there are no caller-save registers at all. // If SIMD instructions are present, however, we force spilling all live SIMD // registers in full width (since the runtime only saves/restores lower part). @@ -5966,8 +6841,8 @@ void InstructionCodeGeneratorMIPS64::VisitSuspendCheck(HSuspendCheck* instructio } void LocationsBuilderMIPS64::VisitThrow(HThrow* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -5978,24 +6853,25 @@ void InstructionCodeGeneratorMIPS64::VisitThrow(HThrow* instruction) { } void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { - Primitive::Type input_type = conversion->GetInputType(); - Primitive::Type result_type = conversion->GetResultType(); - DCHECK_NE(input_type, result_type); + DataType::Type input_type = conversion->GetInputType(); + DataType::Type result_type = conversion->GetResultType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; - if ((input_type == Primitive::kPrimNot) || (input_type == Primitive::kPrimVoid) || - (result_type == Primitive::kPrimNot) || (result_type == Primitive::kPrimVoid)) { + if ((input_type == DataType::Type::kReference) || (input_type == DataType::Type::kVoid) || + (result_type == DataType::Type::kReference) || (result_type == DataType::Type::kVoid)) { LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(conversion); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(conversion); - if (Primitive::IsFloatingPointType(input_type)) { + if (DataType::IsFloatingPointType(input_type)) { locations->SetInAt(0, 
Location::RequiresFpuRegister()); } else { locations->SetInAt(0, Location::RequiresRegister()); } - if (Primitive::IsFloatingPointType(result_type)) { + if (DataType::IsFloatingPointType(result_type)) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -6004,21 +6880,22 @@ void LocationsBuilderMIPS64::VisitTypeConversion(HTypeConversion* conversion) { void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conversion) { LocationSummary* locations = conversion->GetLocations(); - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); - DCHECK_NE(input_type, result_type); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; - if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); switch (result_type) { - case Primitive::kPrimChar: - __ Andi(dst, src, 0xFFFF); + case DataType::Type::kUint8: + __ Andi(dst, src, 0xFF); break; - case Primitive::kPrimByte: - if (input_type == Primitive::kPrimLong) { + case DataType::Type::kInt8: + if (input_type == DataType::Type::kInt64) { // Type conversion from long to types narrower than int is a result of code // transformations. To avoid unpredictable results for SEB and SEH, we first // need to sign-extend the low 32-bit value into bits 32 through 63. @@ -6028,8 +6905,11 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver __ Seb(dst, src); } break; - case Primitive::kPrimShort: - if (input_type == Primitive::kPrimLong) { + case DataType::Type::kUint16: + __ Andi(dst, src, 0xFFFF); + break; + case DataType::Type::kInt16: + if (input_type == DataType::Type::kInt64) { // Type conversion from long to types narrower than int is a result of code // transformations. To avoid unpredictable results for SEB and SEH, we first // need to sign-extend the low 32-bit value into bits 32 through 63. @@ -6039,12 +6919,12 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver __ Seh(dst, src); } break; - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: // Sign-extend 32-bit int into bits 32 through 63 for int-to-long and long-to-int // conversions, except when the input and output registers are the same and we are not // converting longs to shorter types. In these cases, do nothing. 
- if ((input_type == Primitive::kPrimLong) || (dst != src)) { + if ((input_type == DataType::Type::kInt64) || (dst != src)) { __ Sll(dst, src, 0); } break; @@ -6053,49 +6933,49 @@ void InstructionCodeGeneratorMIPS64::VisitTypeConversion(HTypeConversion* conver LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; } - } else if (Primitive::IsFloatingPointType(result_type) && Primitive::IsIntegralType(input_type)) { + } else if (DataType::IsFloatingPointType(result_type) && DataType::IsIntegralType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); GpuRegister src = locations->InAt(0).AsRegister<GpuRegister>(); - if (input_type == Primitive::kPrimLong) { + if (input_type == DataType::Type::kInt64) { __ Dmtc1(src, FTMP); - if (result_type == Primitive::kPrimFloat) { + if (result_type == DataType::Type::kFloat32) { __ Cvtsl(dst, FTMP); } else { __ Cvtdl(dst, FTMP); } } else { __ Mtc1(src, FTMP); - if (result_type == Primitive::kPrimFloat) { + if (result_type == DataType::Type::kFloat32) { __ Cvtsw(dst, FTMP); } else { __ Cvtdw(dst, FTMP); } } - } else if (Primitive::IsIntegralType(result_type) && Primitive::IsFloatingPointType(input_type)) { - CHECK(result_type == Primitive::kPrimInt || result_type == Primitive::kPrimLong); + } else if (DataType::IsIntegralType(result_type) && DataType::IsFloatingPointType(input_type)) { + CHECK(result_type == DataType::Type::kInt32 || result_type == DataType::Type::kInt64); GpuRegister dst = locations->Out().AsRegister<GpuRegister>(); FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); - if (result_type == Primitive::kPrimLong) { - if (input_type == Primitive::kPrimFloat) { + if (result_type == DataType::Type::kInt64) { + if (input_type == DataType::Type::kFloat32) { __ TruncLS(FTMP, src); } else { __ TruncLD(FTMP, src); } __ Dmfc1(dst, FTMP); } else { - if (input_type == Primitive::kPrimFloat) { + if (input_type == DataType::Type::kFloat32) { __ TruncWS(FTMP, src); } else { __ TruncWD(FTMP, src); } __ Mfc1(dst, FTMP); } - } else if (Primitive::IsFloatingPointType(result_type) && - Primitive::IsFloatingPointType(input_type)) { + } else if (DataType::IsFloatingPointType(result_type) && + DataType::IsFloatingPointType(input_type)) { FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>(); FpuRegister src = locations->InAt(0).AsFpuRegister<FpuRegister>(); - if (result_type == Primitive::kPrimFloat) { + if (result_type == DataType::Type::kFloat32) { __ Cvtsd(dst, src); } else { __ Cvtds(dst, src); @@ -6215,7 +7095,7 @@ void InstructionCodeGeneratorMIPS64::VisitAboveOrEqual(HAboveOrEqual* comp) { // Simple implementation of packed switch - generate cascaded compare/jumps. 
void LocationsBuilderMIPS64::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); } @@ -6311,7 +7191,7 @@ void InstructionCodeGeneratorMIPS64::VisitPackedSwitch(HPackedSwitch* switch_ins void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } @@ -6339,5 +7219,15 @@ void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet* instruct } } +void LocationsBuilderMIPS64::VisitIntermediateAddress(HIntermediateAddress* instruction + ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorMIPS64::VisitIntermediateAddress(HIntermediateAddress* instruction + ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + } // namespace mips64 } // namespace art diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index c94cc93dad..e6b69c469f 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -18,10 +18,10 @@ #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_MIPS64_H_ #include "code_generator.h" +#include "dex/type_reference.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" -#include "type_reference.h" #include "utils/mips64/assembler_mips64.h" namespace art { @@ -79,8 +79,8 @@ class InvokeDexCallingConventionVisitorMIPS64 : public InvokeDexCallingConventio InvokeDexCallingConventionVisitorMIPS64() {} virtual ~InvokeDexCallingConventionVisitorMIPS64() {} - Location GetNextLocation(Primitive::Type type) OVERRIDE; - Location GetReturnLocation(Primitive::Type type) const OVERRIDE; + Location GetNextLocation(DataType::Type type) OVERRIDE; + Location GetReturnLocation(DataType::Type type) const OVERRIDE; Location GetMethodLocation() const OVERRIDE; private: @@ -98,7 +98,7 @@ class InvokeRuntimeCallingConvention : public CallingConvention<GpuRegister, Fpu kRuntimeParameterFpuRegistersLength, kMips64PointerSize) {} - Location GetReturnLocation(Primitive::Type return_type); + Location GetReturnLocation(DataType::Type return_type); private: DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConvention); @@ -114,16 +114,16 @@ class FieldAccessCallingConventionMIPS64 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const OVERRIDE { return Location::RegisterLocation(A0); } - Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return Location::RegisterLocation(V0); } - Location GetSetValueLocation(Primitive::Type type ATTRIBUTE_UNUSED, + Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE { return is_instance ? 
Location::RegisterLocation(A2) : Location::RegisterLocation(A1); } - Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return Location::FpuRegisterLocation(F0); } @@ -142,6 +142,7 @@ class ParallelMoveResolverMIPS64 : public ParallelMoveResolverWithSwap { void RestoreScratch(int reg) OVERRIDE; void Exchange(int index1, int index2, bool double_slot); + void ExchangeQuadSlots(int index1, int index2); Mips64Assembler* GetAssembler() const; @@ -281,7 +282,8 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { Location root, GpuRegister obj, uint32_t offset, - ReadBarrierOption read_barrier_option); + ReadBarrierOption read_barrier_option, + Mips64Label* label_low = nullptr); void GenerateTestAndBranch(HInstruction* instruction, size_t condition_input_index, @@ -292,17 +294,32 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction); void GenerateDivRemIntegral(HBinaryOperation* instruction); void GenerateIntLongCompare(IfCondition cond, bool is64bit, LocationSummary* locations); + // When the function returns `false` it means that the condition holds if `dst` is non-zero + // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero + // `dst` are exchanged. + bool MaterializeIntLongCompare(IfCondition cond, + bool is64bit, + LocationSummary* input_locations, + GpuRegister dst); void GenerateIntLongCompareAndBranch(IfCondition cond, bool is64bit, LocationSummary* locations, Mips64Label* label); void GenerateFpCompare(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* locations); + // When the function returns `false` it means that the condition holds if `dst` is non-zero + // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero + // `dst` are exchanged. + bool MaterializeFpCompare(IfCondition cond, + bool gt_bias, + DataType::Type type, + LocationSummary* input_locations, + FpuRegister dst); void GenerateFpCompareAndBranch(IfCondition cond, bool gt_bias, - Primitive::Type type, + DataType::Type type, LocationSummary* locations, Mips64Label* label); void HandleGoto(HInstruction* got, HBasicBlock* successor); @@ -319,6 +336,7 @@ class InstructionCodeGeneratorMIPS64 : public InstructionCodeGenerator { int32_t VecAddress(LocationSummary* locations, size_t size, /* out */ GpuRegister* adjusted_base); + void GenConditionalMove(HSelect* select); Mips64Assembler* const assembler_; CodeGeneratorMIPS64* const codegen_; @@ -357,7 +375,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { const Mips64Assembler& GetAssembler() const OVERRIDE { return assembler_; } // Emit linker patches. - void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE; // Fast path implementation of ReadBarrier::Barrier for a heap @@ -480,14 +498,14 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void Finalize(CodeAllocator* allocator) OVERRIDE; // Code generation helpers. 
- void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; void MoveConstant(Location destination, int32_t value) OVERRIDE; void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; - void SwapLocations(Location loc1, Location loc2, Primitive::Type type); + void SwapLocations(Location loc1, Location loc2, DataType::Type type); // Generate code to invoke a runtime entry point. void InvokeRuntime(QuickEntrypointEnum entrypoint, @@ -505,7 +523,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; } - bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; } + bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; } // Check if the desired_string_load_kind is supported. If it is, return it, // otherwise return a fall-back kind that should be used instead. @@ -529,7 +547,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED, - Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE { + DataType::Type type ATTRIBUTE_UNUSED) OVERRIDE { UNIMPLEMENTED(FATAL) << "Not implemented on MIPS64"; } @@ -537,9 +555,9 @@ class CodeGeneratorMIPS64 : public CodeGenerator { void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE; void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE; - // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays, - // boot image strings and method calls. The only difference is the interpretation of - // the offset_or_index. + // The PcRelativePatchInfo is used for PC-relative addressing of methods/strings/types, + // whether through .data.bimg.rel.ro, .bss, or directly in the boot image. + // // The 16-bit halves of the 32-bit PC-relative offset are patched separately, necessitating // two patches/infos. There can be more than two patches/infos if the instruction supplying // the high half is shared with e.g. a slow path, while the low half is supplied by separate @@ -553,20 +571,13 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // ... // sw r2, low(r1) // patch // bc back - struct PcRelativePatchInfo { - PcRelativePatchInfo(const DexFile& dex_file, + struct PcRelativePatchInfo : PatchInfo<Mips64Label> { + PcRelativePatchInfo(const DexFile* dex_file, uint32_t off_or_idx, const PcRelativePatchInfo* info_high) - : target_dex_file(dex_file), - offset_or_index(off_or_idx), - label(), + : PatchInfo<Mips64Label>(dex_file, off_or_idx), patch_info_high(info_high) { } - const DexFile& target_dex_file; - // Either the dex cache array element offset or the string/type/method index. - uint32_t offset_or_index; - // Label for the instruction to patch. - Mips64Label label; // Pointer to the info for the high half patch or nullptr if this is the high half patch info. 
const PcRelativePatchInfo* patch_info_high; @@ -575,24 +586,27 @@ class CodeGeneratorMIPS64 : public CodeGenerator { DISALLOW_COPY_AND_ASSIGN(PcRelativePatchInfo); }; - PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageMethodPatch(MethodReference target_method, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, - dex::TypeIndex type_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageTypePatch(const DexFile& dex_file, + dex::TypeIndex type_index, + const PcRelativePatchInfo* info_high = nullptr); PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index, const PcRelativePatchInfo* info_high = nullptr); - PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file, - dex::StringIndex string_index, - const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewBootImageStringPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); + PcRelativePatchInfo* NewStringBssEntryPatch(const DexFile& dex_file, + dex::StringIndex string_index, + const PcRelativePatchInfo* info_high = nullptr); Literal* DeduplicateBootImageAddressLiteral(uint64_t address); void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info_high, GpuRegister out, - PcRelativePatchInfo* info_low); + PcRelativePatchInfo* info_low = nullptr); void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, @@ -618,14 +632,14 @@ class CodeGeneratorMIPS64 : public CodeGenerator { Literal* DeduplicateUint32Literal(uint32_t value, Uint32ToLiteralMap* map); Literal* DeduplicateUint64Literal(uint64_t value); - PcRelativePatchInfo* NewPcRelativePatch(const DexFile& dex_file, + PcRelativePatchInfo* NewPcRelativePatch(const DexFile* dex_file, uint32_t offset_or_index, const PcRelativePatchInfo* info_high, ArenaDeque<PcRelativePatchInfo>* patches); - template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> void EmitPcRelativeLinkerPatches(const ArenaDeque<PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches); + ArenaVector<linker::LinkerPatch>* linker_patches); // Labels for each block that will be compiled. Mips64Label* block_labels_; // Indexed by block id. @@ -642,15 +656,17 @@ class CodeGeneratorMIPS64 : public CodeGenerator { // address. Uint64ToLiteralMap uint64_literals_; // PC-relative method patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_method_patches_; // PC-relative method patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_; // PC-relative type patch info for kBootImageLinkTimePcRelative. - ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_; + ArenaDeque<PcRelativePatchInfo> boot_image_type_patches_; // PC-relative type patch info for kBssEntry. ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_; - // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC). 
- ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_; + // PC-relative String patch info; type depends on configuration (intern table or boot image PIC). + ArenaDeque<PcRelativePatchInfo> boot_image_string_patches_; + // PC-relative type patch info for kBssEntry. + ArenaDeque<PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code. StringToLiteralMap jit_string_patches_; diff --git a/compiler/optimizing/code_generator_utils.cc b/compiler/optimizing/code_generator_utils.cc index 96fe2a17e6..dd47a1fc6c 100644 --- a/compiler/optimizing/code_generator_utils.cc +++ b/compiler/optimizing/code_generator_utils.cc @@ -15,9 +15,10 @@ */ #include "code_generator_utils.h" -#include "nodes.h" -#include "base/logging.h" +#include <android-base/logging.h> + +#include "nodes.h" namespace art { diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc index f422b9fc8b..1cfdf54816 100644 --- a/compiler/optimizing/code_generator_vector_arm64.cc +++ b/compiler/optimizing/code_generator_vector_arm64.cc @@ -15,7 +15,9 @@ */ #include "code_generator_arm64.h" + #include "mirror/array-inl.h" +#include "mirror/string.h" using namespace vixl::aarch64; // NOLINT(build/namespaces) @@ -25,30 +27,32 @@ namespace arm64 { using helpers::ARM64EncodableConstantOrRegister; using helpers::Arm64CanEncodeConstantAsImmediate; using helpers::DRegisterFrom; -using helpers::VRegisterFrom; using helpers::HeapOperand; using helpers::InputRegisterAt; using helpers::Int64ConstantFrom; -using helpers::XRegisterFrom; +using helpers::OutputRegister; +using helpers::VRegisterFrom; using helpers::WRegisterFrom; +using helpers::XRegisterFrom; #define __ GetVIXLAssembler()-> void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); HInstruction* input = instruction->InputAt(0); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction)); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: if (input->IsConstant() && Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) { locations->SetInAt(0, Location::ConstantLocation(input->AsConstant())); @@ -69,8 +73,9 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* Location src_loc = locations->InAt(0); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { __ Movi(dst.V16B(), Int64ConstantFrom(src_loc)); @@ -78,8 +83,8 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* 
__ Dup(dst.V16B(), InputRegisterAt(instruction, 0)); } break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { __ Movi(dst.V8H(), Int64ConstantFrom(src_loc)); @@ -87,7 +92,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* __ Dup(dst.V8H(), InputRegisterAt(instruction, 0)); } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { __ Movi(dst.V4S(), Int64ConstantFrom(src_loc)); @@ -95,7 +100,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* __ Dup(dst.V4S(), InputRegisterAt(instruction, 0)); } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { __ Movi(dst.V2D(), Int64ConstantFrom(src_loc)); @@ -103,7 +108,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* __ Dup(dst.V2D(), XRegisterFrom(src_loc)); } break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue()); @@ -111,7 +116,7 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0); } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); if (src_loc.IsConstant()) { __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue()); @@ -125,39 +130,72 @@ void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* } } -void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister src = VRegisterFrom(locations->InAt(0)); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ 
Umov(OutputRegister(instruction), src.V4S(), 0); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Umov(OutputRegister(instruction), src.V2D(), 0); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. -static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: + case DataType::Type::kBool: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), instruction->IsVecNot() ? Location::kOutputOverlap : Location::kNoOutputOverlap); break; - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -167,17 +205,57 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister src = VRegisterFrom(locations->InAt(0)); + VRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Addv(dst.S(), src.V4S()); + break; + case HVecReduce::kMin: + __ Sminv(dst.S(), src.V4S()); + break; + case HVecReduce::kMax: + __ Smaxv(dst.S(), src.V4S()); + break; + } + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Addp(dst.D(), src.V2D()); + break; + default: + LOG(FATAL) << "Unsupported SIMD min/max"; + UNREACHABLE(); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); VRegister src = VRegisterFrom(locations->InAt(0)); VRegister dst = VRegisterFrom(locations->Out()); - Primitive::Type from = instruction->GetInputType(); - Primitive::Type to = 
instruction->GetResultType(); - if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DataType::Type from = instruction->GetInputType(); + DataType::Type to = instruction->GetResultType(); + if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Scvtf(dst.V4S(), src.V4S()); } else { @@ -186,7 +264,7 @@ void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) { } void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { @@ -194,28 +272,29 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { VRegister src = VRegisterFrom(locations->InAt(0)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ Neg(dst.V16B(), src.V16B()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Neg(dst.V8H(), src.V8H()); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Neg(dst.V4S(), src.V4S()); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Neg(dst.V2D(), src.V2D()); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fneg(dst.V4S(), src.V4S()); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Fneg(dst.V2D(), src.V2D()); break; @@ -226,7 +305,7 @@ void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) { } void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { @@ -234,38 +313,38 @@ void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) { VRegister src = VRegisterFrom(locations->InAt(0)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ Abs(dst.V16B(), src.V16B()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Abs(dst.V8H(), src.V8H()); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Abs(dst.V4S(), src.V4S()); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Abs(dst.V2D(), src.V2D()); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fabs(dst.V4S(), src.V4S()); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Fabs(dst.V2D(), src.V2D()); break; default: LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); } } void LocationsBuilderARM64::VisitVecNot(HVecNot* 
instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { @@ -273,16 +352,17 @@ void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { VRegister src = VRegisterFrom(locations->InAt(0)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: // special case boolean-not + case DataType::Type::kBool: // special case boolean-not DCHECK_EQ(16u, instruction->GetVectorLength()); __ Movi(dst.V16B(), 1); __ Eor(dst.V16B(), dst.V16B(), src.V16B()); break; - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: __ Not(dst.V16B(), src.V16B()); // lanes do not matter break; default: @@ -292,17 +372,18 @@ void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) { } // Helper to set up locations for vector binary operations. -static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -314,7 +395,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { @@ -323,28 +404,29 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ Add(dst.V16B(), lhs.V16B(), rhs.V16B()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Add(dst.V8H(), lhs.V8H(), rhs.V8H()); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Add(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimLong: + case 
DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Add(dst.V2D(), lhs.V2D(), rhs.V2D()); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D()); break; @@ -355,7 +437,7 @@ void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) { } void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { @@ -364,30 +446,29 @@ void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instructi VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - instruction->IsRounded() - ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B()) - : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B()); - } else { - instruction->IsRounded() - ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B()) - : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B()); - } + instruction->IsRounded() + ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B()) + : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + instruction->IsRounded() + ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B()) + : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - instruction->IsRounded() - ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H()) - : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H()); - } else { - instruction->IsRounded() - ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H()) - : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H()); - } + instruction->IsRounded() + ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H()) + : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + instruction->IsRounded() + ? 
__ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H()) + : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H()); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -396,7 +477,7 @@ void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instructi } void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { @@ -405,28 +486,29 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H()); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D()); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D()); break; @@ -437,7 +519,7 @@ void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) { } void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { @@ -446,24 +528,25 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H()); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D()); break; @@ -474,7 +557,7 @@ void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) { } void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } 
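The VisitVecMin and VisitVecMax hunks that follow drop the old instruction->IsUnsigned() branching because the new DataType::Type lane types (kUint8 vs kInt8, kUint16 vs kInt16, kUint32 vs kInt32) already encode signedness, so the code generator can pick the unsigned or signed SIMD opcode with a plain switch on the packed type. A minimal standalone sketch of that selection pattern, using hypothetical names rather than ART's real classes, and not part of the patch itself:

    #include <cstdio>

    // Simplified stand-ins for the signedness-aware lane types (hypothetical names).
    enum class LaneType { kUint8, kInt8, kUint16, kInt16, kUint32, kInt32 };

    // Pick the AArch64 SIMD min mnemonic purely from the lane type: unsigned
    // lane types map to UMIN, signed ones to SMIN; no separate IsUnsigned()
    // flag is consulted.
    const char* SelectVecMinMnemonic(LaneType type) {
      switch (type) {
        case LaneType::kUint8:
        case LaneType::kUint16:
        case LaneType::kUint32:
          return "umin";
        case LaneType::kInt8:
        case LaneType::kInt16:
        case LaneType::kInt32:
          return "smin";
      }
      return "unreachable";  // all enumerators handled above
    }

    int main() {
      std::printf("%s\n", SelectVecMinMnemonic(LaneType::kUint16));  // prints "umin"
      std::printf("%s\n", SelectVecMinMnemonic(LaneType::kInt32));   // prints "smin"
      return 0;
    }

The same pattern explains why the kUint8/kInt8 (and kUint16/kInt16) cases are split into separate switch arms in the hunks below instead of sharing one arm guarded by a flag.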
void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { @@ -483,11 +566,11 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D()); break; @@ -498,7 +581,7 @@ void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) { } void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { @@ -507,39 +590,36 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B()); - } else { - __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B()); - } + __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H()); - } else { - __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H()); - } + __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H()); break; - case Primitive::kPrimInt: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kUint32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S()); - } else { - __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S()); - } + __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: @@ -549,7 +629,7 @@ void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) { } void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { @@ -558,39 +638,36 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) 
{ - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B()); - } else { - __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B()); - } + __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B()); + break; + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H()); - } else { - __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H()); - } + __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H()); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H()); break; - case Primitive::kPrimInt: + case DataType::Type::kUint32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S()); - } else { - __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S()); - } + __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S()); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S()); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D()); break; default: @@ -600,7 +677,8 @@ void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) { } void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + // TODO: Allow constants supported by BIC (vector, immediate). + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) { @@ -609,14 +687,15 @@ void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) { VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ And(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: @@ -630,11 +709,12 @@ void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) { } void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) { + // TODO: Use BIC (vector, register). 
LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId(); } void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { @@ -643,14 +723,15 @@ void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: @@ -660,7 +741,7 @@ void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) { } void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { @@ -669,14 +750,15 @@ void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { VRegister rhs = VRegisterFrom(locations->InAt(1)); VRegister dst = VRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B()); // lanes do not matter break; default: @@ -686,14 +768,15 @@ void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) { } // Helper to set up locations for vector shift operations. 
-static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -705,7 +788,7 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { @@ -714,20 +797,21 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { VRegister dst = VRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ Shl(dst.V16B(), lhs.V16B(), value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Shl(dst.V8H(), lhs.V8H(), value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Shl(dst.V4S(), lhs.V4S(), value); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Shl(dst.V2D(), lhs.V2D(), value); break; @@ -738,7 +822,7 @@ void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) { } void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { @@ -747,20 +831,21 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { VRegister dst = VRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ Sshr(dst.V16B(), lhs.V16B(), value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Sshr(dst.V8H(), lhs.V8H(), value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Sshr(dst.V4S(), lhs.V4S(), value); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, 
instruction->GetVectorLength()); __ Sshr(dst.V2D(), lhs.V2D(), value); break; @@ -771,7 +856,7 @@ void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) { } void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { @@ -780,20 +865,21 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { VRegister dst = VRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ Ushr(dst.V16B(), lhs.V16B(), value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Ushr(dst.V8H(), lhs.V8H(), value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ushr(dst.V4S(), lhs.V4S(), value); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Ushr(dst.V2D(), lhs.V2D(), value); break; @@ -803,20 +889,92 @@ void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) { } } -void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instr); - switch (instr->GetPackedType()) { - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - locations->SetInAt( - HVecMultiplyAccumulate::kInputAccumulatorIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresFpuRegister()); - locations->SetInAt( - HVecMultiplyAccumulate::kInputMulRightIndex, Location::RequiresFpuRegister()); - DCHECK_EQ(HVecMultiplyAccumulate::kInputAccumulatorIndex, 0); +void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister dst = VRegisterFrom(locations->Out()); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ Movi(dst.V16B(), 0); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0)); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0)); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0)); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector accumulations. +static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); break; default: @@ -825,35 +983,43 @@ void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* i } } +void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); +} + // Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a // 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result. // However vector MultiplyAccumulate instruction is not affected. 
-void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LocationSummary* locations = instr->GetLocations(); - VRegister acc = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputAccumulatorIndex)); - VRegister left = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulLeftIndex)); - VRegister right = VRegisterFrom(locations->InAt(HVecMultiplyAccumulate::kInputMulRightIndex)); - switch (instr->GetPackedType()) { - case Primitive::kPrimByte: - DCHECK_EQ(16u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { +void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister acc = VRegisterFrom(locations->InAt(0)); + VRegister left = VRegisterFrom(locations->InAt(1)); + VRegister right = VRegisterFrom(locations->InAt(2)); + + DCHECK(locations->InAt(0).Equals(locations->Out())); + + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ Mla(acc.V16B(), left.V16B(), right.V16B()); } else { __ Mls(acc.V16B(), left.V16B(), right.V16B()); } break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: - DCHECK_EQ(8u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ Mla(acc.V8H(), left.V8H(), right.V8H()); } else { __ Mls(acc.V8H(), left.V8H(), right.V8H()); } break; - case Primitive::kPrimInt: - DCHECK_EQ(4u, instr->GetVectorLength()); - if (instr->GetOpKind() == HInstruction::kAdd) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { __ Mla(acc.V4S(), left.V4S(), right.V4S()); } else { __ Mls(acc.V4S(), left.V4S(), right.V4S()); @@ -861,23 +1027,207 @@ void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccum break; default: LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); + // Some conversions require temporary registers. 
+ LocationSummary* locations = instruction->GetLocations(); + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + switch (a->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + switch (instruction->GetPackedType()) { + case DataType::Type::kInt64: + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + FALLTHROUGH_INTENDED; + case DataType::Type::kInt32: + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + break; + } + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + if (instruction->GetPackedType() == DataType::Type::kInt64) { + locations->AddTemp(Location::RequiresFpuRegister()); + locations->AddTemp(Location::RequiresFpuRegister()); + } + break; + case DataType::Type::kInt32: + case DataType::Type::kInt64: + if (instruction->GetPackedType() == a->GetPackedType()) { + locations->AddTemp(Location::RequiresFpuRegister()); + } + break; + default: + break; + } +} + +void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VRegister acc = VRegisterFrom(locations->InAt(0)); + VRegister left = VRegisterFrom(locations->InAt(1)); + VRegister right = VRegisterFrom(locations->InAt(2)); + + DCHECK(locations->InAt(0).Equals(locations->Out())); + + // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + switch (a->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Sabal(acc.V8H(), left.V8B(), right.V8B()); + __ Sabal2(acc.V8H(), left.V16B(), right.V16B()); + break; + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + VRegister tmp1 = VRegisterFrom(locations->GetTemp(0)); + VRegister tmp2 = VRegisterFrom(locations->GetTemp(1)); + __ Sxtl(tmp1.V8H(), left.V8B()); + __ Sxtl(tmp2.V8H(), right.V8B()); + __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H()); + __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H()); + __ Sxtl2(tmp1.V8H(), left.V16B()); + __ Sxtl2(tmp2.V8H(), right.V16B()); + __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H()); + __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H()); + break; + } + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VRegister tmp1 = VRegisterFrom(locations->GetTemp(0)); + VRegister tmp2 = VRegisterFrom(locations->GetTemp(1)); + VRegister tmp3 = VRegisterFrom(locations->GetTemp(2)); + VRegister tmp4 = VRegisterFrom(locations->GetTemp(3)); + __ Sxtl(tmp1.V8H(), left.V8B()); + __ Sxtl(tmp2.V8H(), right.V8B()); + __ Sxtl(tmp3.V4S(), tmp1.V4H()); + __ Sxtl(tmp4.V4S(), tmp2.V4H()); + __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); + __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); + __ Sxtl2(tmp3.V4S(), tmp1.V8H()); + __ Sxtl2(tmp4.V4S(), tmp2.V8H()); + __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); + __ Sabal2(acc.V2D(), tmp3.V4S(), 
tmp4.V4S()); + __ Sxtl2(tmp1.V8H(), left.V16B()); + __ Sxtl2(tmp2.V8H(), right.V16B()); + __ Sxtl(tmp3.V4S(), tmp1.V4H()); + __ Sxtl(tmp4.V4S(), tmp2.V4H()); + __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); + __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); + __ Sxtl2(tmp3.V4S(), tmp1.V8H()); + __ Sxtl2(tmp4.V4S(), tmp2.V8H()); + __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S()); + __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S()); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Sabal(acc.V4S(), left.V4H(), right.V4H()); + __ Sabal2(acc.V4S(), left.V8H(), right.V8H()); + break; + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VRegister tmp1 = VRegisterFrom(locations->GetTemp(0)); + VRegister tmp2 = VRegisterFrom(locations->GetTemp(1)); + __ Sxtl(tmp1.V4S(), left.V4H()); + __ Sxtl(tmp2.V4S(), right.V4H()); + __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S()); + __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S()); + __ Sxtl2(tmp1.V4S(), left.V8H()); + __ Sxtl2(tmp2.V4S(), right.V8H()); + __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S()); + __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S()); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + __ Sub(tmp.V4S(), left.V4S(), right.V4S()); + __ Abs(tmp.V4S(), tmp.V4S()); + __ Add(acc.V4S(), acc.V4S(), tmp.V4S()); + break; + } + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Sabal(acc.V2D(), left.V2S(), right.V2S()); + __ Sabal2(acc.V2D(), left.V4S(), right.V4S()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VRegister tmp = VRegisterFrom(locations->GetTemp(0)); + __ Sub(tmp.V2D(), left.V2D(), right.V2D()); + __ Abs(tmp.V2D(), tmp.V2D()); + __ Add(acc.V2D(), acc.V2D(), tmp.V2D()); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; } } // Helper to set up locations for vector memory operations. 
-static void CreateVecMemLocations(ArenaAllocator* arena, +static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, bool is_load) { - LocationSummary* locations = new (arena) LocationSummary(instruction); + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (is_load) { @@ -929,18 +1279,19 @@ MemOperand InstructionCodeGeneratorARM64::VecAddress( } void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true); } void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); VRegister reg = VRegisterFrom(locations->Out()); UseScratchRegisterScope temps(GetVIXLAssembler()); Register scratch; switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: + case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt. + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); // Special handling of compressed/uncompressed string load. 
if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { @@ -968,13 +1319,13 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { return; } FALLTHROUGH_INTENDED; - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimFloat: - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kInt32: + case DataType::Type::kFloat32: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch)); @@ -986,25 +1337,26 @@ void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) { } void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false); } void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); VRegister reg = VRegisterFrom(locations->InAt(2)); UseScratchRegisterScope temps(GetVIXLAssembler()); Register scratch; switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimFloat: - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kFloat32: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch)); diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc index 527691d9d9..7c3155ab73 100644 --- a/compiler/optimizing/code_generator_vector_arm_vixl.cc +++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc @@ -28,18 +28,20 @@ using helpers::Int64ConstantFrom; using helpers::InputDRegisterAt; using helpers::InputRegisterAt; using helpers::OutputDRegister; +using helpers::OutputRegister; using helpers::RegisterFrom; #define __ GetVIXLAssembler()-> void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); 
locations->SetOut(Location::RequiresFpuRegister()); break; @@ -53,17 +55,18 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala LocationSummary* locations = instruction->GetLocations(); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vdup(Untyped8, dst, InputRegisterAt(instruction, 0)); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vdup(Untyped16, dst, InputRegisterAt(instruction, 0)); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Vdup(Untyped32, dst, InputRegisterAt(instruction, 0)); break; @@ -73,36 +76,48 @@ void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScala } } -void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void InstructionCodeGeneratorARMVIXL::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister src = DRegisterFrom(locations->InAt(0)); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vmov(OutputRegister(instruction), DRegisterLane(src, 0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. -static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: + case DataType::Type::kBool: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), instruction->IsVecNot() ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); break; - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -112,8 +127,37 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderARMVIXL::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister src = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(2u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Vpadd(DataTypeValue::I32, dst, src, src); + break; + case HVecReduce::kMin: + __ Vpmin(DataTypeValue::S32, dst, src, src); + break; + case HVecReduce::kMax: + __ Vpmax(DataTypeValue::S32, dst, src, src); + break; + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecCnv(HVecCnv* instruction) { @@ -121,7 +165,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecCnv(HVecCnv* instruction) { } void LocationsBuilderARMVIXL::VisitVecNeg(HVecNeg* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { @@ -129,16 +173,17 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { vixl32::DRegister src = DRegisterFrom(locations->InAt(0)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vneg(DataTypeValue::S8, dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vneg(DataTypeValue::S16, dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Vneg(DataTypeValue::S32, dst, src); break; @@ -149,7 +194,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) { } void LocationsBuilderARMVIXL::VisitVecAbs(HVecAbs* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) { @@ -157,16 +202,15 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) { vixl32::DRegister src = DRegisterFrom(locations->InAt(0)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case 
Primitive::kPrimByte: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vabs(DataTypeValue::S8, dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vabs(DataTypeValue::S16, dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Vabs(DataTypeValue::S32, dst, src); break; @@ -177,7 +221,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) { } void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { @@ -185,15 +229,16 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { vixl32::DRegister src = DRegisterFrom(locations->InAt(0)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: // special case boolean-not + case DataType::Type::kBool: // special case boolean-not DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vmov(I8, dst, 1); __ Veor(dst, dst, src); break; - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: __ Vmvn(I8, dst, src); // lanes do not matter break; default: @@ -203,14 +248,15 @@ void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) { } // Helper to set up locations for vector binary operations. 
-static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -222,7 +268,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderARMVIXL::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { @@ -231,16 +277,17 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vadd(I8, dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vadd(I16, dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Vadd(I32, dst, lhs, rhs); break; @@ -251,7 +298,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) { } void LocationsBuilderARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { @@ -260,30 +307,29 @@ void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruc vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - instruction->IsRounded() - ? __ Vrhadd(DataTypeValue::U8, dst, lhs, rhs) - : __ Vhadd(DataTypeValue::U8, dst, lhs, rhs); - } else { - instruction->IsRounded() - ? __ Vrhadd(DataTypeValue::S8, dst, lhs, rhs) - : __ Vhadd(DataTypeValue::S8, dst, lhs, rhs); - } + instruction->IsRounded() + ? __ Vrhadd(DataTypeValue::U8, dst, lhs, rhs) + : __ Vhadd(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + instruction->IsRounded() + ? 
__ Vrhadd(DataTypeValue::S8, dst, lhs, rhs) + : __ Vhadd(DataTypeValue::S8, dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - instruction->IsRounded() - ? __ Vrhadd(DataTypeValue::U16, dst, lhs, rhs) - : __ Vhadd(DataTypeValue::U16, dst, lhs, rhs); - } else { - instruction->IsRounded() - ? __ Vrhadd(DataTypeValue::S16, dst, lhs, rhs) - : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs); - } + instruction->IsRounded() + ? __ Vrhadd(DataTypeValue::U16, dst, lhs, rhs) + : __ Vhadd(DataTypeValue::U16, dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + instruction->IsRounded() + ? __ Vrhadd(DataTypeValue::S16, dst, lhs, rhs) + : __ Vhadd(DataTypeValue::S16, dst, lhs, rhs); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -292,7 +338,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecHalvingAdd(HVecHalvingAdd* instruc } void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { @@ -301,16 +347,17 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vsub(I8, dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vsub(I16, dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Vsub(I32, dst, lhs, rhs); break; @@ -321,7 +368,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) { } void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { @@ -330,16 +377,17 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vmul(I8, dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vmul(I16, dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Vmul(I32, dst, lhs, rhs); break; @@ -350,7 +398,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) { } void LocationsBuilderARMVIXL::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void 
InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) { @@ -358,7 +406,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) { } void LocationsBuilderARMVIXL::VisitVecMin(HVecMin* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { @@ -367,30 +415,29 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Vmin(DataTypeValue::U8, dst, lhs, rhs); - } else { - __ Vmin(DataTypeValue::S8, dst, lhs, rhs); - } + __ Vmin(DataTypeValue::U8, dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vmin(DataTypeValue::S8, dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Vmin(DataTypeValue::U16, dst, lhs, rhs); - } else { - __ Vmin(DataTypeValue::S16, dst, lhs, rhs); - } + __ Vmin(DataTypeValue::U16, dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt16: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Vmin(DataTypeValue::S16, dst, lhs, rhs); + break; + case DataType::Type::kUint32: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Vmin(DataTypeValue::U32, dst, lhs, rhs); - } else { - __ Vmin(DataTypeValue::S32, dst, lhs, rhs); - } + __ Vmin(DataTypeValue::U32, dst, lhs, rhs); + break; + case DataType::Type::kInt32: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vmin(DataTypeValue::S32, dst, lhs, rhs); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -399,7 +446,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMin(HVecMin* instruction) { } void LocationsBuilderARMVIXL::VisitVecMax(HVecMax* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { @@ -408,30 +455,29 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Vmax(DataTypeValue::U8, dst, lhs, rhs); - } else { - __ Vmax(DataTypeValue::S8, dst, lhs, rhs); - } + __ Vmax(DataTypeValue::U8, dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Vmax(DataTypeValue::S8, dst, lhs, rhs); + break; + case DataType::Type::kUint16: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Vmax(DataTypeValue::U16, dst, lhs, rhs); - } else { - __ Vmax(DataTypeValue::S16, dst, lhs, rhs); - } + __ Vmax(DataTypeValue::U16, dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt16: + DCHECK_EQ(4u, 
instruction->GetVectorLength()); + __ Vmax(DataTypeValue::S16, dst, lhs, rhs); + break; + case DataType::Type::kUint32: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Vmax(DataTypeValue::U32, dst, lhs, rhs); - } else { - __ Vmax(DataTypeValue::S32, dst, lhs, rhs); - } + __ Vmax(DataTypeValue::U32, dst, lhs, rhs); + break; + case DataType::Type::kInt32: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vmax(DataTypeValue::S32, dst, lhs, rhs); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -440,7 +486,8 @@ void InstructionCodeGeneratorARMVIXL::VisitVecMax(HVecMax* instruction) { } void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + // TODO: Allow constants supported by VAND (immediate). + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { @@ -449,11 +496,12 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: __ Vand(I8, dst, lhs, rhs); break; default: @@ -463,7 +511,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) { } void LocationsBuilderARMVIXL::VisitVecAndNot(HVecAndNot* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecAndNot(HVecAndNot* instruction) { @@ -471,7 +519,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecAndNot(HVecAndNot* instruction) { } void LocationsBuilderARMVIXL::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { @@ -480,11 +528,12 @@ void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); vixl32::DRegister dst = DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: __ Vorr(I8, dst, lhs, rhs); break; default: @@ -494,7 +543,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) { } void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { @@ -503,11 +552,12 @@ void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { vixl32::DRegister rhs = DRegisterFrom(locations->InAt(1)); vixl32::DRegister dst = 
DRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: __ Veor(I8, dst, lhs, rhs); break; default: @@ -517,13 +567,14 @@ void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) { } // Helper to set up locations for vector shift operations. -static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -535,7 +586,7 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderARMVIXL::VisitVecShl(HVecShl* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { @@ -544,16 +595,17 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { vixl32::DRegister dst = DRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vshl(I8, dst, lhs, value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vshl(I16, dst, lhs, value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Vshl(I32, dst, lhs, value); break; @@ -564,7 +616,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) { } void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { @@ -573,16 +625,17 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { vixl32::DRegister dst = DRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vshr(DataTypeValue::S8, dst, lhs, value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + 
case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vshr(DataTypeValue::S16, dst, lhs, value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Vshr(DataTypeValue::S32, dst, lhs, value); break; @@ -593,7 +646,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) { } void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { @@ -602,16 +655,17 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { vixl32::DRegister dst = DRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); __ Vshr(DataTypeValue::U8, dst, lhs, value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); __ Vshr(DataTypeValue::U16, dst, lhs, value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); __ Vshr(DataTypeValue::U32, dst, lhs, value); break; @@ -621,12 +675,119 @@ void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) { } } -void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister dst = DRegisterFrom(locations->Out()); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ Vmov(I32, dst, 0); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Vmov(Untyped32, DRegisterLane(dst, 0), InputRegisterAt(instruction, 0)); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector accumulations. 
+static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); } -void InstructionCodeGeneratorARMVIXL::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void InstructionCodeGeneratorARMVIXL::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + vixl32::DRegister acc = DRegisterFrom(locations->InAt(0)); + vixl32::DRegister left = DRegisterFrom(locations->InAt(1)); + vixl32::DRegister right = DRegisterFrom(locations->InAt(2)); + + DCHECK(locations->InAt(0).Equals(locations->Out())); + + // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(a->GetPackedType(), b->GetPackedType()); + switch (a->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(2u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + UseScratchRegisterScope temps(GetVIXLAssembler()); + vixl32::DRegister tmp = temps.AcquireD(); + __ Vsub(DataTypeValue::I32, tmp, left, right); + __ Vabs(DataTypeValue::S32, tmp, tmp); + __ Vadd(DataTypeValue::I32, acc, acc, tmp); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Return whether the vector memory access operation is guaranteed to be word-aligned (ARM word @@ -636,16 +797,17 @@ static bool IsWordAligned(HVecMemoryOperation* instruction) { } // Helper to set up locations for vector memory operations. 
-static void CreateVecMemLocations(ArenaAllocator* arena, +static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, bool is_load) { - LocationSummary* locations = new (arena) LocationSummary(instruction); + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (is_load) { @@ -671,7 +833,7 @@ MemOperand InstructionCodeGeneratorARMVIXL::VecAddress( vixl32::Register base = InputRegisterAt(instruction, 0); Location index = locations->InAt(1); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); size_t shift = ComponentSizeShiftWidth(size); @@ -697,7 +859,7 @@ AlignedMemOperand InstructionCodeGeneratorARMVIXL::VecAddressUnaligned( vixl32::Register base = InputRegisterAt(instruction, 0); Location index = locations->InAt(1); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); uint32_t offset = mirror::Array::DataOffset(size).Uint32Value(); size_t shift = ComponentSizeShiftWidth(size); @@ -716,7 +878,7 @@ AlignedMemOperand InstructionCodeGeneratorARMVIXL::VecAddressUnaligned( } void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true); } void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { @@ -724,11 +886,12 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register scratch; - DCHECK(instruction->GetPackedType() != Primitive::kPrimChar || !instruction->IsStringCharAt()); + DCHECK(instruction->GetPackedType() != DataType::Type::kUint16 || !instruction->IsStringCharAt()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); if (IsWordAligned(instruction)) { __ Vldr(reg, VecAddress(instruction, &temps, &scratch)); @@ -738,8 +901,8 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { VecAddressUnaligned(instruction, &temps, &scratch)); } break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); if (IsWordAligned(instruction)) { __ Vldr(reg, VecAddress(instruction, &temps, &scratch)); @@ -749,7 +912,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { VecAddressUnaligned(instruction, &temps, &scratch)); } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); if (IsWordAligned(instruction)) { __ Vldr(reg, VecAddress(instruction, &temps, 
&scratch)); @@ -766,7 +929,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) { } void LocationsBuilderARMVIXL::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false); } void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { @@ -774,8 +937,9 @@ void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register scratch; switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(8u, instruction->GetVectorLength()); if (IsWordAligned(instruction)) { __ Vstr(reg, VecAddress(instruction, &temps, &scratch)); @@ -785,8 +949,8 @@ void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { VecAddressUnaligned(instruction, &temps, &scratch)); } break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(4u, instruction->GetVectorLength()); if (IsWordAligned(instruction)) { __ Vstr(reg, VecAddress(instruction, &temps, &scratch)); @@ -796,7 +960,7 @@ void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) { VecAddressUnaligned(instruction, &temps, &scratch)); } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(2u, instruction->GetVectorLength()); if (IsWordAligned(instruction)) { __ Vstr(reg, VecAddress(instruction, &temps, &scratch)); diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc index ea36e90112..ed9de96496 100644 --- a/compiler/optimizing/code_generator_vector_mips.cc +++ b/compiler/optimizing/code_generator_vector_mips.cc @@ -24,19 +24,20 @@ namespace mips { #define __ down_cast<MipsAssembler*>(GetAssembler())-> // NOLINT void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -50,33 +51,38 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* LocationSummary* locations = instruction->GetLocations(); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case 
DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ FillB(dst, locations->InAt(0).AsRegister<Register>()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ FillH(dst, locations->InAt(0).AsRegister<Register>()); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FillW(dst, locations->InAt(0).AsRegister<Register>()); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ Mtc1(locations->InAt(0).AsRegisterPairLow<Register>(), FTMP); - __ MoveToFpuHigh(locations->InAt(0).AsRegisterPairHigh<Register>(), FTMP); + __ InsertW(static_cast<VectorRegister>(FTMP), + locations->InAt(0).AsRegisterPairLow<Register>(), + 0); + __ InsertW(static_cast<VectorRegister>(FTMP), + locations->InAt(0).AsRegisterPairHigh<Register>(), + 1); __ ReplicateFPToVectorRegister(dst, FTMP, /* is_double */ true); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FRegister>(), /* is_double */ false); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FRegister>(), @@ -88,42 +94,78 @@ void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* } } -void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void InstructionCodeGeneratorMIPS::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Copy_sW(locations->Out().AsRegister<Register>(), src, 0); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Copy_sW(locations->Out().AsRegisterPairLow<Register>(), src, 0); + __ 
Copy_sW(locations->Out().AsRegisterPairHigh<Register>(), src, 1); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. -static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: +static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + DataType::Type type = instruction->GetPackedType(); + switch (type) { + case DataType::Type::kBool: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), instruction->IsVecNot() ? Location::kOutputOverlap : Location::kNoOutputOverlap); break; - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), - (instruction->IsVecNeg() || instruction->IsVecAbs()) + (instruction->IsVecNeg() || instruction->IsVecAbs() || + (instruction->IsVecReduce() && type == DataType::Type::kInt64)) ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); break; @@ -133,17 +175,72 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderMIPS::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + VectorRegister tmp = static_cast<VectorRegister>(FTMP); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Hadd_sD(tmp, src, src); + __ IlvlD(dst, tmp, tmp); + __ AddvW(dst, dst, tmp); + break; + case HVecReduce::kMin: + __ IlvodW(tmp, src, src); + __ Min_sW(tmp, src, tmp); + __ IlvlW(dst, tmp, tmp); + __ Min_sW(dst, dst, tmp); + break; + case HVecReduce::kMax: + __ IlvodW(tmp, src, src); + __ Max_sW(tmp, src, tmp); + __ IlvlW(dst, tmp, tmp); + __ Max_sW(dst, dst, tmp); + break; + } + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ IlvlD(dst, src, src); + __ AddvD(dst, dst, src); + break; + case HVecReduce::kMin: + __ IlvlD(dst, src, src); + __ Min_sD(dst, dst, src); + break; + case HVecReduce::kMax: + __ IlvlD(dst, src, src); + __ Max_sD(dst, dst, src); + break; + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); VectorRegister src = VectorRegisterFrom(locations->InAt(0)); VectorRegister dst = VectorRegisterFrom(locations->Out()); - Primitive::Type from = instruction->GetInputType(); - Primitive::Type to = instruction->GetResultType(); - if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DataType::Type from = instruction->GetInputType(); + DataType::Type to = instruction->GetResultType(); + if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { @@ -152,7 +249,7 @@ void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) { } void LocationsBuilderMIPS::VisitVecNeg(HVecNeg* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { @@ -160,33 +257,34 @@ void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { VectorRegister src = VectorRegisterFrom(locations->InAt(0)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ FillB(dst, ZERO); __ SubvB(dst, dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ FillH(dst, ZERO); 
__ SubvH(dst, dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FillW(dst, ZERO); __ SubvW(dst, dst, src); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FillW(dst, ZERO); __ SubvD(dst, dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FillW(dst, ZERO); __ FsubW(dst, dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FillW(dst, ZERO); __ FsubD(dst, dst, src); @@ -198,7 +296,7 @@ void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) { } void LocationsBuilderMIPS::VisitVecAbs(HVecAbs* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) { @@ -206,34 +304,33 @@ void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) { VectorRegister src = VectorRegisterFrom(locations->InAt(0)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ FillB(dst, ZERO); // all zeroes __ Add_aB(dst, dst, src); // dst = abs(0) + abs(src) break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ FillH(dst, ZERO); // all zeroes __ Add_aH(dst, dst, src); // dst = abs(0) + abs(src) break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FillW(dst, ZERO); // all zeroes __ Add_aW(dst, dst, src); // dst = abs(0) + abs(src) break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FillW(dst, ZERO); // all zeroes __ Add_aD(dst, dst, src); // dst = abs(0) + abs(src) break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ LdiW(dst, -1); // all ones __ SrliW(dst, dst, 1); __ AndV(dst, dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ LdiD(dst, -1); // all ones __ SrliD(dst, dst, 1); @@ -246,7 +343,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) { } void LocationsBuilderMIPS::VisitVecNot(HVecNot* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { @@ -254,18 +351,19 @@ void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { VectorRegister src = VectorRegisterFrom(locations->InAt(0)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: // special case boolean-not + case DataType::Type::kBool: // special case boolean-not DCHECK_EQ(16u, instruction->GetVectorLength()); __ LdiB(dst, 1); __ XorV(dst, dst, src); break; - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kUint8: + case 
DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ NorV(dst, src, src); // lanes do not matter @@ -277,17 +375,18 @@ void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) { } // Helper to set up locations for vector binary operations. -static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -299,7 +398,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderMIPS::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { @@ -308,28 +407,29 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ AddvB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ AddvH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ AddvW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ AddvD(dst, lhs, rhs); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FaddW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FaddD(dst, lhs, rhs); break; @@ -340,7 +440,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) { } void LocationsBuilderMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { 
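The VisitVecHalvingAdd hunk that follows, like the ARM VIXL one earlier in this diff, no longer tests IsUnsigned() at code-generation time: signedness is now carried by the packed type itself (kUint8 vs kInt8, kUint16 vs kInt16), so only the rounding flag remains a per-instruction property. For reference, a scalar C++ sketch of the per-lane arithmetic implemented by Ave_uB/Aver_uB (MIPS MSA) and Vhadd/Vrhadd (ARM NEON) is given below; the helper names are illustrative only and are not part of this patch.

#include <cstdint>

// Truncating vs. rounding average of two unsigned 8-bit lanes, computed in a
// wider type so the intermediate sum cannot overflow
// (Ave_uB / Aver_uB on MSA, Vhadd.U8 / Vrhadd.U8 on NEON).
static inline uint8_t HalvingAddUnsigned(uint8_t a, uint8_t b, bool rounded) {
  unsigned sum = unsigned{a} + unsigned{b} + (rounded ? 1u : 0u);
  return static_cast<uint8_t>(sum >> 1);
}

// Signed variant (Ave_sB / Aver_sB, Vhadd.S8 / Vrhadd.S8): an arithmetic shift
// right by one, written here as an explicit floor division so the behaviour is
// well-defined for negative sums.
static inline int8_t HalvingAddSigned(int8_t a, int8_t b, bool rounded) {
  int sum = int{a} + int{b} + (rounded ? 1 : 0);
  return static_cast<int8_t>(sum >= 0 ? sum / 2 : (sum - 1) / 2);
}

The same widen-then-shift shape applies to the 16-bit lanes handled by these hunks, with the vector length halved accordingly (8 lanes per D/MSA register instead of 16).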
@@ -349,30 +449,29 @@ void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instructio VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - instruction->IsRounded() - ? __ Aver_uB(dst, lhs, rhs) - : __ Ave_uB(dst, lhs, rhs); - } else { - instruction->IsRounded() - ? __ Aver_sB(dst, lhs, rhs) - : __ Ave_sB(dst, lhs, rhs); - } + instruction->IsRounded() + ? __ Aver_uB(dst, lhs, rhs) + : __ Ave_uB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + instruction->IsRounded() + ? __ Aver_sB(dst, lhs, rhs) + : __ Ave_sB(dst, lhs, rhs); + break; + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - instruction->IsRounded() - ? __ Aver_uH(dst, lhs, rhs) - : __ Ave_uH(dst, lhs, rhs); - } else { - instruction->IsRounded() - ? __ Aver_sH(dst, lhs, rhs) - : __ Ave_sH(dst, lhs, rhs); - } + instruction->IsRounded() + ? __ Aver_uH(dst, lhs, rhs) + : __ Ave_uH(dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + instruction->IsRounded() + ? __ Aver_sH(dst, lhs, rhs) + : __ Ave_sH(dst, lhs, rhs); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -381,7 +480,7 @@ void InstructionCodeGeneratorMIPS::VisitVecHalvingAdd(HVecHalvingAdd* instructio } void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { @@ -390,28 +489,29 @@ void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ SubvB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ SubvH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ SubvW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ SubvD(dst, lhs, rhs); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FsubW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FsubD(dst, lhs, rhs); break; @@ -422,7 +522,7 @@ void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) { } void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) { @@ -431,28 +531,29 @@ void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) 
{ VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ MulvB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ MulvH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ MulvW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ MulvD(dst, lhs, rhs); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FmulW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FmulD(dst, lhs, rhs); break; @@ -463,7 +564,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) { } void LocationsBuilderMIPS::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { @@ -472,11 +573,11 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FdivW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FdivD(dst, lhs, rhs); break; @@ -487,7 +588,7 @@ void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) { } void LocationsBuilderMIPS::VisitVecMin(HVecMin* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { @@ -496,49 +597,46 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uB(dst, lhs, rhs); - } else { - __ Min_sB(dst, lhs, rhs); - } + __ Min_uB(dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Min_sB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uH(dst, lhs, rhs); - } else { - __ Min_sH(dst, lhs, rhs); - } + __ Min_uH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Min_sH(dst, lhs, rhs); + break; + case DataType::Type::kUint32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uW(dst, lhs, rhs); - } else { - 
__ Min_sW(dst, lhs, rhs); - } + __ Min_uW(dst, lhs, rhs); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Min_sW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kUint64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uD(dst, lhs, rhs); - } else { - __ Min_sD(dst, lhs, rhs); - } + __ Min_uD(dst, lhs, rhs); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Min_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value. // TODO: Fix min(x, NaN) cases for float and double. - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminD(dst, lhs, rhs); break; default: @@ -548,7 +646,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMin(HVecMin* instruction) { } void LocationsBuilderMIPS::VisitVecMax(HVecMax* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { @@ -557,49 +655,46 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uB(dst, lhs, rhs); - } else { - __ Max_sB(dst, lhs, rhs); - } + __ Max_uB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Max_sB(dst, lhs, rhs); + break; + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uH(dst, lhs, rhs); - } else { - __ Max_sH(dst, lhs, rhs); - } + __ Max_uH(dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Max_sH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kUint32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uW(dst, lhs, rhs); - } else { - __ Max_sW(dst, lhs, rhs); - } + __ Max_uW(dst, lhs, rhs); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Max_sW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kUint64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uD(dst, lhs, rhs); - } else { - __ Max_sD(dst, lhs, rhs); - } + __ Max_uD(dst, lhs, rhs); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Max_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value. // TODO: Fix max(x, NaN) cases for float and double. 
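// Editorial sketch, not part of this patch: with signedness folded into the packed
// type, the new kUint32/kUint64 cases select Min_u*/Max_u* and kInt32/kInt64 the
// signed forms, so the old IsUnsigned() branches and the DCHECKs on the float and
// double paths disappear. The remaining TODO is NaN handling: fmin.w/fmax.w return
// the non-NaN operand, whereas Java expects NaN to propagate, roughly (sketch only,
// assuming <cmath>; the signed-zero ordering rule is ignored here):
//
//   float JavaMaxSemantics(float a, float b) {
//     if (std::isnan(a) || std::isnan(b)) return a + b;  // NaN propagates.
//     return (a > b) ? a : b;
//   }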
- case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxD(dst, lhs, rhs); break; default: @@ -609,7 +704,7 @@ void InstructionCodeGeneratorMIPS::VisitVecMax(HVecMax* instruction) { } void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { @@ -618,14 +713,15 @@ void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ AndV(dst, lhs, rhs); // lanes do not matter @@ -637,7 +733,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) { } void LocationsBuilderMIPS::VisitVecAndNot(HVecAndNot* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecAndNot(HVecAndNot* instruction) { @@ -645,7 +741,7 @@ void InstructionCodeGeneratorMIPS::VisitVecAndNot(HVecAndNot* instruction) { } void LocationsBuilderMIPS::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { @@ -654,14 +750,15 @@ void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ OrV(dst, lhs, rhs); // lanes do not matter @@ -673,7 +770,7 @@ void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) { } void LocationsBuilderMIPS::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + 
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { @@ -682,14 +779,15 @@ void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ XorV(dst, lhs, rhs); // lanes do not matter @@ -701,14 +799,15 @@ void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) { } // Helper to set up locations for vector shift operations. -static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -720,7 +819,7 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderMIPS::VisitVecShl(HVecShl* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { @@ -729,20 +828,21 @@ void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { VectorRegister dst = VectorRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ SlliB(dst, lhs, value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ SlliH(dst, lhs, value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ SlliW(dst, lhs, value); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ SlliD(dst, lhs, value); break; @@ -753,7 +853,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) { } void 
LocationsBuilderMIPS::VisitVecShr(HVecShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { @@ -762,20 +862,21 @@ void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { VectorRegister dst = VectorRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ SraiB(dst, lhs, value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ SraiH(dst, lhs, value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ SraiW(dst, lhs, value); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ SraiD(dst, lhs, value); break; @@ -786,7 +887,7 @@ void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) { } void LocationsBuilderMIPS::VisitVecUShr(HVecUShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { @@ -795,20 +896,21 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { VectorRegister dst = VectorRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ SrliB(dst, lhs, value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ SrliH(dst, lhs, value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ SrliW(dst, lhs, value); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ SrliD(dst, lhs, value); break; @@ -818,28 +920,359 @@ void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) { } } -void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ FillW(dst, ZERO); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ InsertB(dst, locations->InAt(0).AsRegister<Register>(), 0); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ InsertH(dst, locations->InAt(0).AsRegister<Register>(), 0); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ InsertW(dst, locations->InAt(0).AsRegister<Register>(), 0); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ InsertW(dst, locations->InAt(0).AsRegisterPairLow<Register>(), 0); + __ InsertW(dst, locations->InAt(0).AsRegisterPairHigh<Register>(), 1); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector accumulations. 
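// Editorial aside, not part of this patch: the accumulate visitors below map
// HVecMultiplyAccumulate onto maddv.* for the kAdd op kind and msubv.* otherwise.
// A minimal per-lane sketch of that behaviour ('Lane' is a hypothetical stand-in
// for the packed element type, shown only for illustration):
template <typename Lane>
static Lane MultiplyAccumulateSketch(Lane acc, Lane left, Lane right, bool is_add) {
  // maddv: acc + left * right;  msubv: acc - left * right (integer lanes wrap).
  return is_add ? static_cast<Lane>(acc + left * right)
                : static_cast<Lane>(acc - left * right);
}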
+static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); + VectorRegister left = VectorRegisterFrom(locations->InAt(1)); + VectorRegister right = VectorRegisterFrom(locations->InAt(2)); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { + __ MaddvB(acc, left, right); + } else { + __ MsubvB(acc, left, right); + } + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { + __ MaddvH(acc, left, right); + } else { + __ MsubvH(acc, left, right); + } + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { + __ MaddvW(acc, left, right); + } else { + __ MsubvW(acc, left, right); + } + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { + __ MaddvD(acc, left, right); + } else { + __ MsubvD(acc, left, right); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); + LocationSummary* locations = instruction->GetLocations(); + // All conversions require at least one temporary register. + locations->AddTemp(Location::RequiresFpuRegister()); + // Some conversions require a second temporary register. 
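// Editorial comment, not part of this patch: the second temporary is only needed
// when the SAD operands must first be widened with Hadd_s* before the absolute
// difference is taken, i.e. for sub-word (8-/16-bit) inputs and for kInt32 inputs
// feeding a kInt64 accumulator; the same-width kInt32->kInt32 and kInt64->kInt64
// cases below get by with FTMP plus the single temporary added above.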
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + switch (a->GetPackedType()) { + case DataType::Type::kInt32: + if (instruction->GetPackedType() == DataType::Type::kInt32) { + break; + } + FALLTHROUGH_INTENDED; + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + break; + } +} + +void InstructionCodeGeneratorMIPS::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); + VectorRegister left = VectorRegisterFrom(locations->InAt(1)); + VectorRegister right = VectorRegisterFrom(locations->InAt(2)); + VectorRegister tmp = static_cast<VectorRegister>(FTMP); + VectorRegister tmp1 = VectorRegisterFrom(locations->GetTemp(0)); + + DCHECK(locations->InAt(0).Equals(locations->Out())); + + // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + switch (a->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint16: + case DataType::Type::kInt16: { + DCHECK_EQ(8u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillB(tmp, ZERO); + __ Hadd_sH(tmp1, left, tmp); + __ Hadd_sH(tmp2, right, tmp); + __ Asub_sH(tmp1, tmp1, tmp2); + __ AddvH(acc, acc, tmp1); + __ Hadd_sH(tmp1, tmp, left); + __ Hadd_sH(tmp2, tmp, right); + __ Asub_sH(tmp1, tmp1, tmp2); + __ AddvH(acc, acc, tmp1); + break; + } + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillB(tmp, ZERO); + __ Hadd_sH(tmp1, left, tmp); + __ Hadd_sH(tmp2, right, tmp); + __ Asub_sH(tmp1, tmp1, tmp2); + __ Hadd_sW(tmp1, tmp1, tmp1); + __ AddvW(acc, acc, tmp1); + __ Hadd_sH(tmp1, tmp, left); + __ Hadd_sH(tmp2, tmp, right); + __ Asub_sH(tmp1, tmp1, tmp2); + __ Hadd_sW(tmp1, tmp1, tmp1); + __ AddvW(acc, acc, tmp1); + break; + } + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillB(tmp, ZERO); + __ Hadd_sH(tmp1, left, tmp); + __ Hadd_sH(tmp2, right, tmp); + __ Asub_sH(tmp1, tmp1, tmp2); + __ Hadd_sW(tmp1, tmp1, tmp1); + __ Hadd_sD(tmp1, tmp1, tmp1); + __ AddvD(acc, acc, tmp1); + __ Hadd_sH(tmp1, tmp, left); + __ Hadd_sH(tmp2, tmp, right); + __ Asub_sH(tmp1, tmp1, tmp2); + __ Hadd_sW(tmp1, tmp1, tmp1); + __ Hadd_sD(tmp1, tmp1, tmp1); + __ AddvD(acc, acc, tmp1); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillH(tmp, 
ZERO); + __ Hadd_sW(tmp1, left, tmp); + __ Hadd_sW(tmp2, right, tmp); + __ Asub_sW(tmp1, tmp1, tmp2); + __ AddvW(acc, acc, tmp1); + __ Hadd_sW(tmp1, tmp, left); + __ Hadd_sW(tmp2, tmp, right); + __ Asub_sW(tmp1, tmp1, tmp2); + __ AddvW(acc, acc, tmp1); + break; + } + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillH(tmp, ZERO); + __ Hadd_sW(tmp1, left, tmp); + __ Hadd_sW(tmp2, right, tmp); + __ Asub_sW(tmp1, tmp1, tmp2); + __ Hadd_sD(tmp1, tmp1, tmp1); + __ AddvD(acc, acc, tmp1); + __ Hadd_sW(tmp1, tmp, left); + __ Hadd_sW(tmp2, tmp, right); + __ Asub_sW(tmp1, tmp1, tmp2); + __ Hadd_sD(tmp1, tmp1, tmp1); + __ AddvD(acc, acc, tmp1); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(tmp, ZERO); + __ SubvW(tmp1, left, right); + __ Add_aW(tmp1, tmp1, tmp); + __ AddvW(acc, acc, tmp1); + break; + } + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillW(tmp, ZERO); + __ Hadd_sD(tmp1, left, tmp); + __ Hadd_sD(tmp2, right, tmp); + __ Asub_sD(tmp1, tmp1, tmp2); + __ AddvD(acc, acc, tmp1); + __ Hadd_sD(tmp1, tmp, left); + __ Hadd_sD(tmp2, tmp, right); + __ Asub_sD(tmp1, tmp1, tmp2); + __ AddvD(acc, acc, tmp1); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case DataType::Type::kInt64: { + DCHECK_EQ(2u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillW(tmp, ZERO); + __ SubvD(tmp1, left, right); + __ Add_aD(tmp1, tmp1, tmp); + __ AddvD(acc, acc, tmp1); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. 
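// Editorial aside, not part of this patch: the load/store visitors below pick the
// MSA access width purely from the element size of the packed type (ld.b/st.b up
// to ld.d/st.d). A compressed sketch of that mapping, assuming DataType::Size()
// returns the element size in bytes (hypothetical helper, illustration only):
enum class MsaMemWidthSketch { kB, kH, kW, kD };
static MsaMemWidthSketch WidthForElementSize(size_t element_size_in_bytes) {
  switch (element_size_in_bytes) {
    case 1u: return MsaMemWidthSketch::kB;  // kBool, kUint8, kInt8  -> LdB/StB
    case 2u: return MsaMemWidthSketch::kH;  // kUint16, kInt16       -> LdH/StH
    case 4u: return MsaMemWidthSketch::kW;  // kInt32, kFloat32      -> LdW/StW
    default: return MsaMemWidthSketch::kD;  // kInt64, kFloat64      -> LdD/StD
  }
}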
-static void CreateVecMemLocations(ArenaAllocator* arena, +static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, bool is_load) { - LocationSummary* locations = new (arena) LocationSummary(instruction); + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (is_load) { @@ -887,23 +1320,24 @@ int32_t InstructionCodeGeneratorMIPS::VecAddress(LocationSummary* locations, } void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ true); } void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); VectorRegister reg = VectorRegisterFrom(locations->Out()); Register base; int32_t offset = VecAddress(locations, size, &base); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ LdB(reg, base, offset); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: // Loading 8-bytes (needed if dealing with compressed strings in StringCharAt) from unaligned // memory address may cause a trap to the kernel if the CPU doesn't directly support unaligned // loads and stores. 
@@ -912,13 +1346,13 @@ void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ LdH(reg, base, offset); break; - case Primitive::kPrimInt: - case Primitive::kPrimFloat: + case DataType::Type::kInt32: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ LdW(reg, base, offset); break; - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ LdD(reg, base, offset); break; @@ -929,33 +1363,34 @@ void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) { } void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ false); } void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); VectorRegister reg = VectorRegisterFrom(locations->InAt(2)); Register base; int32_t offset = VecAddress(locations, size, &base); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ StB(reg, base, offset); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ StH(reg, base, offset); break; - case Primitive::kPrimInt: - case Primitive::kPrimFloat: + case DataType::Type::kInt32: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ StW(reg, base, offset); break; - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ StD(reg, base, offset); break; diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc index 0395db1df9..9ea55ec8d7 100644 --- a/compiler/optimizing/code_generator_vector_mips64.cc +++ b/compiler/optimizing/code_generator_vector_mips64.cc @@ -29,19 +29,20 @@ VectorRegister VectorRegisterFrom(Location location) { } void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); 
locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -55,31 +56,32 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar LocationSummary* locations = instruction->GetLocations(); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ FillB(dst, locations->InAt(0).AsRegister<GpuRegister>()); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ FillH(dst, locations->InAt(0).AsRegister<GpuRegister>()); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FillW(dst, locations->InAt(0).AsRegister<GpuRegister>()); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FillD(dst, locations->InAt(0).AsRegister<GpuRegister>()); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FpuRegister>(), /* is_double */ false); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ ReplicateFPToVectorRegister(dst, locations->InAt(0).AsFpuRegister<FpuRegister>(), @@ -91,42 +93,77 @@ void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar } } -void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); -} - -void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void InstructionCodeGeneratorMIPS64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Copy_sW(locations->Out().AsRegister<GpuRegister>(), src, 0); + break; + case DataType::Type::kInt64: + 
DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Copy_sD(locations->Out().AsRegister<GpuRegister>(), src, 0); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector unary operations. -static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); - switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: +static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + DataType::Type type = instruction->GetPackedType(); + switch (type) { + case DataType::Type::kBool: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), instruction->IsVecNot() ? Location::kOutputOverlap : Location::kNoOutputOverlap); break; - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), - (instruction->IsVecNeg() || instruction->IsVecAbs()) + (instruction->IsVecNeg() || instruction->IsVecAbs() || + (instruction->IsVecReduce() && type == DataType::Type::kInt64)) ? 
Location::kOutputOverlap : Location::kNoOutputOverlap); break; @@ -136,17 +173,72 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } } +void LocationsBuilderMIPS64::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister src = VectorRegisterFrom(locations->InAt(0)); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + VectorRegister tmp = static_cast<VectorRegister>(FTMP); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ Hadd_sD(tmp, src, src); + __ IlvlD(dst, tmp, tmp); + __ AddvW(dst, dst, tmp); + break; + case HVecReduce::kMin: + __ IlvodW(tmp, src, src); + __ Min_sW(tmp, src, tmp); + __ IlvlW(dst, tmp, tmp); + __ Min_sW(dst, dst, tmp); + break; + case HVecReduce::kMax: + __ IlvodW(tmp, src, src); + __ Max_sW(tmp, src, tmp); + __ IlvlW(dst, tmp, tmp); + __ Max_sW(dst, dst, tmp); + break; + } + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ IlvlD(dst, src, src); + __ AddvD(dst, dst, src); + break; + case HVecReduce::kMin: + __ IlvlD(dst, src, src); + __ Min_sD(dst, dst, src); + break; + case HVecReduce::kMax: + __ IlvlD(dst, src, src); + __ Max_sD(dst, dst, src); + break; + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); VectorRegister src = VectorRegisterFrom(locations->InAt(0)); VectorRegister dst = VectorRegisterFrom(locations->Out()); - Primitive::Type from = instruction->GetInputType(); - Primitive::Type to = instruction->GetResultType(); - if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DataType::Type from = instruction->GetInputType(); + DataType::Type to = instruction->GetResultType(); + if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ Ffint_sW(dst, src); } else { @@ -156,7 +248,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) { } void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { @@ -164,33 +256,34 @@ void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { VectorRegister src = VectorRegisterFrom(locations->InAt(0)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ FillB(dst, ZERO); __ SubvB(dst, dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ 
FillH(dst, ZERO); __ SubvH(dst, dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FillW(dst, ZERO); __ SubvW(dst, dst, src); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FillD(dst, ZERO); __ SubvD(dst, dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FillW(dst, ZERO); __ FsubW(dst, dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FillD(dst, ZERO); __ FsubD(dst, dst, src); @@ -202,7 +295,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) { } void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { @@ -210,34 +303,33 @@ void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { VectorRegister src = VectorRegisterFrom(locations->InAt(0)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ FillB(dst, ZERO); // all zeroes __ Add_aB(dst, dst, src); // dst = abs(0) + abs(src) break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ FillH(dst, ZERO); // all zeroes __ Add_aH(dst, dst, src); // dst = abs(0) + abs(src) break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FillW(dst, ZERO); // all zeroes __ Add_aW(dst, dst, src); // dst = abs(0) + abs(src) break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FillD(dst, ZERO); // all zeroes __ Add_aD(dst, dst, src); // dst = abs(0) + abs(src) break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ LdiW(dst, -1); // all ones __ SrliW(dst, dst, 1); __ AndV(dst, dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ LdiD(dst, -1); // all ones __ SrliD(dst, dst, 1); @@ -250,7 +342,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) { } void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { @@ -258,18 +350,19 @@ void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { VectorRegister src = VectorRegisterFrom(locations->InAt(0)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: // special case boolean-not + case DataType::Type::kBool: // special case boolean-not DCHECK_EQ(16u, instruction->GetVectorLength()); __ LdiB(dst, 1); __ XorV(dst, dst, src); break; - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case 
DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ NorV(dst, src, src); // lanes do not matter @@ -281,17 +374,18 @@ void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) { } // Helper to set up locations for vector binary operations. -static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -303,7 +397,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { @@ -312,28 +406,29 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ AddvB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ AddvH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ AddvW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ AddvD(dst, lhs, rhs); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FaddW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FaddD(dst, lhs, rhs); break; @@ -344,7 +439,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) { } void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void 
InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { @@ -353,30 +448,29 @@ void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruct VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - instruction->IsRounded() - ? __ Aver_uB(dst, lhs, rhs) - : __ Ave_uB(dst, lhs, rhs); - } else { - instruction->IsRounded() - ? __ Aver_sB(dst, lhs, rhs) - : __ Ave_sB(dst, lhs, rhs); - } + instruction->IsRounded() + ? __ Aver_uB(dst, lhs, rhs) + : __ Ave_uB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + instruction->IsRounded() + ? __ Aver_sB(dst, lhs, rhs) + : __ Ave_sB(dst, lhs, rhs); + break; + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - instruction->IsRounded() - ? __ Aver_uH(dst, lhs, rhs) - : __ Ave_uH(dst, lhs, rhs); - } else { - instruction->IsRounded() - ? __ Aver_sH(dst, lhs, rhs) - : __ Ave_sH(dst, lhs, rhs); - } + instruction->IsRounded() + ? __ Aver_uH(dst, lhs, rhs) + : __ Ave_uH(dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + instruction->IsRounded() + ? __ Aver_sH(dst, lhs, rhs) + : __ Ave_sH(dst, lhs, rhs); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -385,7 +479,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruct } void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { @@ -394,28 +488,29 @@ void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ SubvB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ SubvH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ SubvW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ SubvD(dst, lhs, rhs); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FsubW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FsubD(dst, lhs, rhs); break; @@ -426,7 +521,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) { } void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) 
{ @@ -435,28 +530,29 @@ void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ MulvB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ MulvH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ MulvW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ MulvD(dst, lhs, rhs); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FmulW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FmulD(dst, lhs, rhs); break; @@ -467,7 +563,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) { } void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { @@ -476,11 +572,11 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ FdivW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ FdivD(dst, lhs, rhs); break; @@ -491,7 +587,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) { } void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { @@ -500,49 +596,46 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uB(dst, lhs, rhs); - } else { - __ Min_sB(dst, lhs, rhs); - } + __ Min_uB(dst, lhs, rhs); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Min_sB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uH(dst, lhs, rhs); - } else { - __ Min_sH(dst, lhs, rhs); - } + __ Min_uH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Min_sH(dst, lhs, rhs); + break; + case DataType::Type::kUint32: DCHECK_EQ(4u, 
instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uW(dst, lhs, rhs); - } else { - __ Min_sW(dst, lhs, rhs); - } + __ Min_uW(dst, lhs, rhs); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Min_sW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kUint64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Min_uD(dst, lhs, rhs); - } else { - __ Min_sD(dst, lhs, rhs); - } + __ Min_uD(dst, lhs, rhs); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Min_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmin.df returns other argument, but Java expects a NaN value. // TODO: Fix min(x, NaN) cases for float and double. - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FminD(dst, lhs, rhs); break; default: @@ -552,7 +645,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) { } void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { @@ -561,49 +654,46 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uB(dst, lhs, rhs); - } else { - __ Max_sB(dst, lhs, rhs); - } + __ Max_uB(dst, lhs, rhs); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ Max_sB(dst, lhs, rhs); + break; + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uH(dst, lhs, rhs); - } else { - __ Max_sH(dst, lhs, rhs); - } + __ Max_uH(dst, lhs, rhs); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ Max_sH(dst, lhs, rhs); break; - case Primitive::kPrimInt: + case DataType::Type::kUint32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uW(dst, lhs, rhs); - } else { - __ Max_sW(dst, lhs, rhs); - } + __ Max_uW(dst, lhs, rhs); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ Max_sW(dst, lhs, rhs); break; - case Primitive::kPrimLong: + case DataType::Type::kUint64: DCHECK_EQ(2u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ Max_uD(dst, lhs, rhs); - } else { - __ Max_sD(dst, lhs, rhs); - } + __ Max_uD(dst, lhs, rhs); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ Max_sD(dst, lhs, rhs); break; // When one of arguments is NaN, fmax.df returns other argument, but Java expects a NaN value. // TODO: Fix max(x, NaN) cases for float and double. 
- case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxW(dst, lhs, rhs); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ FmaxD(dst, lhs, rhs); break; default: @@ -613,7 +703,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) { } void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { @@ -622,14 +712,15 @@ void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ AndV(dst, lhs, rhs); // lanes do not matter @@ -641,7 +732,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) { } void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecAndNot(HVecAndNot* instruction) { @@ -649,7 +740,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecAndNot(HVecAndNot* instruction) { } void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { @@ -658,14 +749,15 @@ void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ OrV(dst, lhs, rhs); // lanes do not matter @@ -677,7 +769,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) { } void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + 
CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { @@ -686,14 +778,15 @@ void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { VectorRegister rhs = VectorRegisterFrom(locations->InAt(1)); VectorRegister dst = VectorRegisterFrom(locations->Out()); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ XorV(dst, lhs, rhs); // lanes do not matter @@ -705,14 +798,15 @@ void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) { } // Helper to set up locations for vector shift operations. -static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -724,7 +818,7 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { @@ -733,20 +827,21 @@ void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { VectorRegister dst = VectorRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ SlliB(dst, lhs, value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ SlliH(dst, lhs, value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ SlliW(dst, lhs, value); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ SlliD(dst, lhs, value); break; @@ -757,7 +852,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) { } 
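The DCHECK_EQ(16u/8u/4u/2u, instruction->GetVectorLength()) checks that recur in these visitors all encode one invariant: the 128-bit vector registers targeted here hold 16 / sizeof(lane) lanes, so the packed DataType::Type alone fixes the expected vector length. Below is a minimal, self-contained C++ sketch (not ART code; all names are illustrative) of that invariant applied to an immediate per-lane shift, the scalar counterpart of SlliB/SlliH/SlliW/SlliD:

    #include <array>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // A 128-bit register holds 16 / sizeof(T) lanes of type T, which is exactly
    // what the DCHECK_EQ(16u/8u/4u/2u, GetVectorLength()) checks assert.
    template <typename T>
    constexpr size_t LanesPer128BitVector() {
      return 16 / sizeof(T);
    }

    // Scalar analogue of an immediate per-lane shift (slli.b/slli.h/slli.w/slli.d).
    template <typename T, size_t N>
    std::array<T, N> ShiftLeftImm(const std::array<T, N>& src, unsigned imm) {
      assert(N == LanesPer128BitVector<T>());
      assert(imm < sizeof(T) * 8);  // the shift amount must fit within one lane
      std::array<T, N> dst{};
      for (size_t i = 0; i < N; ++i) {
        dst[i] = static_cast<T>(src[i] << imm);
      }
      return dst;
    }

    int main() {
      std::array<uint16_t, 8> v;  // 8 lanes of 16 bits, the kUint16/kInt16 case above
      v.fill(3);
      std::array<uint16_t, 8> r = ShiftLeftImm(v, 2);
      std::printf("%zu lanes, first lane = %u\n", r.size(), static_cast<unsigned>(r[0]));
      return 0;
    }

The same 16/8/4/2 relationship is why the kUint8/kInt8 cases always expect 16 lanes while kInt64/kFloat64 expect 2.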
void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { @@ -766,20 +861,21 @@ void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { VectorRegister dst = VectorRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ SraiB(dst, lhs, value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ SraiH(dst, lhs, value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ SraiW(dst, lhs, value); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ SraiD(dst, lhs, value); break; @@ -790,7 +886,7 @@ void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) { } void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { @@ -799,20 +895,21 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { VectorRegister dst = VectorRegisterFrom(locations->Out()); int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ SrliB(dst, lhs, value); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ SrliH(dst, lhs, value); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ SrliW(dst, lhs, value); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ SrliD(dst, lhs, value); break; @@ -822,28 +919,358 @@ void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) { } } -void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, is_zero ? 
Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister dst = VectorRegisterFrom(locations->Out()); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ FillW(dst, ZERO); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ InsertB(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ InsertH(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ InsertW(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ InsertD(dst, locations->InAt(0).AsRegister<GpuRegister>(), 0); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector accumulations. 
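Before the accumulation helper, it may help to state what these accumulate operations compute: a multiply-accumulate updates the accumulator in place, acc[i] += a[i] * b[i] for the add form and acc[i] -= a[i] * b[i] for the subtract form, which is why the locations set up below keep the output in the same register as the first input. A hedged standalone sketch, with purely illustrative names (not the ART implementation):

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Per-lane multiply-accumulate: acc[i] += a[i] * b[i] (the maddv form) or
    // acc[i] -= a[i] * b[i] (the msubv form). The accumulator is read and
    // written in place, so such operations want their output allocated to the
    // same register as their first input.
    template <typename T, size_t N>
    void MultiplyAccumulate(std::array<T, N>& acc,
                            const std::array<T, N>& a,
                            const std::array<T, N>& b,
                            bool is_add) {
      for (size_t i = 0; i < N; ++i) {
        T product = static_cast<T>(a[i] * b[i]);
        acc[i] = static_cast<T>(is_add ? acc[i] + product : acc[i] - product);
      }
    }

    int main() {
      std::array<int32_t, 4> acc{1, 2, 3, 4};   // 4 x int32 lanes in a 128-bit vector
      std::array<int32_t, 4> a{10, 10, 10, 10};
      std::array<int32_t, 4> b{1, 2, 3, 4};
      MultiplyAccumulate(acc, a, b, /* is_add= */ true);
      std::printf("acc[3] = %d\n", acc[3]);  // 4 + 10 * 4 = 44
      return 0;
    }

Pinning the accumulator and the output to one register avoids an extra copy on every update, which is the reason for the SameAsFirstInput output location in the helper below.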
+static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); + VectorRegister left = VectorRegisterFrom(locations->InAt(1)); + VectorRegister right = VectorRegisterFrom(locations->InAt(2)); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { + __ MaddvB(acc, left, right); + } else { + __ MsubvB(acc, left, right); + } + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { + __ MaddvH(acc, left, right); + } else { + __ MsubvH(acc, left, right); + } + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { + __ MaddvW(acc, left, right); + } else { + __ MsubvW(acc, left, right); + } + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + if (instruction->GetOpKind() == HInstruction::kAdd) { + __ MaddvD(acc, left, right); + } else { + __ MsubvD(acc, left, right); + } + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); + LocationSummary* locations = instruction->GetLocations(); + // All conversions require at least one temporary register. + locations->AddTemp(Location::RequiresFpuRegister()); + // Some conversions require a second temporary register. 
+ HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + switch (a->GetPackedType()) { + case DataType::Type::kInt32: + if (instruction->GetPackedType() == DataType::Type::kInt32) { + break; + } + FALLTHROUGH_INTENDED; + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + locations->AddTemp(Location::RequiresFpuRegister()); + break; + default: + break; + } +} + +void InstructionCodeGeneratorMIPS64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + LocationSummary* locations = instruction->GetLocations(); + VectorRegister acc = VectorRegisterFrom(locations->InAt(0)); + VectorRegister left = VectorRegisterFrom(locations->InAt(1)); + VectorRegister right = VectorRegisterFrom(locations->InAt(2)); + VectorRegister tmp = static_cast<VectorRegister>(FTMP); + VectorRegister tmp1 = VectorRegisterFrom(locations->GetTemp(0)); + + DCHECK(locations->InAt(0).Equals(locations->Out())); + + // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S). + HVecOperation* a = instruction->InputAt(1)->AsVecOperation(); + HVecOperation* b = instruction->InputAt(2)->AsVecOperation(); + DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()), + HVecOperation::ToSignedType(b->GetPackedType())); + switch (a->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + DCHECK_EQ(16u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint16: + case DataType::Type::kInt16: { + DCHECK_EQ(8u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillB(tmp, ZERO); + __ Hadd_sH(tmp1, left, tmp); + __ Hadd_sH(tmp2, right, tmp); + __ Asub_sH(tmp1, tmp1, tmp2); + __ AddvH(acc, acc, tmp1); + __ Hadd_sH(tmp1, tmp, left); + __ Hadd_sH(tmp2, tmp, right); + __ Asub_sH(tmp1, tmp1, tmp2); + __ AddvH(acc, acc, tmp1); + break; + } + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillB(tmp, ZERO); + __ Hadd_sH(tmp1, left, tmp); + __ Hadd_sH(tmp2, right, tmp); + __ Asub_sH(tmp1, tmp1, tmp2); + __ Hadd_sW(tmp1, tmp1, tmp1); + __ AddvW(acc, acc, tmp1); + __ Hadd_sH(tmp1, tmp, left); + __ Hadd_sH(tmp2, tmp, right); + __ Asub_sH(tmp1, tmp1, tmp2); + __ Hadd_sW(tmp1, tmp1, tmp1); + __ AddvW(acc, acc, tmp1); + break; + } + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillB(tmp, ZERO); + __ Hadd_sH(tmp1, left, tmp); + __ Hadd_sH(tmp2, right, tmp); + __ Asub_sH(tmp1, tmp1, tmp2); + __ Hadd_sW(tmp1, tmp1, tmp1); + __ Hadd_sD(tmp1, tmp1, tmp1); + __ AddvD(acc, acc, tmp1); + __ Hadd_sH(tmp1, tmp, left); + __ Hadd_sH(tmp2, tmp, right); + __ Asub_sH(tmp1, tmp1, tmp2); + __ Hadd_sW(tmp1, tmp1, tmp1); + __ Hadd_sD(tmp1, tmp1, tmp1); + __ AddvD(acc, acc, tmp1); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK_EQ(8u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillH(tmp, 
ZERO); + __ Hadd_sW(tmp1, left, tmp); + __ Hadd_sW(tmp2, right, tmp); + __ Asub_sW(tmp1, tmp1, tmp2); + __ AddvW(acc, acc, tmp1); + __ Hadd_sW(tmp1, tmp, left); + __ Hadd_sW(tmp2, tmp, right); + __ Asub_sW(tmp1, tmp1, tmp2); + __ AddvW(acc, acc, tmp1); + break; + } + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillH(tmp, ZERO); + __ Hadd_sW(tmp1, left, tmp); + __ Hadd_sW(tmp2, right, tmp); + __ Asub_sW(tmp1, tmp1, tmp2); + __ Hadd_sD(tmp1, tmp1, tmp1); + __ AddvD(acc, acc, tmp1); + __ Hadd_sW(tmp1, tmp, left); + __ Hadd_sW(tmp2, tmp, right); + __ Asub_sW(tmp1, tmp1, tmp2); + __ Hadd_sD(tmp1, tmp1, tmp1); + __ AddvD(acc, acc, tmp1); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt32: { + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ FillW(tmp, ZERO); + __ SubvW(tmp1, left, right); + __ Add_aW(tmp1, tmp1, tmp); + __ AddvW(acc, acc, tmp1); + break; + } + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + VectorRegister tmp2 = VectorRegisterFrom(locations->GetTemp(1)); + __ FillW(tmp, ZERO); + __ Hadd_sD(tmp1, left, tmp); + __ Hadd_sD(tmp2, right, tmp); + __ Asub_sD(tmp1, tmp1, tmp2); + __ AddvD(acc, acc, tmp1); + __ Hadd_sD(tmp1, tmp, left); + __ Hadd_sD(tmp2, tmp, right); + __ Asub_sD(tmp1, tmp1, tmp2); + __ AddvD(acc, acc, tmp1); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + case DataType::Type::kInt64: { + DCHECK_EQ(2u, a->GetVectorLength()); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ FillD(tmp, ZERO); + __ SubvD(tmp1, left, right); + __ Add_aD(tmp1, tmp1, tmp); + __ AddvD(acc, acc, tmp1); + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } + break; + } + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } // Helper to set up locations for vector memory operations. 
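The long Hadd_s*/Asub_s*/Addv* sequences in VisitVecSADAccumulate above implement a widening sum of absolute differences: each accumulator lane of the wider type collects |a[i] - b[i]| over the narrow source lanes it covers. A minimal scalar sketch of that semantics (illustrative names only, assuming a contiguous lane grouping; not the ART implementation):

    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Widening sum of absolute differences: every lane of the wide accumulator
    // type TAcc gathers |a[i] - b[i]| over the narrow TSrc lanes it covers.
    template <typename TAcc, typename TSrc, size_t kAccLanes, size_t kSrcLanes>
    void SadAccumulate(std::array<TAcc, kAccLanes>& acc,
                       const std::array<TSrc, kSrcLanes>& a,
                       const std::array<TSrc, kSrcLanes>& b) {
      static_assert(kSrcLanes % kAccLanes == 0, "lane counts must nest evenly");
      const size_t ratio = kSrcLanes / kAccLanes;
      for (size_t i = 0; i < kSrcLanes; ++i) {
        int64_t diff = static_cast<int64_t>(a[i]) - static_cast<int64_t>(b[i]);
        acc[i / ratio] += static_cast<TAcc>(diff < 0 ? -diff : diff);
      }
    }

    int main() {
      std::array<int64_t, 2> acc{0, 0};                  // acc_T: 2 x int64
      std::array<int16_t, 8> a{1, 2, 3, 4, 5, 6, 7, 8};  // a_S:   8 x int16
      std::array<int16_t, 8> b{8, 7, 6, 5, 4, 3, 2, 1};
      SadAccumulate(acc, a, b);
      std::printf("acc[0] = %lld, acc[1] = %lld\n",  // prints 16 and 16 for this input
                  static_cast<long long>(acc[0]), static_cast<long long>(acc[1]));
      return 0;
    }

The sketch uses contiguous grouping for clarity; the vector code reaches the same overall totals through horizontal adds and absolute-difference instructions.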
-static void CreateVecMemLocations(ArenaAllocator* arena, +static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, bool is_load) { - LocationSummary* locations = new (arena) LocationSummary(instruction); + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (is_load) { @@ -891,23 +1318,24 @@ int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations, } void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ true); } void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); VectorRegister reg = VectorRegisterFrom(locations->Out()); GpuRegister base; int32_t offset = VecAddress(locations, size, &base); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ LdB(reg, base, offset); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: // Loading 8-bytes (needed if dealing with compressed strings in StringCharAt) from unaligned // memory address may cause a trap to the kernel if the CPU doesn't directly support unaligned // loads and stores. 
@@ -916,13 +1344,13 @@ void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { DCHECK_EQ(8u, instruction->GetVectorLength()); __ LdH(reg, base, offset); break; - case Primitive::kPrimInt: - case Primitive::kPrimFloat: + case DataType::Type::kInt32: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ LdW(reg, base, offset); break; - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ LdD(reg, base, offset); break; @@ -933,33 +1361,34 @@ void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) { } void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /* is_load */ false); } void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); VectorRegister reg = VectorRegisterFrom(locations->InAt(2)); GpuRegister base; int32_t offset = VecAddress(locations, size, &base); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ StB(reg, base, offset); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ StH(reg, base, offset); break; - case Primitive::kPrimInt: - case Primitive::kPrimFloat: + case DataType::Type::kInt32: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ StW(reg, base, offset); break; - case Primitive::kPrimLong: - case Primitive::kPrimDouble: + case DataType::Type::kInt64: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ StD(reg, base, offset); break; diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc index 14782d70a1..4945328e2b 100644 --- a/compiler/optimizing/code_generator_vector_x86.cc +++ b/compiler/optimizing/code_generator_vector_x86.cc @@ -15,7 +15,9 @@ */ #include "code_generator_x86.h" + #include "mirror/array-inl.h" +#include "mirror/string.h" namespace art { namespace x86 { @@ -24,24 +26,32 @@ namespace x86 { #define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); switch (instruction->GetPackedType()) { - case Primitive::kPrimLong: - // Long needs extra temporary to load the register pair. - locations->AddTemp(Location::RequiresFpuRegister()); + case DataType::Type::kInt64: + // Long needs extra temporary to load from the register pair. 
+ if (!is_zero) { + locations->AddTemp(Location::RequiresFpuRegister()); + } FALLTHROUGH_INTENDED; - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - locations->SetInAt(0, Location::RequiresRegister()); + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(is_zero ? Location::RequiresFpuRegister() + : Location::SameAsFirstInput()); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -51,46 +61,54 @@ void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instructi void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = instruction->GetLocations(); - XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + __ xorps(dst, dst); + return; + } + switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<Register>()); - __ punpcklbw(reg, reg); - __ punpcklwd(reg, reg); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<Register>()); + __ punpcklbw(dst, dst); + __ punpcklwd(dst, dst); + __ pshufd(dst, dst, Immediate(0)); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<Register>()); - __ punpcklwd(reg, reg); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<Register>()); + __ punpcklwd(dst, dst); + __ pshufd(dst, dst, Immediate(0)); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<Register>()); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<Register>()); + __ pshufd(dst, dst, Immediate(0)); break; - case Primitive::kPrimLong: { - XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + case DataType::Type::kInt64: { DCHECK_EQ(2u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); - __ punpckldq(reg, tmp); - __ punpcklqdq(reg, reg); + __ punpckldq(dst, tmp); + __ punpcklqdq(dst, dst); break; } - case Primitive::kPrimFloat: - 
DCHECK(locations->InAt(0).Equals(locations->Out())); + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ shufps(reg, reg, Immediate(0)); - break; - case Primitive::kPrimDouble: DCHECK(locations->InAt(0).Equals(locations->Out())); + __ shufps(dst, dst, Immediate(0)); + break; + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ shufpd(reg, reg, Immediate(0)); + DCHECK(locations->InAt(0).Equals(locations->Out())); + __ shufpd(dst, dst, Immediate(0)); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -98,37 +116,152 @@ void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* i } } -void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderX86::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kInt64: + // Long needs extra temporary to store into the register pair. + locations->AddTemp(Location::RequiresFpuRegister()); + FALLTHROUGH_INTENDED; + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void InstructionCodeGeneratorX86::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: // TODO: up to here, and? + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + case DataType::Type::kInt32: + DCHECK_LE(4u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 16u); + __ movd(locations->Out().AsRegister<Register>(), src); + break; + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movd(locations->Out().AsRegisterPairLow<Register>(), src); + __ pshufd(tmp, src, Immediate(1)); + __ movd(locations->Out().AsRegisterPairHigh<Register>(), tmp); + break; + } + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +// Helper to set up locations for vector unary operations. 
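One such unary operation is HVecReduce, which folds every lane into a single scalar result; the phaddd and psrldq/pminsd/pmaxsd sequences further down perform that folding with shuffles. The standalone sketch below (illustrative names, not ART code) spells out the three reduction kinds in scalar form:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    enum class ReduceKind { kSum, kMin, kMax };

    // Fold all lanes of a packed vector into one scalar, the semantics that the
    // shuffle-based sequences (phaddd, or psrldq followed by pminsd/pmaxsd) implement.
    template <typename T, size_t N>
    T Reduce(const std::array<T, N>& v, ReduceKind kind) {
      T result = v[0];
      for (size_t i = 1; i < N; ++i) {
        switch (kind) {
          case ReduceKind::kSum: result = static_cast<T>(result + v[i]); break;
          case ReduceKind::kMin: result = std::min(result, v[i]); break;
          case ReduceKind::kMax: result = std::max(result, v[i]); break;
        }
      }
      return result;
    }

    int main() {
      std::array<int32_t, 4> v{3, -7, 5, 1};  // 4 x int32 lanes
      std::printf("sum=%d min=%d max=%d\n",
                  Reduce(v, ReduceKind::kSum),
                  Reduce(v, ReduceKind::kMin),
                  Reduce(v, ReduceKind::kMax));
      return 0;
    }

The shuffle form halves the number of live lanes on each step, which is why a 4-lane min/max reduction needs only two shift/combine pairs.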
+static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderX86::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); + // Long reduction or min/max require a temporary. + if (instruction->GetPackedType() == DataType::Type::kInt64 || + instruction->GetKind() == HVecReduce::kMin || + instruction->GetKind() == HVecReduce::kMax) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } } -// Helper to set up locations for vector unary operations. -static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +void InstructionCodeGeneratorX86::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ movaps(dst, src); + __ phaddd(dst, dst); + __ phaddd(dst, dst); + break; + case HVecReduce::kMin: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movaps(tmp, src); + __ movaps(dst, src); + __ psrldq(tmp, Immediate(8)); + __ pminsd(dst, tmp); + __ psrldq(tmp, Immediate(4)); + __ pminsd(dst, tmp); + break; + } + case HVecReduce::kMax: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movaps(tmp, src); + __ movaps(dst, src); + __ psrldq(tmp, Immediate(8)); + __ pmaxsd(dst, tmp); + __ psrldq(tmp, Immediate(4)); + __ pmaxsd(dst, tmp); + break; + } + } break; + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ movaps(tmp, src); + __ movaps(dst, src); + __ punpckhqdq(tmp, tmp); + __ paddq(dst, tmp); + break; + case HVecReduce::kMin: + case HVecReduce::kMax: + LOG(FATAL) << "Unsupported SIMD type"; + } + break; + } default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -136,16 +269,16 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } void 
LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); - Primitive::Type from = instruction->GetInputType(); - Primitive::Type to = instruction->GetResultType(); - if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DataType::Type from = instruction->GetInputType(); + DataType::Type to = instruction->GetResultType(); + if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { @@ -154,7 +287,7 @@ void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) { } void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { @@ -162,33 +295,34 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubb(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubd(dst, src); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubq(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ xorps(dst, dst); __ subps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ xorpd(dst, dst); __ subpd(dst, src); @@ -200,9 +334,9 @@ void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) { } void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); // Integral-abs requires a temporary for the comparison. 
- if (instruction->GetPackedType() == Primitive::kPrimInt) { + if (instruction->GetPackedType() == DataType::Type::kInt32) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } @@ -212,7 +346,7 @@ void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) { XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { DCHECK_EQ(4u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movaps(dst, src); @@ -222,13 +356,13 @@ void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) { __ psubd(dst, tmp); break; } - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ psrld(dst, Immediate(1)); __ andps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ psrlq(dst, Immediate(1)); @@ -241,9 +375,9 @@ void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) { } void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); // Boolean-not requires a temporary to construct the 16 x one. - if (instruction->GetPackedType() == Primitive::kPrimBoolean) { + if (instruction->GetPackedType() == DataType::Type::kBool) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } @@ -253,7 +387,7 @@ void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: { // special case boolean-not + case DataType::Type::kBool: { // special case boolean-not DCHECK_EQ(16u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ pxor(dst, dst); @@ -262,22 +396,23 @@ void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { __ pxor(dst, src); break; } - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pcmpeqb(dst, dst); // all ones __ pxor(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ xorps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ xorpd(dst, src); @@ -289,17 +424,18 @@ void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) { } // Helper to set up locations for vector binary operations. 
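The kFloat32/kFloat64 cases of VisitVecAbs above use a sign-mask trick: pcmpeqb produces all ones, the logical right shift by one turns that into 0x7fffffff (or the 64-bit equivalent) in every lane, and and-ing with the source clears only the sign bit. A self-contained scalar sketch of the same trick (illustrative only):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Absolute value of a float by masking off the sign bit: the scalar analogue
    // of pcmpeqb (all ones), psrld by 1 (0x7fffffff per lane), then andps.
    float AbsViaSignMask(float x) {
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof(bits));  // reinterpret the float as raw bits
      uint32_t mask = ~0u >> 1;              // all ones shifted right by one: 0x7fffffff
      bits &= mask;                          // clear only the sign bit
      std::memcpy(&x, &bits, sizeof(x));
      return x;
    }

    int main() {
      std::printf("%f %f\n", AbsViaSignMask(-2.5f), AbsViaSignMask(3.25f));
      return 0;
    }

The integer kInt32 case cannot reuse this mask because two's-complement negation is not a plain sign-bit flip, hence the compare-and-subtract sequence used there instead.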
-static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); @@ -311,7 +447,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { @@ -320,28 +456,29 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ paddb(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ paddw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ paddd(dst, src); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ paddq(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ addps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ addpd(dst, src); break; @@ -352,7 +489,7 @@ void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) { } void LocationsBuilderX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { @@ -362,15 +499,13 @@ void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); DCHECK(instruction->IsRounded()); - DCHECK(instruction->IsUnsigned()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ pavgb(dst, src); return; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pavgw(dst, 
src); return; @@ -381,7 +516,7 @@ void InstructionCodeGeneratorX86::VisitVecHalvingAdd(HVecHalvingAdd* instruction } void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { @@ -390,28 +525,29 @@ void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ psubb(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psubw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ psubd(dst, src); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ psubq(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ subps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ subpd(dst, src); break; @@ -422,7 +558,7 @@ void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) { } void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { @@ -431,20 +567,20 @@ void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pmullw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pmulld(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ mulps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ mulpd(dst, src); break; @@ -455,7 +591,7 @@ void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) { } void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { @@ -464,11 +600,11 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ divps(dst, src); break; - case 
Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ divpd(dst, src); break; @@ -479,7 +615,7 @@ void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) { } void LocationsBuilderX86::VisitVecMin(HVecMin* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { @@ -488,40 +624,37 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminub(dst, src); - } else { - __ pminsb(dst, src); - } + __ pminub(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ pminsb(dst, src); + break; + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminuw(dst, src); - } else { - __ pminsw(dst, src); - } + __ pminuw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pminsw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kUint32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminud(dst, src); - } else { - __ pminsd(dst, src); - } + __ pminud(dst, src); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pminsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. 
- case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minpd(dst, src); break; default: @@ -531,7 +664,7 @@ void InstructionCodeGeneratorX86::VisitVecMin(HVecMin* instruction) { } void LocationsBuilderX86::VisitVecMax(HVecMax* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { @@ -540,40 +673,37 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxub(dst, src); - } else { - __ pmaxsb(dst, src); - } + __ pmaxub(dst, src); + break; + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ pmaxsb(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxuw(dst, src); - } else { - __ pmaxsw(dst, src); - } + __ pmaxuw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pmaxsw(dst, src); + break; + case DataType::Type::kUint32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxud(dst, src); - } else { - __ pmaxsd(dst, src); - } + __ pmaxud(dst, src); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmaxsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. 
- case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxpd(dst, src); break; default: @@ -583,7 +713,7 @@ void InstructionCodeGeneratorX86::VisitVecMax(HVecMax* instruction) { } void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { @@ -592,21 +722,22 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pand(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ andps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ andpd(dst, src); break; @@ -617,7 +748,7 @@ void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) { } void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { @@ -626,21 +757,22 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pandn(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ andnps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ andnpd(dst, src); break; @@ -651,7 +783,7 @@ void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) { } void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* 
instruction) { @@ -660,21 +792,22 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ por(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ orps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ orpd(dst, src); break; @@ -685,7 +818,7 @@ void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) { } void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { @@ -694,21 +827,22 @@ void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pxor(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ xorps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ xorpd(dst, src); break; @@ -719,13 +853,13 @@ void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) { } // Helper to set up locations for vector shift operations. 
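// Illustration only (not part of the change above): the shift helper that follows accepts
// only 16/32/64-bit lane types because SSE provides no packed 8-bit shifts, and the shift
// distance is consumed as an immediate (psllw/pslld/psllq and friends). A minimal
// standalone SSE2 sketch of the three shift forms:
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128i v = _mm_set1_epi16(0x0101);
  __m128i shl = _mm_slli_epi16(v, 3);  // psllw xmm, imm8
  __m128i sar = _mm_srai_epi16(v, 1);  // psraw xmm, imm8 (arithmetic)
  __m128i shr = _mm_srli_epi16(v, 1);  // psrlw xmm, imm8 (logical)
  std::printf("%04x %04x %04x\n",
              _mm_extract_epi16(shl, 0),
              _mm_extract_epi16(sar, 0),
              _mm_extract_epi16(shr, 0));
  return 0;
}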
-static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); locations->SetOut(Location::SameAsFirstInput()); @@ -737,7 +871,7 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { @@ -746,16 +880,16 @@ void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psllw(dst, Immediate(static_cast<uint8_t>(value))); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pslld(dst, Immediate(static_cast<uint8_t>(value))); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ psllq(dst, Immediate(static_cast<uint8_t>(value))); break; @@ -766,7 +900,7 @@ void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) { } void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { @@ -775,12 +909,12 @@ void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psraw(dst, Immediate(static_cast<uint8_t>(value))); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ psrad(dst, Immediate(static_cast<uint8_t>(value))); break; @@ -791,7 +925,7 @@ void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) { } void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { @@ -800,16 +934,16 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { int32_t value = 
locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psrlw(dst, Immediate(static_cast<uint8_t>(value))); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ psrld(dst, Immediate(static_cast<uint8_t>(value))); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ psrlq(dst, Immediate(static_cast<uint8_t>(value))); break; @@ -819,28 +953,147 @@ void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) { } } -void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case DataType::Type::kInt64: + // Long needs extra temporary to load from register pairs. + if (!is_zero) { + locations->AddTemp(Location::RequiresFpuRegister()); + } + FALLTHROUGH_INTENDED; + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ xorps(dst, dst); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: // TODO: up to here, and? 
+ LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(dst, locations->InAt(0).AsRegister<Register>()); + break; + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ xorps(tmp, tmp); + __ movd(dst, locations->InAt(0).AsRegisterPairLow<Register>()); + __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>()); + __ punpckldq(dst, tmp); + break; + } + case DataType::Type::kFloat32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); + break; + case DataType::Type::kFloat64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector accumulations. +static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + // TODO: pmaddwd? + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + // TODO: psadbw for unsigned? + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} // Helper to set up locations for vector memory operations.
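// Illustration only (not part of the change above): the "psadbw for unsigned?" TODO refers
// to PSADBW, which sums absolute differences of unsigned bytes into the two 64-bit halves
// of the register, so it would only cover the unsigned-byte SAD-accumulate case. A minimal
// standalone SSE2 sketch:
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  alignas(16) uint8_t a[16] = {10, 0, 5, 200, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  alignas(16) uint8_t b[16] = { 7, 3, 9, 100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  __m128i va = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
  __m128i vb = _mm_load_si128(reinterpret_cast<const __m128i*>(b));
  __m128i sad = _mm_sad_epu8(va, vb);  // |10-7| + |0-3| + |5-9| + |200-100| = 110 in the low half
  std::printf("low-half SAD = %d\n", _mm_cvtsi128_si32(sad));
  return 0;
}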
-static void CreateVecMemLocations(ArenaAllocator* arena, +static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, bool is_load) { - LocationSummary* locations = new (arena) LocationSummary(instruction); + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (is_load) { @@ -866,6 +1119,7 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin case 8: scale = TIMES_8; break; default: break; } + // Incorporate the string or array offset in the address computation. uint32_t offset = is_string_char_at ? mirror::String::ValueOffset().Uint32Value() : mirror::Array::DataOffset(size).Uint32Value(); @@ -873,7 +1127,7 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin } void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true); // String load requires a temporary for the compressed load. if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); @@ -882,12 +1136,13 @@ void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) { void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); Address address = VecAddress(locations, size, instruction->IsStringCharAt()); XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: + case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt. + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); // Special handling of compressed/uncompressed string load. if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { @@ -900,7 +1155,7 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { __ testb(Address(locations->InAt(0).AsRegister<Register>(), count_offset), Immediate(1)); __ j(kNotZero, &not_compressed); // Zero extend 8 compressed bytes into 8 chars.
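// Illustration only (not part of the change above): the compressed-string path referenced
// by the comment above loads 8 packed 8-bit characters and zero-extends them to 8 UTF-16
// code units by interleaving with a zero register, as the punpcklbw sequence below does.
// A minimal standalone SSE2 sketch of that widening step:
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t compressed[8] = {'c', 'o', 'm', 'p', 'a', 'c', 't', '!'};
  __m128i bytes = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(compressed));
  __m128i chars = _mm_unpacklo_epi8(bytes, _mm_setzero_si128());  // 8 x uint8 -> 8 x uint16
  alignas(16) uint16_t out[8];
  _mm_store_si128(reinterpret_cast<__m128i*>(out), chars);
  for (int i = 0; i < 8; ++i) std::printf("%04x ", out[i]);
  std::printf("\n");
  return 0;
}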
- __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); + __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt())); __ pxor(tmp, tmp); __ punpcklbw(reg, tmp); __ jmp(&done); @@ -911,20 +1166,20 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { return; } FALLTHROUGH_INTENDED; - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; @@ -935,31 +1190,32 @@ void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) { } void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false); } void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); Address address = VecAddress(locations, size, /*is_string_char_at*/ false); XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); is_aligned16 ? 
__ movapd(address, reg) : __ movupd(address, reg); break; diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc index 246044ebb8..a77c7d6838 100644 --- a/compiler/optimizing/code_generator_vector_x86_64.cc +++ b/compiler/optimizing/code_generator_vector_x86_64.cc @@ -15,7 +15,9 @@ */ #include "code_generator_x86_64.h" + #include "mirror/array-inl.h" +#include "mirror/string.h" namespace art { namespace x86_64 { @@ -24,21 +26,27 @@ namespace x86_64 { #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - locations->SetInAt(0, Location::RequiresRegister()); + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::SameAsFirstInput()); + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(is_zero ? Location::RequiresFpuRegister() + : Location::SameAsFirstInput()); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -48,42 +56,50 @@ void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instru void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) { LocationSummary* locations = instruction->GetLocations(); - XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + // Shorthand for any type of zero. 
+ if (IsZeroBitPattern(instruction->InputAt(0))) { + __ xorps(dst, dst); + return; + } + switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); - __ punpcklbw(reg, reg); - __ punpcklwd(reg, reg); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false); + __ punpcklbw(dst, dst); + __ punpcklwd(dst, dst); + __ pshufd(dst, dst, Immediate(0)); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); - __ punpcklwd(reg, reg); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false); + __ punpcklwd(dst, dst); + __ pshufd(dst, dst, Immediate(0)); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); - __ pshufd(reg, reg, Immediate(0)); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ false); + __ pshufd(dst, dst, Immediate(0)); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit - __ punpcklqdq(reg, reg); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>(), /*64-bit*/ true); + __ punpcklqdq(dst, dst); break; - case Primitive::kPrimFloat: - DCHECK(locations->InAt(0).Equals(locations->Out())); + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - __ shufps(reg, reg, Immediate(0)); - break; - case Primitive::kPrimDouble: DCHECK(locations->InAt(0).Equals(locations->Out())); + __ shufps(dst, dst, Immediate(0)); + break; + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - __ shufpd(reg, reg, Immediate(0)); + DCHECK(locations->InAt(0).Equals(locations->Out())); + __ shufpd(dst, dst, Immediate(0)); break; default: LOG(FATAL) << "Unsupported SIMD type"; @@ -91,37 +107,144 @@ void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar } } -void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { - LOG(FATAL) << "No SIMD for " << 
instruction->GetId(); +void InstructionCodeGeneratorX86_64::VisitVecExtractScalar(HVecExtractScalar* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: // TODO: up to here, and? + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ false); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movd(locations->Out().AsRegister<CpuRegister>(), src, /*64-bit*/ true); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + DCHECK_LE(2u, instruction->GetVectorLength()); + DCHECK_LE(instruction->GetVectorLength(), 4u); + DCHECK(locations->InAt(0).Equals(locations->Out())); // no code required + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +// Helper to set up locations for vector unary operations. +static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } } -void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) { - LOG(FATAL) << "No SIMD for " << instruction->GetId(); +void LocationsBuilderX86_64::VisitVecReduce(HVecReduce* instruction) { + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); + // Long reduction or min/max require a temporary. + if (instruction->GetPackedType() == DataType::Type::kInt64 || + instruction->GetKind() == HVecReduce::kMin || + instruction->GetKind() == HVecReduce::kMax) { + instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); + } } -// Helper to set up locations for vector unary operations. 
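// Illustration only (not part of the change above): the VecReduce code generator that
// follows folds four int32 lanes either with PHADDD (sum) or by repeatedly combining the
// vector with a byte-shifted copy (min/max). A standalone sketch of both idioms, assuming
// SSSE3 (_mm_hadd_epi32) and SSE4.1 (_mm_min_epi32) are available:
#include <smmintrin.h>
#include <cstdio>

int main() {
  __m128i v = _mm_setr_epi32(7, -3, 42, 5);
  // Sum: two horizontal adds leave the total in every lane.
  __m128i sum = _mm_hadd_epi32(v, v);
  sum = _mm_hadd_epi32(sum, sum);
  // Min: fold the upper half onto the lower half, then the remaining pair.
  __m128i mn = _mm_min_epi32(v, _mm_srli_si128(v, 8));
  mn = _mm_min_epi32(mn, _mm_srli_si128(mn, 4));
  std::printf("sum=%d min=%d\n", _mm_cvtsi128_si32(sum), _mm_cvtsi128_si32(mn));  // sum=51 min=-3
  return 0;
}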
-static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +void InstructionCodeGeneratorX86_64::VisitVecReduce(HVecReduce* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - locations->SetInAt(0, Location::RequiresFpuRegister()); - locations->SetOut(Location::RequiresFpuRegister()); + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ movaps(dst, src); + __ phaddd(dst, dst); + __ phaddd(dst, dst); + break; + case HVecReduce::kMin: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movaps(tmp, src); + __ movaps(dst, src); + __ psrldq(tmp, Immediate(8)); + __ pminsd(dst, tmp); + __ psrldq(tmp, Immediate(4)); + __ pminsd(dst, tmp); + break; + } + case HVecReduce::kMax: { + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + __ movaps(tmp, src); + __ movaps(dst, src); + __ psrldq(tmp, Immediate(8)); + __ pmaxsd(dst, tmp); + __ psrldq(tmp, Immediate(4)); + __ pmaxsd(dst, tmp); + break; + } + } + break; + case DataType::Type::kInt64: { + DCHECK_EQ(2u, instruction->GetVectorLength()); + XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); + switch (instruction->GetKind()) { + case HVecReduce::kSum: + __ movaps(tmp, src); + __ movaps(dst, src); + __ punpckhqdq(tmp, tmp); + __ paddq(dst, tmp); + break; + case HVecReduce::kMin: + case HVecReduce::kMax: + LOG(FATAL) << "Unsupported SIMD type"; + } break; + } default: LOG(FATAL) << "Unsupported SIMD type"; UNREACHABLE(); @@ -129,16 +252,16 @@ static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* in } void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { LocationSummary* locations = instruction->GetLocations(); XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); - Primitive::Type from = instruction->GetInputType(); - Primitive::Type to = instruction->GetResultType(); - if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) { + DataType::Type from = instruction->GetInputType(); + DataType::Type to = instruction->GetResultType(); + if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) { DCHECK_EQ(4u, instruction->GetVectorLength()); __ cvtdq2ps(dst, src); } else { @@ -147,7 +270,7 @@ void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) { } void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { @@ -155,33 +278,34 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { 
XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubb(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubd(dst, src); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ pxor(dst, dst); __ psubq(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ xorps(dst, dst); __ subps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ xorpd(dst, dst); __ subpd(dst, src); @@ -193,9 +317,9 @@ void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) { } void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); // Integral-abs requires a temporary for the comparison. - if (instruction->GetPackedType() == Primitive::kPrimInt) { + if (instruction->GetPackedType() == DataType::Type::kInt32) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } @@ -205,7 +329,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { DCHECK_EQ(4u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movaps(dst, src); @@ -215,13 +339,13 @@ void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { __ psubd(dst, tmp); break; } - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ psrld(dst, Immediate(1)); __ andps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ psrlq(dst, Immediate(1)); @@ -234,9 +358,9 @@ void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) { } void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) { - CreateVecUnOpLocations(GetGraph()->GetArena(), instruction); + CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction); // Boolean-not requires a temporary to construct the 16 x one. 
- if (instruction->GetPackedType() == Primitive::kPrimBoolean) { + if (instruction->GetPackedType() == DataType::Type::kBool) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } } @@ -246,7 +370,7 @@ void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: { // special case boolean-not + case DataType::Type::kBool: { // special case boolean-not DCHECK_EQ(16u, instruction->GetVectorLength()); XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ pxor(dst, dst); @@ -255,22 +379,23 @@ void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { __ pxor(dst, src); break; } - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pcmpeqb(dst, dst); // all ones __ pxor(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ xorps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ pcmpeqb(dst, dst); // all ones __ xorpd(dst, src); @@ -282,17 +407,18 @@ void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) { } // Helper to set up locations for vector binary operations. 
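// Illustration only (not part of the change above): the VecAbs and VecNot lowerings above
// materialize constants from an all-ones register (pcmpeqb dst, dst); for float-abs the
// all-ones value is shifted right by one in each 32-bit lane to form the 0x7fffffff
// sign-clearing mask. A minimal standalone SSE2 sketch of that mask trick:
#include <emmintrin.h>
#include <cstdio>

int main() {
  __m128 v = _mm_setr_ps(-1.5f, 2.0f, -0.0f, -8.25f);
  __m128i all_ones = _mm_cmpeq_epi8(_mm_setzero_si128(), _mm_setzero_si128());
  __m128 mask = _mm_castsi128_ps(_mm_srli_epi32(all_ones, 1));  // 0x7fffffff in every lane
  __m128 abs_v = _mm_and_ps(mask, v);                           // clears the sign bits
  alignas(16) float out[4];
  _mm_store_ps(out, abs_v);
  std::printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 1.5 2 0 8.25
  return 0;
}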
-static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); @@ -304,7 +430,7 @@ static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { @@ -313,28 +439,29 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ paddb(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ paddw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ paddd(dst, src); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ paddq(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ addps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ addpd(dst, src); break; @@ -345,7 +472,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) { } void LocationsBuilderX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) { @@ -355,15 +482,13 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); DCHECK(instruction->IsRounded()); - DCHECK(instruction->IsUnsigned()); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ pavgb(dst, src); return; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: DCHECK_EQ(8u, 
instruction->GetVectorLength()); __ pavgw(dst, src); return; @@ -374,7 +499,7 @@ void InstructionCodeGeneratorX86_64::VisitVecHalvingAdd(HVecHalvingAdd* instruct } void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { @@ -383,28 +508,29 @@ void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: DCHECK_EQ(16u, instruction->GetVectorLength()); __ psubb(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psubw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ psubd(dst, src); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ psubq(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ subps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ subpd(dst, src); break; @@ -415,7 +541,7 @@ void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) { } void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { @@ -424,20 +550,20 @@ void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ pmullw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pmulld(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ mulps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ mulpd(dst, src); break; @@ -448,7 +574,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) { } void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { @@ -457,11 +583,11 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: 
DCHECK_EQ(4u, instruction->GetVectorLength()); __ divps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ divpd(dst, src); break; @@ -472,7 +598,7 @@ void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) { } void LocationsBuilderX86_64::VisitVecMin(HVecMin* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { @@ -481,40 +607,37 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminub(dst, src); - } else { - __ pminsb(dst, src); - } + __ pminub(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ pminsb(dst, src); + break; + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminuw(dst, src); - } else { - __ pminsw(dst, src); - } + __ pminuw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pminsw(dst, src); + break; + case DataType::Type::kUint32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pminud(dst, src); - } else { - __ pminsd(dst, src); - } + __ pminud(dst, src); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pminsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. 
- case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ minpd(dst, src); break; default: @@ -524,7 +647,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMin(HVecMin* instruction) { } void LocationsBuilderX86_64::VisitVecMax(HVecMax* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { @@ -533,40 +656,37 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: DCHECK_EQ(16u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxub(dst, src); - } else { - __ pmaxsb(dst, src); - } + __ pmaxub(dst, src); break; - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + DCHECK_EQ(16u, instruction->GetVectorLength()); + __ pmaxsb(dst, src); + break; + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxuw(dst, src); - } else { - __ pmaxsw(dst, src); - } + __ pmaxuw(dst, src); + break; + case DataType::Type::kInt16: + DCHECK_EQ(8u, instruction->GetVectorLength()); + __ pmaxsw(dst, src); break; - case Primitive::kPrimInt: + case DataType::Type::kUint32: DCHECK_EQ(4u, instruction->GetVectorLength()); - if (instruction->IsUnsigned()) { - __ pmaxud(dst, src); - } else { - __ pmaxsd(dst, src); - } + __ pmaxud(dst, src); + break; + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ pmaxsd(dst, src); break; // Next cases are sloppy wrt 0.0 vs -0.0. 
- case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); - DCHECK(!instruction->IsUnsigned()); __ maxpd(dst, src); break; default: @@ -576,7 +696,7 @@ void InstructionCodeGeneratorX86_64::VisitVecMax(HVecMax* instruction) { } void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { @@ -585,21 +705,22 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pand(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ andps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ andpd(dst, src); break; @@ -610,7 +731,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) { } void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { @@ -619,21 +740,22 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pandn(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ andnps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ andnpd(dst, src); break; @@ -644,7 +766,7 @@ void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) { } void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void 
InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { @@ -653,21 +775,22 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ por(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ orps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ orpd(dst, src); break; @@ -678,7 +801,7 @@ void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) { } void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) { - CreateVecBinOpLocations(GetGraph()->GetArena(), instruction); + CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { @@ -687,21 +810,22 @@ void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); __ pxor(dst, src); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ xorps(dst, src); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ xorpd(dst, src); break; @@ -712,13 +836,13 @@ void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) { } // Helper to set up locations for vector shift operations. 
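// Illustration only (not part of the change above): the VecAndNot lowering above relies on
// PANDN computing (~dst & src), i.e. the first input (which is also the destination) is the
// operand that gets complemented. A minimal standalone SSE2 sketch of that operand order:
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t a = 0xF0F0F0F0u, b = 0xFF00FF00u;
  __m128i va = _mm_set1_epi32(static_cast<int>(a));
  __m128i vb = _mm_set1_epi32(static_cast<int>(b));
  __m128i r = _mm_andnot_si128(va, vb);  // ~a & b, mirroring "pandn dst, src" with dst = a
  std::printf("~a & b = %08x (expected %08x)\n",
              static_cast<uint32_t>(_mm_cvtsi128_si32(r)), ~a & b);
  return 0;
}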
-static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) { - LocationSummary* locations = new (arena) LocationSummary(instruction); +static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant())); locations->SetOut(Location::SameAsFirstInput()); @@ -730,7 +854,7 @@ static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* } void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { @@ -739,16 +863,16 @@ void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psllw(dst, Immediate(static_cast<int8_t>(value))); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ pslld(dst, Immediate(static_cast<int8_t>(value))); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ psllq(dst, Immediate(static_cast<int8_t>(value))); break; @@ -759,7 +883,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) { } void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { @@ -768,12 +892,12 @@ void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psraw(dst, Immediate(static_cast<int8_t>(value))); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ psrad(dst, Immediate(static_cast<int8_t>(value))); break; @@ -784,7 +908,7 @@ void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) { } void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) { - CreateVecShiftLocations(GetGraph()->GetArena(), instruction); + CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction); } void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { @@ -793,16 +917,16 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { 
int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue(); XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: DCHECK_EQ(8u, instruction->GetVectorLength()); __ psrlw(dst, Immediate(static_cast<int8_t>(value))); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK_EQ(4u, instruction->GetVectorLength()); __ psrld(dst, Immediate(static_cast<int8_t>(value))); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(2u, instruction->GetVectorLength()); __ psrlq(dst, Immediate(static_cast<int8_t>(value))); break; @@ -812,28 +936,137 @@ void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) { } } -void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + HInstruction* input = instruction->InputAt(0); + bool is_zero = IsZeroBitPattern(input); + + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: + locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant()) + : Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) { + LocationSummary* locations = instruction->GetLocations(); + XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>(); + + DCHECK_EQ(1u, instruction->InputCount()); // only one input currently implemented + + // Zero out all other elements first. + __ xorps(dst, dst); + + // Shorthand for any type of zero. + if (IsZeroBitPattern(instruction->InputAt(0))) { + return; + } + + // Set required elements. + switch (instruction->GetPackedType()) { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: // TODO: up to here, and? 
+ LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + case DataType::Type::kInt32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); + break; + case DataType::Type::kInt64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movd(dst, locations->InAt(0).AsRegister<CpuRegister>()); // is 64-bit + break; + case DataType::Type::kFloat32: + DCHECK_EQ(4u, instruction->GetVectorLength()); + __ movss(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); + break; + case DataType::Type::kFloat64: + DCHECK_EQ(2u, instruction->GetVectorLength()); + __ movsd(dst, locations->InAt(0).AsFpuRegister<XmmRegister>()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +// Helper to set up locations for vector accumulations. +static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) { + LocationSummary* locations = new (allocator) LocationSummary(instruction); + switch (instruction->GetPackedType()) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetInAt(2, Location::RequiresFpuRegister()); + locations->SetOut(Location::SameAsFirstInput()); + break; + default: + LOG(FATAL) << "Unsupported SIMD type"; + UNREACHABLE(); + } +} + +void LocationsBuilderX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); } -void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) { - LOG(FATAL) << "No SIMD for " << instr->GetId(); +void InstructionCodeGeneratorX86_64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) { + // TODO: pmaddwd? + LOG(FATAL) << "No SIMD for " << instruction->GetId(); +} + +void LocationsBuilderX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction); +} + +void InstructionCodeGeneratorX86_64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) { + // TODO: psadbw for unsigned? + LOG(FATAL) << "No SIMD for " << instruction->GetId(); } // Helper to set up locations for vector memory operations. 
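// A minimal sketch of the VisitVecSetScalars lowering above, written with SSE2 intrinsics
// instead of the ART assembler (assumption: an x86-64 host; names here are illustrative,
// not ART code).
#include <emmintrin.h>  // SSE2
#include <cstdint>
#include <cstdio>

int main() {
  // kInt32: movd already zeroes the upper lanes, matching "xorps dst, dst; movd dst, src".
  __m128i v_int32 = _mm_cvtsi32_si128(42);
  // kInt64: the 64-bit movd (movq) form zeroes the upper lane.
  __m128i v_int64 = _mm_cvtsi64_si128(INT64_C(0x1122334455667788));
  // kFloat32: movss from a zeroed register keeps only lane 0.
  __m128 v_float = _mm_move_ss(_mm_setzero_ps(), _mm_set_ss(3.5f));

  alignas(16) int32_t out[4];
  _mm_store_si128(reinterpret_cast<__m128i*>(out), v_int32);
  std::printf("lanes: %d %d %d %d\n", out[0], out[1], out[2], out[3]);
  (void)v_int64;
  (void)v_float;
  return 0;
}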
-static void CreateVecMemLocations(ArenaAllocator* arena, +static void CreateVecMemLocations(ArenaAllocator* allocator, HVecMemoryOperation* instruction, bool is_load) { - LocationSummary* locations = new (arena) LocationSummary(instruction); + LocationSummary* locations = new (allocator) LocationSummary(instruction); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); if (is_load) { @@ -859,6 +1092,7 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin case 8: scale = TIMES_8; break; default: break; } + // Incorporate the string or array offset in the address computation. uint32_t offset = is_string_char_at ? mirror::String::ValueOffset().Uint32Value() : mirror::Array::DataOffset(size).Uint32Value(); @@ -866,7 +1100,7 @@ static Address VecAddress(LocationSummary* locations, size_t size, bool is_strin } void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true); // String load requires a temporary for the compressed load. if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister()); @@ -875,12 +1109,13 @@ void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) { void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); Address address = VecAddress(locations, size, instruction->IsStringCharAt()); XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { - case Primitive::kPrimChar: + case DataType::Type::kInt16: // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt. + case DataType::Type::kUint16: DCHECK_EQ(8u, instruction->GetVectorLength()); // Special handling of compressed/uncompressed string load. if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { @@ -893,7 +1128,7 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { __ testb(Address(locations->InAt(0).AsRegister<CpuRegister>(), count_offset), Immediate(1)); __ j(kNotZero, &not_compressed); // Zero extend 8 compressed bytes into 8 chars.
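// A minimal sketch of the compressed-string path that follows: the testb above checks the
// compression flag in bit 0 of the String count field, and punpcklbw against a zeroed
// register widens 8 packed 8-bit chars into 8 16-bit chars. SSE2 intrinsics, illustration
// only (assumption: x86 host with SSE2).
#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t compressed[8] = {'c', 'o', 'm', 'p', 'r', 'e', 's', 's'};
  __m128i bytes = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(compressed));  // 8-byte load
  __m128i zero = _mm_setzero_si128();                                             // pxor tmp, tmp
  __m128i chars = _mm_unpacklo_epi8(bytes, zero);                                 // punpcklbw
  alignas(16) uint16_t out[8];
  _mm_store_si128(reinterpret_cast<__m128i*>(out), chars);
  for (uint16_t c : out) {
    std::printf("%c", static_cast<char>(c));
  }
  std::printf("\n");
  return 0;
}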
- __ movsd(reg, VecAddress(locations, 1, /*is_string_char_at*/ true)); + __ movsd(reg, VecAddress(locations, 1, instruction->IsStringCharAt())); __ pxor(tmp, tmp); __ punpcklbw(reg, tmp); __ jmp(&done); @@ -904,20 +1139,20 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { return; } FALLTHROUGH_INTENDED; - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address); break; @@ -928,31 +1163,32 @@ void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) { } void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) { - CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false); + CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false); } void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) { LocationSummary* locations = instruction->GetLocations(); - size_t size = Primitive::ComponentSize(instruction->GetPackedType()); + size_t size = DataType::Size(instruction->GetPackedType()); Address address = VecAddress(locations, size, /*is_string_char_at*/ false); XmmRegister reg = locations->InAt(2).AsFpuRegister<XmmRegister>(); bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16); switch (instruction->GetPackedType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_LE(2u, instruction->GetVectorLength()); DCHECK_LE(instruction->GetVectorLength(), 16u); is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: DCHECK_EQ(4u, instruction->GetVectorLength()); is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: DCHECK_EQ(2u, instruction->GetVectorLength()); is_aligned16 ? 
__ movapd(address, reg) : __ movupd(address, reg); break; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index af0e6462a2..6bf045885d 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -17,16 +17,19 @@ #include "code_generator_x86.h" #include "art_method.h" +#include "class_table.h" #include "code_generator_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_x86.h" +#include "linker/linker_patch.h" +#include "lock_word.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" -#include "lock_word.h" #include "thread.h" #include "utils/assembler.h" #include "utils/stack_checks.h" @@ -141,7 +144,8 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) { // Load the array length into our temporary. - uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); + HArrayLength* length = array_length->AsArrayLength(); + uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length); Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<Register>(), len_offset); length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1)); @@ -151,17 +155,17 @@ class BoundsCheckSlowPathX86 : public SlowPathCode { length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); } __ movl(length_loc.AsRegister<Register>(), array_len); - if (mirror::kUseStringCompression) { + if (mirror::kUseStringCompression && length->IsStringLength()) { __ shrl(length_loc.AsRegister<Register>(), Immediate(1)); } } x86_codegen->EmitParallelMoves( locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimInt, + DataType::Type::kInt32, length_loc, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt); + DataType::Type::kInt32); QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? kQuickThrowStringBounds : kQuickThrowArrayBounds; @@ -236,13 +240,6 @@ class LoadStringSlowPathX86 : public SlowPathCode { x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX)); RestoreLiveRegisters(codegen, locations); - // Store the resolved String to the BSS entry. - Register method_address = locations->InAt(0).AsRegister<Register>(); - __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset), - locations->Out().AsRegister<Register>()); - Label* fixup_label = x86_codegen->NewStringBssEntryPatch(instruction_->AsLoadString()); - __ Bind(fixup_label); - __ jmp(GetExitLabel()); } @@ -289,16 +286,6 @@ class LoadClassSlowPathX86 : public SlowPathCode { x86_codegen->Move32(out, Location::RegisterLocation(EAX)); } RestoreLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry. 
- DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { - DCHECK(out.IsValid()); - Register method_address = locations->InAt(0).AsRegister<Register>(); - __ movl(Address(method_address, CodeGeneratorX86::kDummy32BitOffset), - locations->Out().AsRegister<Register>()); - Label* fixup_label = x86_codegen->NewTypeBssEntryPatch(cls_); - __ Bind(fixup_label); - } __ jmp(GetExitLabel()); } @@ -330,7 +317,14 @@ class TypeCheckSlowPathX86 : public SlowPathCode { CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (kPoisonHeapReferences && + instruction_->IsCheckCast() && + instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) { + // First, unpoison the `cls` reference that was poisoned for direct memory comparison. + __ UnpoisonHeapReference(locations->InAt(1).AsRegister<Register>()); + } + + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -339,10 +333,10 @@ class TypeCheckSlowPathX86 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; x86_codegen->EmitParallelMoves(locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot); + DataType::Type::kReference); if (instruction_->IsInstanceOf()) { x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, @@ -411,21 +405,21 @@ class ArraySetSlowPathX86 : public SlowPathCode { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove( locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove( locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); parallel_move.AddMove( locations->InAt(2), Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); @@ -808,19 +802,19 @@ class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode { // We're moving two or three locations to locations that could // overlap, so we need a parallel move resolver.
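// A minimal sketch of the string-compression encoding behind the BoundsCheckSlowPathX86
// change earlier in this file (shrl by 1 only when the length is a string length): the
// count field packs the character count in the upper bits and keeps a compression flag in
// bit 0. The flag layout is inferred from the testb(..., Immediate(1)) / shrl(..., 1)
// pairs in this diff; the helper names are hypothetical.
#include <cstdint>
#include <cstdio>

static int32_t DecodeStringLength(int32_t count_field) {
  return static_cast<int32_t>(static_cast<uint32_t>(count_field) >> 1);  // shrl length, 1
}

static bool IsCompressed(int32_t count_field) {
  return (count_field & 1) == 0;  // a set bit 0 takes the "not compressed" branch
}

int main() {
  int32_t count = (5 << 1) | 1;  // length 5, stored uncompressed
  std::printf("length=%d compressed=%d\n", DecodeStringLength(count), IsCompressed(count));
  return 0;
}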
InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove(ref_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove(obj_, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); if (index.IsValid()) { parallel_move.AddMove(index, Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); } else { @@ -1027,20 +1021,21 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this), - assembler_(graph->GetArena()), + move_resolver_(graph->GetAllocator(), this), + assembler_(graph->GetAllocator()), isa_features_(isa_features), - boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), constant_area_start_(-1), - fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), + fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), method_address_offset_(std::less<uint32_t>(), - graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { // Use a fake return address register to mimic Quick.
AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -1066,8 +1061,14 @@ void CodeGeneratorX86::GenerateFrameEntry() { IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ addw(Address(kMethodRegisterArgument, ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + } + if (!skip_overflow_check) { - __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86)))); + size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86); + __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes))); RecordPcInfo(nullptr, 0); } @@ -1125,24 +1126,27 @@ void CodeGeneratorX86::Bind(HBasicBlock* block) { __ Bind(GetLabelOf(block)); } -Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(Primitive::Type type) const { +Location InvokeDexCallingConventionVisitorX86::GetReturnLocation(DataType::Type type) const { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kUint32: + case DataType::Type::kInt32: return Location::RegisterLocation(EAX); - case Primitive::kPrimLong: + case DataType::Type::kUint64: + case DataType::Type::kInt64: return Location::RegisterPairLocation(EAX, EDX); - case Primitive::kPrimVoid: + case DataType::Type::kVoid: return Location::NoLocation(); - case Primitive::kPrimDouble: - case Primitive::kPrimFloat: + case DataType::Type::kFloat64: + case DataType::Type::kFloat32: return Location::FpuRegisterLocation(XMM0); } @@ -1153,14 +1157,15 @@ Location InvokeDexCallingConventionVisitorX86::GetMethodLocation() const { return Location::RegisterLocation(kMethodRegisterArgument); } -Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorX86::GetNextLocation(DataType::Type type) { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { uint32_t index = gp_index_++; stack_index_++; if (index < calling_convention.GetNumberOfRegisters()) { @@ -1170,7 +1175,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type t } } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { uint32_t index = gp_index_; gp_index_ += 2; stack_index_ += 2; @@ -1183,7 +1188,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type t } } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { uint32_t index = float_index_++; stack_index_++; if (index < calling_convention.GetNumberOfFpuRegisters()) { @@ -1193,7 +1198,7 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type t } } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { uint32_t index = float_index_++; stack_index_ += 2; if (index < 
calling_convention.GetNumberOfFpuRegisters()) { @@ -1203,7 +1208,9 @@ Location InvokeDexCallingConventionVisitorX86::GetNextLocation(Primitive::Type t } } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; } @@ -1259,10 +1266,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { EmitParallelMoves( Location::RegisterLocation(source.AsRegisterPairHigh<Register>()), Location::RegisterLocation(destination.AsRegisterPairHigh<Register>()), - Primitive::kPrimInt, + DataType::Type::kInt32, Location::RegisterLocation(source.AsRegisterPairLow<Register>()), Location::RegisterLocation(destination.AsRegisterPairLow<Register>()), - Primitive::kPrimInt); + DataType::Type::kInt32); } else if (source.IsFpuRegister()) { XmmRegister src_reg = source.AsFpuRegister<XmmRegister>(); __ movd(destination.AsRegisterPairLow<Register>(), src_reg); @@ -1281,7 +1288,7 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { } else if (source.IsDoubleStackSlot()) { __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); } else if (source.IsRegisterPair()) { - size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt); + size_t elem_size = DataType::Size(DataType::Type::kInt32); // Create stack space for 2 elements. __ subl(ESP, Immediate(2 * elem_size)); __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>()); @@ -1313,10 +1320,10 @@ void CodeGeneratorX86::Move64(Location destination, Location source) { EmitParallelMoves( Location::StackSlot(source.GetStackIndex()), Location::StackSlot(destination.GetStackIndex()), - Primitive::kPrimInt, + DataType::Type::kInt32, Location::StackSlot(source.GetHighStackIndex(kX86WordSize)), Location::StackSlot(destination.GetHighStackIndex(kX86WordSize)), - Primitive::kPrimInt); + DataType::Type::kInt32); } } } @@ -1326,11 +1333,11 @@ void CodeGeneratorX86::MoveConstant(Location location, int32_t value) { __ movl(location.AsRegister<Register>(), Immediate(value)); } -void CodeGeneratorX86::MoveLocation(Location dst, Location src, Primitive::Type dst_type) { - HParallelMove move(GetGraph()->GetArena()); - if (dst_type == Primitive::kPrimLong && !src.IsConstant() && !src.IsFpuRegister()) { - move.AddMove(src.ToLow(), dst.ToLow(), Primitive::kPrimInt, nullptr); - move.AddMove(src.ToHigh(), dst.ToHigh(), Primitive::kPrimInt, nullptr); +void CodeGeneratorX86::MoveLocation(Location dst, Location src, DataType::Type dst_type) { + HParallelMove move(GetGraph()->GetAllocator()); + if (dst_type == DataType::Type::kInt64 && !src.IsConstant() && !src.IsFpuRegister()) { + move.AddMove(src.ToLow(), dst.ToLow(), DataType::Type::kInt32, nullptr); + move.AddMove(src.ToHigh(), dst.ToHigh(), DataType::Type::kInt32, nullptr); } else { move.AddMove(src, dst, dst_type, nullptr); } @@ -1349,13 +1356,22 @@ void CodeGeneratorX86::AddLocationAsTemp(Location location, LocationSummary* loc } void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if 
(codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ pushl(EAX); + __ movl(EAX, Address(ESP, kX86WordSize)); + __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1)); + __ popl(EAX); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -1553,16 +1569,16 @@ void InstructionCodeGeneratorX86::GenerateCompareTestAndBranch(HCondition* condi Location left = locations->InAt(0); Location right = locations->InAt(1); - Primitive::Type type = condition->InputAt(0)->GetType(); + DataType::Type type = condition->InputAt(0)->GetType(); switch (type) { - case Primitive::kPrimLong: + case DataType::Type::kInt64: GenerateLongComparesAndJumps(condition, true_target, false_target); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: GenerateFPCompare(left, right, condition, false); GenerateFPJumps(condition, true_target, false_target); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: GenerateFPCompare(left, right, condition, true); GenerateFPJumps(condition, true_target, false_target); break; @@ -1585,8 +1601,8 @@ static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { // conditions if they are materialized due to the complex branching. return cond->IsCondition() && cond->GetNext() == branch && - cond->InputAt(0)->GetType() != Primitive::kPrimLong && - !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType()); + cond->InputAt(0)->GetType() != DataType::Type::kInt64 && + !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); } template<class LabelType> @@ -1650,8 +1666,8 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio // If this is a long or FP comparison that has been folded into // the HCondition, generate the comparison directly. 
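// A minimal sketch of the hotness counting enabled by CountHotnessInCompiledCode in the
// GenerateFrameEntry and HandleGoto changes above: a per-method counter is bumped once on
// entry and once per loop back edge. The 16-bit width is inferred from the emitted addw;
// the types and names below are hypothetical, not ART code.
#include <cstdint>
#include <cstdio>

struct FakeArtMethod {
  uint16_t hotness_count_ = 0;  // target of addw(Address(method, HotnessCountOffset()), 1)
};

static void BumpHotness(FakeArtMethod* method) {
  method->hotness_count_++;  // wraps naturally at 16 bits, like the addw
}

int main() {
  FakeArtMethod m;
  BumpHotness(&m);                 // frame entry
  for (int i = 0; i < 100; ++i) {
    BumpHotness(&m);               // one increment per back edge
  }
  std::printf("hotness=%u\n", static_cast<unsigned>(m.hotness_count_));
  return 0;
}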
- Primitive::Type type = condition->InputAt(0)->GetType(); - if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + DataType::Type type = condition->InputAt(0)->GetType(); + if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) { GenerateCompareTestAndBranch(condition, true_target, false_target); return; } @@ -1675,7 +1691,7 @@ void InstructionCodeGeneratorX86::GenerateTestAndBranch(HInstruction* instructio } void LocationsBuilderX86::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::Any()); } @@ -1692,7 +1708,7 @@ void InstructionCodeGeneratorX86::VisitIf(HIf* if_instr) { } void LocationsBuilderX86::VisitDeoptimize(HDeoptimize* deoptimize) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); InvokeRuntimeCallingConvention calling_convention; RegisterSet caller_saves = RegisterSet::Empty(); @@ -1712,7 +1728,7 @@ void InstructionCodeGeneratorX86::VisitDeoptimize(HDeoptimize* deoptimize) { } void LocationsBuilderX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(flag, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -1724,7 +1740,7 @@ void InstructionCodeGeneratorX86::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFla static bool SelectCanUseCMOV(HSelect* select) { // There are no conditional move instructions for XMMs. - if (Primitive::IsFloatingPointType(select->GetType())) { + if (DataType::IsFloatingPointType(select->GetType())) { return false; } @@ -1732,9 +1748,9 @@ static bool SelectCanUseCMOV(HSelect* select) { // In 32 bit mode, a long condition doesn't generate a single CC either. HInstruction* condition = select->GetCondition(); if (condition->IsCondition()) { - Primitive::Type compare_type = condition->InputAt(0)->GetType(); - if (compare_type == Primitive::kPrimLong || - Primitive::IsFloatingPointType(compare_type)) { + DataType::Type compare_type = condition->InputAt(0)->GetType(); + if (compare_type == DataType::Type::kInt64 || + DataType::IsFloatingPointType(compare_type)) { return false; } } @@ -1744,8 +1760,8 @@ static bool SelectCanUseCMOV(HSelect* select) { } void LocationsBuilderX86::VisitSelect(HSelect* select) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); - if (Primitive::IsFloatingPointType(select->GetType())) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); + if (DataType::IsFloatingPointType(select->GetType())) { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::Any()); } else { @@ -1793,8 +1809,8 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { } } else { // We can't handle FP or long here. 
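// A minimal sketch of why kInt64 conditions go through GenerateLongComparesAndJumps on
// 32-bit x86 (see AreEflagsSetFrom and SelectCanUseCMOV above): a 64-bit compare cannot be
// produced by one flag-setting instruction, so the high words are compared first (signed)
// and the low words break ties (unsigned). Hypothetical helper, not ART code.
#include <cstdint>
#include <cstdio>

static bool LessThan64(int64_t a, int64_t b) {
  int32_t a_hi = static_cast<int32_t>(a >> 32);
  int32_t b_hi = static_cast<int32_t>(b >> 32);
  if (a_hi != b_hi) {
    return a_hi < b_hi;                                        // signed compare of high words
  }
  return static_cast<uint32_t>(a) < static_cast<uint32_t>(b);  // unsigned compare of low words
}

int main() {
  std::printf("%d %d\n", LessThan64(-1, 0), LessThan64(INT64_C(1) << 33, 3));
  return 0;
}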
- DCHECK_NE(condition->InputAt(0)->GetType(), Primitive::kPrimLong); - DCHECK(!Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())); + DCHECK_NE(condition->InputAt(0)->GetType(), DataType::Type::kInt64); + DCHECK(!DataType::IsFloatingPointType(condition->InputAt(0)->GetType())); LocationSummary* cond_locations = condition->GetLocations(); codegen_->GenerateIntCompare(cond_locations->InAt(0), cond_locations->InAt(1)); cond = X86Condition(condition->GetCondition()); @@ -1808,7 +1824,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { // If the condition is true, overwrite the output, which already contains false. Location false_loc = locations->InAt(0); Location true_loc = locations->InAt(1); - if (select->GetType() == Primitive::kPrimLong) { + if (select->GetType() == DataType::Type::kInt64) { // 64 bit conditional move. Register false_high = false_loc.AsRegisterPairHigh<Register>(); Register false_low = false_loc.AsRegisterPairLow<Register>(); @@ -1838,7 +1854,7 @@ void InstructionCodeGeneratorX86::VisitSelect(HSelect* select) { } void LocationsBuilderX86::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetArena()) LocationSummary(info); + new (GetGraph()->GetAllocator()) LocationSummary(info); } void InstructionCodeGeneratorX86::VisitNativeDebugInfo(HNativeDebugInfo*) { @@ -1851,10 +1867,10 @@ void CodeGeneratorX86::GenerateNop() { void LocationsBuilderX86::HandleCondition(HCondition* cond) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall); // Handle the long/FP comparisons made in instruction simplification. switch (cond->InputAt(0)->GetType()) { - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); if (!cond->IsEmittedAtUseSite()) { @@ -1862,8 +1878,8 @@ void LocationsBuilderX86::HandleCondition(HCondition* cond) { } break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); if (cond->InputAt(1)->IsX86LoadFromConstantTable()) { DCHECK(cond->InputAt(1)->IsEmittedAtUseSite()); @@ -1909,14 +1925,14 @@ void InstructionCodeGeneratorX86::HandleCondition(HCondition* cond) { __ setb(X86Condition(cond->GetCondition()), reg); return; } - case Primitive::kPrimLong: + case DataType::Type::kInt64: GenerateLongComparesAndJumps(cond, &true_label, &false_label); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: GenerateFPCompare(lhs, rhs, cond, false); GenerateFPJumps(cond, &true_label, &false_label); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: GenerateFPCompare(lhs, rhs, cond, true); GenerateFPJumps(cond, &true_label, &false_label); break; @@ -2018,7 +2034,7 @@ void InstructionCodeGeneratorX86::VisitAboveOrEqual(HAboveOrEqual* comp) { void LocationsBuilderX86::VisitIntConstant(HIntConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2028,7 +2044,7 @@ void InstructionCodeGeneratorX86::VisitIntConstant(HIntConstant* constant ATTRIB void LocationsBuilderX86::VisitNullConstant(HNullConstant* constant) { 
LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2038,7 +2054,7 @@ void InstructionCodeGeneratorX86::VisitNullConstant(HNullConstant* constant ATTR void LocationsBuilderX86::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2048,7 +2064,7 @@ void InstructionCodeGeneratorX86::VisitLongConstant(HLongConstant* constant ATTR void LocationsBuilderX86::VisitFloatConstant(HFloatConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2058,7 +2074,7 @@ void InstructionCodeGeneratorX86::VisitFloatConstant(HFloatConstant* constant AT void LocationsBuilderX86::VisitDoubleConstant(HDoubleConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2093,24 +2109,25 @@ void InstructionCodeGeneratorX86::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNU void LocationsBuilderX86::VisitReturn(HReturn* ret) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); switch (ret->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RegisterLocation(EAX)); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: locations->SetInAt( 0, Location::RegisterPairLocation(EAX, EDX)); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt( 0, Location::FpuRegisterLocation(XMM0)); break; @@ -2123,22 +2140,23 @@ void LocationsBuilderX86::VisitReturn(HReturn* ret) { void InstructionCodeGeneratorX86::VisitReturn(HReturn* ret) { if (kIsDebugBuild) { switch (ret->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<Register>(), EAX); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairLow<Register>(), EAX); 
DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegisterPairHigh<Register>(), EDX); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>(), XMM0); break; @@ -2292,22 +2310,22 @@ void InstructionCodeGeneratorX86::VisitInvokePolymorphic(HInvokePolymorphic* inv void LocationsBuilderX86::VisitNeg(HNeg* neg) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); switch (neg->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresFpuRegister()); @@ -2323,13 +2341,13 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { Location out = locations->Out(); Location in = locations->InAt(0); switch (neg->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK(in.IsRegister()); DCHECK(in.Equals(out)); __ negl(out.AsRegister<Register>()); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK(in.IsRegisterPair()); DCHECK(in.Equals(out)); __ negl(out.AsRegisterPairLow<Register>()); @@ -2342,7 +2360,7 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { __ negl(out.AsRegisterPairHigh<Register>()); break; - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { DCHECK(in.Equals(out)); Register constant = locations->GetTemp(0).AsRegister<Register>(); XmmRegister mask = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); @@ -2355,7 +2373,7 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { DCHECK(in.Equals(out)); XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); // Implement double negation with an exclusive or with value @@ -2373,8 +2391,8 @@ void InstructionCodeGeneratorX86::VisitNeg(HNeg* neg) { void LocationsBuilderX86::VisitX86FPNeg(HX86FPNeg* neg) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); - DCHECK(Primitive::IsFloatingPointType(neg->GetType())); + new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); + DCHECK(DataType::IsFloatingPointType(neg->GetType())); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); @@ -2388,7 +2406,7 @@ void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) { Register constant_area = locations->InAt(1).AsRegister<Register>(); XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); - if (neg->GetType() == Primitive::kPrimFloat) { + if (neg->GetType() == DataType::Type::kFloat32) { __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x80000000), 
neg->GetBaseMethodAddress(), constant_area)); @@ -2402,28 +2420,36 @@ void InstructionCodeGeneratorX86::VisitX86FPNeg(HX86FPNeg* neg) { } void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); - DCHECK_NE(result_type, input_type); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; // The float-to-long and double-to-long type conversions rely on a // call to the runtime. LocationSummary::CallKind call_kind = - ((input_type == Primitive::kPrimFloat || input_type == Primitive::kPrimDouble) - && result_type == Primitive::kPrimLong) + ((input_type == DataType::Type::kFloat32 || input_type == DataType::Type::kFloat64) + && result_type == DataType::Type::kInt64) ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall; LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(conversion, call_kind); - - // The Java language does not allow treating boolean as an integral type but - // our bit representation makes it safe. + new (GetGraph()->GetAllocator()) LocationSummary(conversion, call_kind); switch (result_type) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: switch (input_type) { - case Primitive::kPrimLong: { - // Type conversion from long to byte is a result of code transformations. + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0))); + // Make the output overlap to please the register allocator. This greatly simplifies + // the validation of the linear scan implementation + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + break; + case DataType::Type::kInt64: { HInstruction* input = conversion->InputAt(0); Location input_location = input->IsConstant() ? Location::ConstantLocation(input->AsConstant()) @@ -2434,17 +2460,6 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); break; } - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-byte' instruction. - locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0))); - // Make the output overlap to please the register allocator. This greatly simplifies - // the validation of the linear scan implementation - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); - break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -2452,43 +2467,27 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { } break; - case Primitive::kPrimShort: - switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to short is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-short' instruction. 
- locations->SetInAt(0, Location::Any()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK(DataType::IsIntegralType(input_type)) << input_type; + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: switch (input_type) { - case Primitive::kPrimLong: - // Processing a Dex `long-to-int' instruction. + case DataType::Type::kInt64: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-int' instruction. + case DataType::Type::kFloat32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-int' instruction. + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); @@ -2500,22 +2499,20 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-long' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RegisterLocation(EAX)); locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { - // Processing a Dex `float-to-long' or 'double-to-long' instruction. + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { InvokeRuntimeCallingConvention calling_convention; XmmRegister parameter = calling_convention.GetFpuRegisterAt(0); locations->SetInAt(0, Location::FpuRegisterLocation(parameter)); @@ -2531,47 +2528,24 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { } break; - case Primitive::kPrimChar: - switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to char is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - // Processing a Dex `int-to-char' instruction. - locations->SetInAt(0, Location::Any()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - break; - - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. 
- case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-float' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimLong: - // Processing a Dex `long-to-float' instruction. + case DataType::Type::kInt64: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::Any()); break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-float' instruction. + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -2579,30 +2553,27 @@ void LocationsBuilderX86::VisitTypeConversion(HTypeConversion* conversion) { default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-double' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimLong: - // Processing a Dex `long-to-double' instruction. + case DataType::Type::kInt64: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::Any()); break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-double' instruction. + case DataType::Type::kFloat32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -2623,28 +2594,47 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio LocationSummary* locations = conversion->GetLocations(); Location out = locations->Out(); Location in = locations->InAt(0); - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); - DCHECK_NE(result_type, input_type); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; switch (result_type) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to byte is a result of code transformations. 
+ case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + if (in.IsRegister()) { + __ movzxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>()); + } else { + DCHECK(in.GetConstant()->IsIntConstant()); + int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value))); + } + break; + case DataType::Type::kInt64: if (in.IsRegisterPair()) { - __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>()); + __ movzxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>()); } else { DCHECK(in.GetConstant()->IsLongConstant()); int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); - __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value))); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint8_t>(value))); } break; - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-byte' instruction. + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case DataType::Type::kInt8: + switch (input_type) { + case DataType::Type::kUint8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: if (in.IsRegister()) { __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>()); } else { @@ -2653,6 +2643,15 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value))); } break; + case DataType::Type::kInt64: + if (in.IsRegisterPair()) { + __ movsxb(out.AsRegister<Register>(), in.AsRegisterPairLow<ByteRegister>()); + } else { + DCHECK(in.GetConstant()->IsLongConstant()); + int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value))); + } + break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -2660,26 +2659,43 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio } break; - case Primitive::kPrimShort: + case DataType::Type::kUint16: switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to short is a result of code transformations. 
+ case DataType::Type::kInt8: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + if (in.IsRegister()) { + __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>()); + } else if (in.IsStackSlot()) { + __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); + } else { + DCHECK(in.GetConstant()->IsIntConstant()); + int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value))); + } + break; + case DataType::Type::kInt64: if (in.IsRegisterPair()) { - __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); + __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); } else if (in.IsDoubleStackSlot()) { - __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); + __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); } else { DCHECK(in.GetConstant()->IsLongConstant()); int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); - __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value))); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value))); } break; - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-short' instruction. + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case DataType::Type::kInt16: + switch (input_type) { + case DataType::Type::kUint16: + case DataType::Type::kInt32: if (in.IsRegister()) { __ movsxw(out.AsRegister<Register>(), in.AsRegister<Register>()); } else if (in.IsStackSlot()) { @@ -2690,6 +2706,17 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value))); } break; + case DataType::Type::kInt64: + if (in.IsRegisterPair()) { + __ movsxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); + } else if (in.IsDoubleStackSlot()) { + __ movsxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); + } else { + DCHECK(in.GetConstant()->IsLongConstant()); + int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); + __ movl(out.AsRegister<Register>(), Immediate(static_cast<int16_t>(value))); + } + break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type @@ -2697,10 +2724,9 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: switch (input_type) { - case Primitive::kPrimLong: - // Processing a Dex `long-to-int' instruction. + case DataType::Type::kInt64: if (in.IsRegisterPair()) { __ movl(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); } else if (in.IsDoubleStackSlot()) { @@ -2713,8 +2739,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio } break; - case Primitive::kPrimFloat: { - // Processing a Dex `float-to-int' instruction. + case DataType::Type::kFloat32: { XmmRegister input = in.AsFpuRegister<XmmRegister>(); Register output = out.AsRegister<Register>(); XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); @@ -2738,8 +2763,7 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio break; } - case Primitive::kPrimDouble: { - // Processing a Dex `double-to-int' instruction. 
+ case DataType::Type::kFloat64: { XmmRegister input = in.AsFpuRegister<XmmRegister>(); Register output = out.AsRegister<Register>(); XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); @@ -2769,29 +2793,26 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-long' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: DCHECK_EQ(out.AsRegisterPairLow<Register>(), EAX); DCHECK_EQ(out.AsRegisterPairHigh<Register>(), EDX); DCHECK_EQ(in.AsRegister<Register>(), EAX); __ cdq(); break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-long' instruction. + case DataType::Type::kFloat32: codegen_->InvokeRuntime(kQuickF2l, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickF2l, int64_t, float>(); break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-long' instruction. + case DataType::Type::kFloat64: codegen_->InvokeRuntime(kQuickD2l, conversion, conversion->GetDexPc()); CheckEntrypointTypes<kQuickD2l, int64_t, double>(); break; @@ -2802,64 +2823,25 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio } break; - case Primitive::kPrimChar: - switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to short is a result of code transformations. - if (in.IsRegisterPair()) { - __ movzxw(out.AsRegister<Register>(), in.AsRegisterPairLow<Register>()); - } else if (in.IsDoubleStackSlot()) { - __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); - } else { - DCHECK(in.GetConstant()->IsLongConstant()); - int64_t value = in.GetConstant()->AsLongConstant()->GetValue(); - __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value))); - } - break; - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - // Processing a Dex `Process a Dex `int-to-char'' instruction. - if (in.IsRegister()) { - __ movzxw(out.AsRegister<Register>(), in.AsRegister<Register>()); - } else if (in.IsStackSlot()) { - __ movzxw(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex())); - } else { - DCHECK(in.GetConstant()->IsIntConstant()); - int32_t value = in.GetConstant()->AsIntConstant()->GetValue(); - __ movl(out.AsRegister<Register>(), Immediate(static_cast<uint16_t>(value))); - } - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - break; - - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-float' instruction. 
+ case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>()); break; - case Primitive::kPrimLong: { - // Processing a Dex `long-to-float' instruction. + case DataType::Type::kInt64: { size_t adjustment = 0; // Create stack space for the call to // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstps below. // TODO: enhance register allocator to ask for stack temporaries. if (!in.IsDoubleStackSlot() || !out.IsStackSlot()) { - adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + adjustment = DataType::Size(DataType::Type::kInt64); __ subl(ESP, Immediate(adjustment)); } @@ -2881,38 +2863,35 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio break; } - case Primitive::kPrimDouble: - // Processing a Dex `double-to-float' instruction. + case DataType::Type::kFloat64: __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-double' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<Register>()); break; - case Primitive::kPrimLong: { - // Processing a Dex `long-to-double' instruction. + case DataType::Type::kInt64: { size_t adjustment = 0; // Create stack space for the call to // InstructionCodeGeneratorX86::PushOntoFPStack and/or X86Assembler::fstpl below. // TODO: enhance register allocator to ask for stack temporaries. if (!in.IsDoubleStackSlot() || !out.IsDoubleStackSlot()) { - adjustment = Primitive::ComponentSize(Primitive::kPrimLong); + adjustment = DataType::Size(DataType::Type::kInt64); __ subl(ESP, Immediate(adjustment)); } @@ -2934,15 +2913,14 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio break; } - case Primitive::kPrimFloat: - // Processing a Dex `float-to-double' instruction. 
+ case DataType::Type::kFloat32: __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); break; default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; default: @@ -2953,24 +2931,24 @@ void InstructionCodeGeneratorX86::VisitTypeConversion(HTypeConversion* conversio void LocationsBuilderX86::VisitAdd(HAdd* add) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); if (add->InputAt(1)->IsX86LoadFromConstantTable()) { DCHECK(add->InputAt(1)->IsEmittedAtUseSite()); @@ -2996,7 +2974,7 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { Location out = locations->Out(); switch (add->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { if (second.IsRegister()) { if (out.AsRegister<Register>() == first.AsRegister<Register>()) { __ addl(out.AsRegister<Register>(), second.AsRegister<Register>()); @@ -3020,7 +2998,7 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (second.IsRegisterPair()) { __ addl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); __ adcl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); @@ -3037,7 +3015,7 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (second.IsFpuRegister()) { __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) { @@ -3055,7 +3033,7 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (second.IsFpuRegister()) { __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (add->InputAt(1)->IsX86LoadFromConstantTable()) { @@ -3080,17 +3058,17 @@ void InstructionCodeGeneratorX86::VisitAdd(HAdd* add) { void LocationsBuilderX86::VisitSub(HSub* sub) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall); switch (sub->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, 
Location::RequiresFpuRegister()); if (sub->InputAt(1)->IsX86LoadFromConstantTable()) { DCHECK(sub->InputAt(1)->IsEmittedAtUseSite()); @@ -3114,7 +3092,7 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { Location second = locations->InAt(1); DCHECK(first.Equals(locations->Out())); switch (sub->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { if (second.IsRegister()) { __ subl(first.AsRegister<Register>(), second.AsRegister<Register>()); } else if (second.IsConstant()) { @@ -3126,7 +3104,7 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (second.IsRegisterPair()) { __ subl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); __ sbbl(first.AsRegisterPairHigh<Register>(), second.AsRegisterPairHigh<Register>()); @@ -3143,7 +3121,7 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (second.IsFpuRegister()) { __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) { @@ -3161,7 +3139,7 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (second.IsFpuRegister()) { __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (sub->InputAt(1)->IsX86LoadFromConstantTable()) { @@ -3186,9 +3164,9 @@ void InstructionCodeGeneratorX86::VisitSub(HSub* sub) { void LocationsBuilderX86::VisitMul(HMul* mul) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); switch (mul->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); if (mul->InputAt(1)->IsIntConstant()) { @@ -3198,7 +3176,7 @@ void LocationsBuilderX86::VisitMul(HMul* mul) { locations->SetOut(Location::SameAsFirstInput()); } break; - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); @@ -3207,8 +3185,8 @@ void LocationsBuilderX86::VisitMul(HMul* mul) { locations->AddTemp(Location::RegisterLocation(EDX)); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); if (mul->InputAt(1)->IsX86LoadFromConstantTable()) { DCHECK(mul->InputAt(1)->IsEmittedAtUseSite()); @@ -3233,7 +3211,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { Location out = locations->Out(); switch (mul->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: // The constant may have ended up in a register, so test explicitly to avoid // problems where the output may not be the same as the first operand. 
if (mul->InputAt(1)->IsIntConstant()) { @@ -3249,7 +3227,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { } break; - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register in1_hi = first.AsRegisterPairHigh<Register>(); Register in1_lo = first.AsRegisterPairLow<Register>(); Register eax = locations->GetTemp(0).AsRegister<Register>(); @@ -3331,7 +3309,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { DCHECK(first.Equals(locations->Out())); if (second.IsFpuRegister()) { __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); @@ -3350,7 +3328,7 @@ void InstructionCodeGeneratorX86::VisitMul(HMul* mul) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { DCHECK(first.Equals(locations->Out())); if (second.IsFpuRegister()) { __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); @@ -3416,9 +3394,9 @@ void InstructionCodeGeneratorX86::PushOntoFPStack(Location source, } void InstructionCodeGeneratorX86::GenerateRemFP(HRem *rem) { - Primitive::Type type = rem->GetResultType(); - bool is_float = type == Primitive::kPrimFloat; - size_t elem_size = Primitive::ComponentSize(type); + DataType::Type type = rem->GetResultType(); + bool is_float = type == DataType::Type::kFloat32; + size_t elem_size = DataType::Size(type); LocationSummary* locations = rem->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); @@ -3595,7 +3573,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr bool is_div = instruction->IsDiv(); switch (instruction->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { DCHECK_EQ(EAX, first.AsRegister<Register>()); DCHECK_EQ(is_div ? EAX : EDX, out.AsRegister<Register>()); @@ -3613,7 +3591,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr GenerateDivRemWithAnyConstant(instruction); } } else { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86( + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86( instruction, out.AsRegister<Register>(), is_div); codegen_->AddSlowPath(slow_path); @@ -3634,7 +3612,7 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { InvokeRuntimeCallingConvention calling_convention; DCHECK_EQ(calling_convention.GetRegisterAt(0), first.AsRegisterPairLow<Register>()); DCHECK_EQ(calling_convention.GetRegisterAt(1), first.AsRegisterPairHigh<Register>()); @@ -3659,13 +3637,13 @@ void InstructionCodeGeneratorX86::GenerateDivRemIntegral(HBinaryOperation* instr } void LocationsBuilderX86::VisitDiv(HDiv* div) { - LocationSummary::CallKind call_kind = (div->GetResultType() == Primitive::kPrimLong) + LocationSummary::CallKind call_kind = (div->GetResultType() == DataType::Type::kInt64) ? 
LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(div, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(div, call_kind); switch (div->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RegisterLocation(EAX)); locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); @@ -3679,7 +3657,7 @@ void LocationsBuilderX86::VisitDiv(HDiv* div) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterPairLocation( calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); @@ -3689,8 +3667,8 @@ void LocationsBuilderX86::VisitDiv(HDiv* div) { locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); if (div->InputAt(1)->IsX86LoadFromConstantTable()) { DCHECK(div->InputAt(1)->IsEmittedAtUseSite()); @@ -3714,13 +3692,13 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { Location second = locations->InAt(1); switch (div->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GenerateDivRemIntegral(div); break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (second.IsFpuRegister()) { __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) { @@ -3738,7 +3716,7 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (second.IsFpuRegister()) { __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (div->InputAt(1)->IsX86LoadFromConstantTable()) { @@ -3762,15 +3740,15 @@ void InstructionCodeGeneratorX86::VisitDiv(HDiv* div) { } void LocationsBuilderX86::VisitRem(HRem* rem) { - Primitive::Type type = rem->GetResultType(); + DataType::Type type = rem->GetResultType(); - LocationSummary::CallKind call_kind = (rem->GetResultType() == Primitive::kPrimLong) + LocationSummary::CallKind call_kind = (rem->GetResultType() == DataType::Type::kInt64) ? 
LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(rem, call_kind); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RegisterLocation(EAX)); locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); locations->SetOut(Location::RegisterLocation(EDX)); @@ -3782,7 +3760,7 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterPairLocation( calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1))); @@ -3792,8 +3770,8 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { locations->SetOut(Location::RegisterPairLocation(EAX, EDX)); break; } - case Primitive::kPrimDouble: - case Primitive::kPrimFloat: { + case DataType::Type::kFloat64: + case DataType::Type::kFloat32: { locations->SetInAt(0, Location::Any()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); @@ -3807,15 +3785,15 @@ void LocationsBuilderX86::VisitRem(HRem* rem) { } void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { - Primitive::Type type = rem->GetResultType(); + DataType::Type type = rem->GetResultType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { GenerateRemFP(rem); break; } @@ -3827,15 +3805,16 @@ void InstructionCodeGeneratorX86::VisitRem(HRem* rem) { void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction); switch (instruction->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { locations->SetInAt(0, Location::Any()); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0))); if (!instruction->IsConstant()) { locations->AddTemp(Location::RequiresRegister()); @@ -3848,18 +3827,20 @@ void LocationsBuilderX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { } void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86(instruction); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); Location value = locations->InAt(0); switch (instruction->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case 
DataType::Type::kInt32: { if (value.IsRegister()) { __ testl(value.AsRegister<Register>(), value.AsRegister<Register>()); __ j(kEqual, slow_path->GetEntryLabel()); @@ -3874,7 +3855,7 @@ void InstructionCodeGeneratorX86::VisitDivZeroCheck(HDivZeroCheck* instruction) } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (value.IsRegisterPair()) { Register temp = locations->GetTemp(0).AsRegister<Register>(); __ movl(temp, value.AsRegisterPairLow<Register>()); @@ -3897,11 +3878,11 @@ void LocationsBuilderX86::HandleShift(HBinaryOperation* op) { DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); switch (op->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { // Can't have Location::Any() and output SameAsFirstInput() locations->SetInAt(0, Location::RequiresRegister()); // The shift count needs to be in CL or a constant. @@ -3923,7 +3904,7 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { DCHECK(first.Equals(locations->Out())); switch (op->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { DCHECK(first.IsRegister()); Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { @@ -3952,7 +3933,7 @@ void InstructionCodeGeneratorX86::HandleShift(HBinaryOperation* op) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); DCHECK_EQ(ECX, second_reg); @@ -3996,10 +3977,10 @@ void InstructionCodeGeneratorX86::GenerateShlLong(const Location& loc, int shift codegen_->EmitParallelMoves( loc.ToLow(), loc.ToHigh(), - Primitive::kPrimInt, + DataType::Type::kInt32, Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToLow(), - Primitive::kPrimInt); + DataType::Type::kInt32); } else if (shift > 32) { // Low part becomes 0. High part is low part << (shift-32). __ movl(high, low); @@ -4063,10 +4044,10 @@ void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, int shif codegen_->EmitParallelMoves( loc.ToHigh(), loc.ToLow(), - Primitive::kPrimInt, + DataType::Type::kInt32, Location::ConstantLocation(GetGraph()->GetIntConstant(0)), loc.ToHigh(), - Primitive::kPrimInt); + DataType::Type::kInt32); } else if (shift > 32) { // Low part is high >> (shift - 32). High part becomes 0. __ movl(low, high); @@ -4092,14 +4073,14 @@ void InstructionCodeGeneratorX86::GenerateUShrLong(const Location& loc, Register void LocationsBuilderX86::VisitRor(HRor* ror) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); switch (ror->GetResultType()) { - case Primitive::kPrimLong: + case DataType::Type::kInt64: // Add the temporary needed. locations->AddTemp(Location::RequiresRegister()); FALLTHROUGH_INTENDED; - case Primitive::kPrimInt: + case DataType::Type::kInt32: locations->SetInAt(0, Location::RequiresRegister()); // The shift count needs to be in CL (unless it is a constant). 
locations->SetInAt(1, Location::ByteRegisterOrConstant(ECX, ror->InputAt(1))); @@ -4116,7 +4097,7 @@ void InstructionCodeGeneratorX86::VisitRor(HRor* ror) { Location first = locations->InAt(0); Location second = locations->InAt(1); - if (ror->GetResultType() == Primitive::kPrimInt) { + if (ror->GetResultType() == DataType::Type::kInt32) { Register first_reg = first.AsRegister<Register>(); if (second.IsRegister()) { Register second_reg = second.AsRegister<Register>(); @@ -4128,7 +4109,7 @@ void InstructionCodeGeneratorX86::VisitRor(HRor* ror) { return; } - DCHECK_EQ(ror->GetResultType(), Primitive::kPrimLong); + DCHECK_EQ(ror->GetResultType(), DataType::Type::kInt64); Register first_reg_lo = first.AsRegisterPairLow<Register>(); Register first_reg_hi = first.AsRegisterPairHigh<Register>(); Register temp_reg = locations->GetTemp(0).AsRegister<Register>(); @@ -4200,8 +4181,8 @@ void InstructionCodeGeneratorX86::VisitUShr(HUShr* ushr) { } void LocationsBuilderX86::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); locations->SetOut(Location::RegisterLocation(EAX)); if (instruction->IsStringAlloc()) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); @@ -4229,8 +4210,8 @@ void InstructionCodeGeneratorX86::VisitNewInstance(HNewInstance* instruction) { } void LocationsBuilderX86::VisitNewArray(HNewArray* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); locations->SetOut(Location::RegisterLocation(EAX)); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -4249,7 +4230,7 @@ void InstructionCodeGeneratorX86::VisitNewArray(HNewArray* instruction) { void LocationsBuilderX86::VisitParameterValue(HParameterValue* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); if (location.IsStackSlot()) { location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); @@ -4265,7 +4246,7 @@ void InstructionCodeGeneratorX86::VisitParameterValue( void LocationsBuilderX86::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); } @@ -4274,7 +4255,7 @@ void InstructionCodeGeneratorX86::VisitCurrentMethod(HCurrentMethod* instruction void LocationsBuilderX86::VisitClassTableGet(HClassTableGet* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); 
} @@ -4300,7 +4281,7 @@ void InstructionCodeGeneratorX86::VisitClassTableGet(HClassTableGet* instruction void LocationsBuilderX86::VisitNot(HNot* not_) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); } @@ -4311,11 +4292,11 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* not_) { Location out = locations->Out(); DCHECK(in.Equals(out)); switch (not_->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ notl(out.AsRegister<Register>()); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ notl(out.AsRegisterPairLow<Register>()); __ notl(out.AsRegisterPairHigh<Register>()); break; @@ -4327,7 +4308,7 @@ void InstructionCodeGeneratorX86::VisitNot(HNot* not_) { void LocationsBuilderX86::VisitBooleanNot(HBooleanNot* bool_not) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); } @@ -4342,21 +4323,22 @@ void InstructionCodeGeneratorX86::VisitBooleanNot(HBooleanNot* bool_not) { void LocationsBuilderX86::VisitCompare(HCompare* compare) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); switch (compare->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); if (compare->InputAt(1)->IsX86LoadFromConstantTable()) { DCHECK(compare->InputAt(1)->IsEmittedAtUseSite()); @@ -4383,15 +4365,16 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { Condition less_cond = kLess; switch (compare->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { codegen_->GenerateIntCompare(left, right); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register left_low = left.AsRegisterPairLow<Register>(); Register left_high = left.AsRegisterPairHigh<Register>(); int32_t val_low = 0; @@ -4427,13 +4410,13 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { less_cond = kBelow; // for CF (unsigned). 
break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { GenerateFPCompare(left, right, compare, false); __ j(kUnordered, compare->IsGtBias() ? &greater : &less); less_cond = kBelow; // for CF (floats). break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { GenerateFPCompare(left, right, compare, true); __ j(kUnordered, compare->IsGtBias() ? &greater : &less); less_cond = kBelow; // for CF (floats). @@ -4459,7 +4442,7 @@ void InstructionCodeGeneratorX86::VisitCompare(HCompare* compare) { void LocationsBuilderX86::VisitPhi(HPhi* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { locations->SetInAt(i, Location::Any()); } @@ -4545,7 +4528,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset)); - RecordBootMethodPatch(invoke); + RecordBootImageMethodPatch(invoke); break; } case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: @@ -4555,10 +4538,7 @@ void CodeGeneratorX86::GenerateStaticOrDirectCall( Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>()); __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset)); - // Bind a new fixup label at the end of the "movl" insn. - __ Bind(NewMethodBssEntryPatch( - invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(), - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); + RecordMethodBssEntryPatch(invoke); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { @@ -4615,99 +4595,105 @@ void CodeGeneratorX86::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } -void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { +void CodeGeneratorX86::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); - HX86ComputeBaseMethodAddress* address = + HX86ComputeBaseMethodAddress* method_address = invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); - boot_image_method_patches_.emplace_back(address, - *invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); + boot_image_method_patches_.emplace_back( + method_address, invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); __ Bind(&boot_image_method_patches_.back().label); } -Label* CodeGeneratorX86::NewMethodBssEntryPatch( - HX86ComputeBaseMethodAddress* method_address, - MethodReference target_method) { +void CodeGeneratorX86::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) { + DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u); + HX86ComputeBaseMethodAddress* method_address = + invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(); // Add the patch entry and bind its label at the end of the instruction. 
- method_bss_entry_patches_.emplace_back(method_address, - *target_method.dex_file, - target_method.dex_method_index); - return &method_bss_entry_patches_.back().label; + method_bss_entry_patches_.emplace_back( + method_address, &GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); + __ Bind(&method_bss_entry_patches_.back().label); } -void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) { - HX86ComputeBaseMethodAddress* address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); - boot_image_type_patches_.emplace_back(address, - load_class->GetDexFile(), - load_class->GetTypeIndex().index_); +void CodeGeneratorX86::RecordBootImageTypePatch(HLoadClass* load_class) { + HX86ComputeBaseMethodAddress* method_address = + load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); + boot_image_type_patches_.emplace_back( + method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_); __ Bind(&boot_image_type_patches_.back().label); } Label* CodeGeneratorX86::NewTypeBssEntryPatch(HLoadClass* load_class) { - HX86ComputeBaseMethodAddress* address = + HX86ComputeBaseMethodAddress* method_address = load_class->InputAt(0)->AsX86ComputeBaseMethodAddress(); type_bss_entry_patches_.emplace_back( - address, load_class->GetDexFile(), load_class->GetTypeIndex().index_); + method_address, &load_class->GetDexFile(), load_class->GetTypeIndex().index_); return &type_bss_entry_patches_.back().label; } -void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); - HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); - string_patches_.emplace_back(address, - load_string->GetDexFile(), - load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86::RecordBootImageStringPatch(HLoadString* load_string) { + HX86ComputeBaseMethodAddress* method_address = + load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); + boot_image_string_patches_.emplace_back( + method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_); + __ Bind(&boot_image_string_patches_.back().label); } Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); - HX86ComputeBaseMethodAddress* address = + HX86ComputeBaseMethodAddress* method_address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress(); - string_patches_.emplace_back( - address, load_string->GetDexFile(), load_string->GetStringIndex().index_); - return &string_patches_.back().label; + string_bss_entry_patches_.emplace_back( + method_address, &load_string->GetDexFile(), load_string->GetStringIndex().index_); + return &string_bss_entry_patches_.back().label; } // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. 
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; -template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorX86::EmitPcRelativeLinkerPatches( const ArenaDeque<X86PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches) { + ArenaVector<linker::LinkerPatch>* linker_patches) { for (const X86PcRelativePatchInfo& info : infos) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; - linker_patches->push_back(Factory( - literal_offset, &info.dex_file, GetMethodAddressOffset(info.method_address), info.index)); + linker_patches->push_back(Factory(literal_offset, + info.target_dex_file, + GetMethodAddressOffset(info.method_address), + info.offset_or_index)); } } -void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { +void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = boot_image_method_patches_.size() + method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size(); + boot_image_string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( + boot_image_method_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( + boot_image_string_patches_, linker_patches); } else { DCHECK(boot_image_method_patches_.empty()); - DCHECK(boot_image_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); - } - EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( + boot_image_string_patches_, linker_patches); + } + EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( + method_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( + type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( + string_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); } @@ -4735,18 +4721,18 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = - new 
(GetGraph()->GetArena()) LocationSummary(instruction, - kEmitCompilerReadBarrier ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + kEmitCompilerReadBarrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { // The output overlaps in case of long: we don't want the low move @@ -4756,12 +4742,12 @@ void LocationsBuilderX86::HandleFieldGet(HInstruction* instruction, const FieldI // the read barrier. locations->SetOut( Location::RequiresRegister(), - (object_field_get_with_read_barrier || instruction->GetType() == Primitive::kPrimLong) ? + (object_field_get_with_read_barrier || instruction->GetType() == DataType::Type::kInt64) ? Location::kOutputOverlap : Location::kNoOutputOverlap); } - if (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) { + if (field_info.IsVolatile() && (field_info.GetFieldType() == DataType::Type::kInt64)) { // Long values can be loaded atomically into an XMM using movsd. // So we use an XMM register as a temp to achieve atomicity (first // load the temp into the XMM and then copy the XMM into the @@ -4779,35 +4765,37 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, Register base = base_loc.AsRegister<Register>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); - Primitive::Type field_type = field_info.GetFieldType(); + DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); + DataType::Type load_type = instruction->GetType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (field_type) { - case Primitive::kPrimBoolean: { + switch (load_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: { __ movzxb(out.AsRegister<Register>(), Address(base, offset)); break; } - case Primitive::kPrimByte: { + case DataType::Type::kInt8: { __ movsxb(out.AsRegister<Register>(), Address(base, offset)); break; } - case Primitive::kPrimShort: { - __ movsxw(out.AsRegister<Register>(), Address(base, offset)); + case DataType::Type::kUint16: { + __ movzxw(out.AsRegister<Register>(), Address(base, offset)); break; } - case Primitive::kPrimChar: { - __ movzxw(out.AsRegister<Register>(), Address(base, offset)); + case DataType::Type::kInt16: { + __ movsxw(out.AsRegister<Register>(), Address(base, offset)); break; } - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ movl(out.AsRegister<Register>(), Address(base, offset)); break; - case Primitive::kPrimNot: { + case DataType::Type::kReference: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this @@ -4831,7 +4819,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (is_volatile) { XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); __ movsd(temp, Address(base, offset)); @@ -4848,22 +4836,24 @@ void 
InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << load_type; UNREACHABLE(); } - if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimLong) { + if (load_type == DataType::Type::kReference || load_type == DataType::Type::kInt64) { // Potential implicit null checks, in the case of reference or // long fields, are handled in the previous switch statement. } else { @@ -4871,7 +4861,7 @@ void InstructionCodeGeneratorX86::HandleFieldGet(HInstruction* instruction, } if (is_volatile) { - if (field_type == Primitive::kPrimNot) { + if (load_type == DataType::Type::kReference) { // Memory barriers, in the case of references, are also handled // in the previous switch statement. } else { @@ -4884,26 +4874,25 @@ void LocationsBuilderX86::HandleFieldSet(HInstruction* instruction, const FieldI DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); bool is_volatile = field_info.IsVolatile(); - Primitive::Type field_type = field_info.GetFieldType(); - bool is_byte_type = (field_type == Primitive::kPrimBoolean) - || (field_type == Primitive::kPrimByte); + DataType::Type field_type = field_info.GetFieldType(); + bool is_byte_type = DataType::Size(field_type) == 1u; // The register allocator does not support multiple // inputs that die at entry with one in a specific register. if (is_byte_type) { // Ensure the value is in a byte register. locations->SetInAt(1, Location::RegisterLocation(EAX)); - } else if (Primitive::IsFloatingPointType(field_type)) { - if (is_volatile && field_type == Primitive::kPrimDouble) { + } else if (DataType::IsFloatingPointType(field_type)) { + if (is_volatile && field_type == DataType::Type::kFloat64) { // In order to satisfy the semantics of volatile, this must be a single instruction store. locations->SetInAt(1, Location::RequiresFpuRegister()); } else { locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1))); } - } else if (is_volatile && field_type == Primitive::kPrimLong) { + } else if (is_volatile && field_type == DataType::Type::kInt64) { // In order to satisfy the semantics of volatile, this must be a single instruction store. 
locations->SetInAt(1, Location::RequiresRegister()); @@ -4935,7 +4924,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, Register base = locations->InAt(0).AsRegister<Register>(); Location value = locations->InAt(1); bool is_volatile = field_info.IsVolatile(); - Primitive::Type field_type = field_info.GetFieldType(); + DataType::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); @@ -4947,30 +4936,31 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, bool maybe_record_implicit_null_check_done = false; switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: { __ movb(Address(base, offset), value.AsRegister<ByteRegister>()); break; } - case Primitive::kPrimShort: - case Primitive::kPrimChar: { + case DataType::Type::kUint16: + case DataType::Type::kInt16: { if (value.IsConstant()) { - int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); - __ movw(Address(base, offset), Immediate(v)); + __ movw(Address(base, offset), + Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); } else { __ movw(Address(base, offset), value.AsRegister<Register>()); } break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case DataType::Type::kInt32: + case DataType::Type::kReference: { if (kPoisonHeapReferences && needs_write_barrier) { // Note that in the case where `value` is a null reference, // we do not enter this block, as the reference does not // need poisoning. - DCHECK_EQ(field_type, Primitive::kPrimNot); + DCHECK_EQ(field_type, DataType::Type::kReference); Register temp = locations->GetTemp(0).AsRegister<Register>(); __ movl(temp, value.AsRegister<Register>()); __ PoisonHeapReference(temp); @@ -4985,7 +4975,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (is_volatile) { XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>(); @@ -5008,7 +4998,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (value.IsConstant()) { int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); __ movl(Address(base, offset), Immediate(v)); @@ -5018,7 +5008,7 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (value.IsConstant()) { int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant()); __ movl(Address(base, offset), Immediate(Low32Bits(v))); @@ -5031,7 +5021,9 @@ void InstructionCodeGeneratorX86::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } @@ -5171,7 +5163,7 @@ void CodeGeneratorX86::GenerateImplicitNullCheck(HNullCheck* instruction) { } void CodeGeneratorX86::GenerateExplicitNullCheck(HNullCheck* instruction) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86(instruction); + SlowPathCode* slow_path = new 
(GetScopedAllocator()) NullCheckSlowPathX86(instruction); AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); @@ -5196,18 +5188,18 @@ void InstructionCodeGeneratorX86::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, - object_array_get_with_read_barrier ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { // The output overlaps in case of long: we don't want the low move @@ -5217,9 +5209,9 @@ void LocationsBuilderX86::VisitArrayGet(HArrayGet* instruction) { // the read barrier. locations->SetOut( Location::RequiresRegister(), - (instruction->GetType() == Primitive::kPrimLong || object_array_get_with_read_barrier) ? - Location::kOutputOverlap : - Location::kNoOutputOverlap); + (instruction->GetType() == DataType::Type::kInt64 || object_array_get_with_read_barrier) + ? Location::kOutputOverlap + : Location::kNoOutputOverlap); } } @@ -5231,27 +5223,22 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { Location out_loc = locations->Out(); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); switch (type) { - case Primitive::kPrimBoolean: { + case DataType::Type::kBool: + case DataType::Type::kUint8: { Register out = out_loc.AsRegister<Register>(); __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); break; } - case Primitive::kPrimByte: { + case DataType::Type::kInt8: { Register out = out_loc.AsRegister<Register>(); __ movsxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset)); break; } - case Primitive::kPrimShort: { - Register out = out_loc.AsRegister<Register>(); - __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); - break; - } - - case Primitive::kPrimChar: { + case DataType::Type::kUint16: { Register out = out_loc.AsRegister<Register>(); if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { // Branch cases into compressed and uncompressed for each index's type. 
@@ -5275,13 +5262,19 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt16: { + Register out = out_loc.AsRegister<Register>(); + __ movsxw(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_2, data_offset)); + break; + } + + case DataType::Type::kInt32: { Register out = out_loc.AsRegister<Register>(); __ movl(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); @@ -5311,7 +5304,7 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { DCHECK_NE(obj, out_loc.AsRegisterPairLow<Register>()); __ movl(out_loc.AsRegisterPairLow<Register>(), CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset)); @@ -5321,24 +5314,26 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); __ movss(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_4, data_offset)); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); __ movsd(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_8, data_offset)); break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - if (type == Primitive::kPrimNot || type == Primitive::kPrimLong) { + if (type == DataType::Type::kReference || type == DataType::Type::kInt64) { // Potential implicit null checks, in the case of reference or // long arrays, are handled in the previous switch statement. } else { @@ -5347,20 +5342,19 @@ void InstructionCodeGeneratorX86::VisitArrayGet(HArrayGet* instruction) { } void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, may_need_runtime_call_for_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall); - bool is_byte_type = (value_type == Primitive::kPrimBoolean) - || (value_type == Primitive::kPrimByte); + bool is_byte_type = DataType::Size(value_type) == 1u; // We need the inputs to be different than the output in case of long operation. // In case of a byte operation, the register allocator does not support multiple // inputs that die at entry with one in a specific register. @@ -5369,7 +5363,7 @@ void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) { if (is_byte_type) { // Ensure the value is in a byte register. 
locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2))); - } else if (Primitive::IsFloatingPointType(value_type)) { + } else if (DataType::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); } else { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); @@ -5388,7 +5382,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Register array = array_loc.AsRegister<Register>(); Location index = locations->InAt(1); Location value = locations->InAt(2); - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); uint32_t class_offset = mirror::Object::ClassOffset().Int32Value(); uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value(); uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); @@ -5397,33 +5391,34 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); switch (value_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: { uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_1, offset); if (value.IsRegister()) { __ movb(address, value.AsRegister<ByteRegister>()); } else { - __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } - case Primitive::kPrimShort: - case Primitive::kPrimChar: { + case DataType::Type::kUint16: + case DataType::Type::kInt16: { uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_2, offset); if (value.IsRegister()) { __ movw(address, value.AsRegister<Register>()); } else { - __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset); @@ -5448,7 +5443,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { Location temp_loc = locations->GetTemp(0); Register temp = temp_loc.AsRegister<Register>(); if (may_need_runtime_call_for_type_check) { - slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86(instruction); + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { __ testl(register_value, register_value); @@ -5519,7 +5514,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset); if (value.IsRegister()) { @@ -5533,7 +5528,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } - 
case Primitive::kPrimLong: { + case DataType::Type::kInt64: { uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); if (value.IsRegisterPair()) { __ movl(CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, data_offset), @@ -5553,7 +5548,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_4, offset); if (value.IsFpuRegister()) { @@ -5567,7 +5562,7 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); Address address = CodeGeneratorX86::ArrayAddress(array, index, TIMES_8, offset); if (value.IsFpuRegister()) { @@ -5584,14 +5579,16 @@ void InstructionCodeGeneratorX86::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } } void LocationsBuilderX86::VisitArrayLength(HArrayLength* instruction) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction); locations->SetInAt(0, Location::RequiresRegister()); if (!instruction->IsEmittedAtUseSite()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -5639,7 +5636,7 @@ void InstructionCodeGeneratorX86::VisitBoundsCheck(HBoundsCheck* instruction) { Location index_loc = locations->InAt(0); Location length_loc = locations->InAt(1); SlowPathCode* slow_path = - new (GetGraph()->GetArena()) BoundsCheckSlowPathX86(instruction); + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86(instruction); if (length_loc.IsConstant()) { int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); @@ -5701,12 +5698,19 @@ void LocationsBuilderX86::VisitParallelMove(HParallelMove* instruction ATTRIBUTE } void InstructionCodeGeneratorX86::VisitParallelMove(HParallelMove* instruction) { + if (instruction->GetNext()->IsSuspendCheck() && + instruction->GetBlock()->GetLoopInformation() != nullptr) { + HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); + // The back edge will generate the suspend check. + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); + } + codegen_->GetMoveResolver()->EmitNativeCode(instruction); } void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnSlowPath); // In suspend check slow path, usually there are no caller-save registers at all. // If SIMD instructions are present, however, we force spilling all live SIMD // registers in full width (since the runtime only saves/restores lower part). 
@@ -5733,12 +5737,12 @@ void InstructionCodeGeneratorX86::GenerateSuspendCheck(HSuspendCheck* instructio SuspendCheckSlowPathX86* slow_path = down_cast<SuspendCheckSlowPathX86*>(instruction->GetSlowPath()); if (slow_path == nullptr) { - slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86(instruction, successor); + slow_path = + new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86(instruction, successor); instruction->SetSlowPath(slow_path); codegen_->AddSlowPath(slow_path); if (successor != nullptr) { DCHECK(successor->IsLoopHeader()); - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); } } else { DCHECK_EQ(slow_path->GetSuccessor(), successor); @@ -5759,24 +5763,18 @@ X86Assembler* ParallelMoveResolverX86::GetAssembler() const { return codegen_->GetAssembler(); } -void ParallelMoveResolverX86::MoveMemoryToMemory32(int dst, int src) { +void ParallelMoveResolverX86::MoveMemoryToMemory(int dst, int src, int number_of_words) { ScratchRegisterScope ensure_scratch( this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); -} -void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { - ScratchRegisterScope ensure_scratch( - this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); - Register temp_reg = static_cast<Register>(ensure_scratch.GetRegister()); - int stack_offset = ensure_scratch.IsSpilled() ? kX86WordSize : 0; - __ movl(temp_reg, Address(ESP, src + stack_offset)); - __ movl(Address(ESP, dst + stack_offset), temp_reg); - __ movl(temp_reg, Address(ESP, src + stack_offset + kX86WordSize)); - __ movl(Address(ESP, dst + stack_offset + kX86WordSize), temp_reg); + // Now that temp register is available (possibly spilled), move blocks of memory. + for (int i = 0; i < number_of_words; i++) { + __ movl(temp_reg, Address(ESP, src + stack_offset)); + __ movl(Address(ESP, dst + stack_offset), temp_reg); + stack_offset += kX86WordSize; + } } void ParallelMoveResolverX86::EmitMove(size_t index) { @@ -5794,7 +5792,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ movl(Address(ESP, destination.GetStackIndex()), source.AsRegister<Register>()); } } else if (source.IsRegisterPair()) { - size_t elem_size = Primitive::ComponentSize(Primitive::kPrimInt); + size_t elem_size = DataType::Size(DataType::Type::kInt32); // Create stack space for 2 elements. 
__ subl(ESP, Immediate(2 * elem_size)); __ movl(Address(ESP, 0), source.AsRegisterPairLow<Register>()); @@ -5827,7 +5825,7 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ movss(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); } else { DCHECK(destination.IsStackSlot()); - MoveMemoryToMemory32(destination.GetStackIndex(), source.GetStackIndex()); + MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 1); } } else if (source.IsDoubleStackSlot()) { if (destination.IsRegisterPair()) { @@ -5838,11 +5836,15 @@ void ParallelMoveResolverX86::EmitMove(size_t index) { __ movsd(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); } else { DCHECK(destination.IsDoubleStackSlot()) << destination; - MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex()); + MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 2); } } else if (source.IsSIMDStackSlot()) { - DCHECK(destination.IsFpuRegister()); - __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); + if (destination.IsFpuRegister()) { + __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex())); + } else { + DCHECK(destination.IsSIMDStackSlot()); + MoveMemoryToMemory(destination.GetStackIndex(), source.GetStackIndex(), 4); + } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); if (constant->IsIntConstant() || constant->IsNullConstant()) { @@ -5942,7 +5944,16 @@ void ParallelMoveResolverX86::Exchange32(XmmRegister reg, int mem) { __ movd(reg, temp_reg); } -void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { +void ParallelMoveResolverX86::Exchange128(XmmRegister reg, int mem) { + size_t extra_slot = 4 * kX86WordSize; + __ subl(ESP, Immediate(extra_slot)); + __ movups(Address(ESP, 0), XmmRegister(reg)); + ExchangeMemory(0, mem + extra_slot, 4); + __ movups(XmmRegister(reg), Address(ESP, 0)); + __ addl(ESP, Immediate(extra_slot)); +} + +void ParallelMoveResolverX86::ExchangeMemory(int mem1, int mem2, int number_of_words) { ScratchRegisterScope ensure_scratch1( this, kNoRegister, EAX, codegen_->GetNumberOfCoreRegisters()); @@ -5952,10 +5963,15 @@ void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { int stack_offset = ensure_scratch1.IsSpilled() ? kX86WordSize : 0; stack_offset += ensure_scratch2.IsSpilled() ? kX86WordSize : 0; - __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); - __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); - __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); - __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); + + // Now that temp registers are available (possibly spilled), exchange blocks of memory. 
+ for (int i = 0; i < number_of_words; i++) { + __ movl(static_cast<Register>(ensure_scratch1.GetRegister()), Address(ESP, mem1 + stack_offset)); + __ movl(static_cast<Register>(ensure_scratch2.GetRegister()), Address(ESP, mem2 + stack_offset)); + __ movl(Address(ESP, mem2 + stack_offset), static_cast<Register>(ensure_scratch1.GetRegister())); + __ movl(Address(ESP, mem1 + stack_offset), static_cast<Register>(ensure_scratch2.GetRegister())); + stack_offset += kX86WordSize; + } } void ParallelMoveResolverX86::EmitSwap(size_t index) { @@ -5974,7 +5990,7 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { } else if (source.IsStackSlot() && destination.IsRegister()) { Exchange(destination.AsRegister<Register>(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { - Exchange(destination.GetStackIndex(), source.GetStackIndex()); + ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 1); } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { // Use XOR Swap algorithm to avoid a temporary. DCHECK_NE(source.reg(), destination.reg()); @@ -6010,8 +6026,13 @@ void ParallelMoveResolverX86::EmitSwap(size_t index) { // Move the high double to the low double. __ psrldq(reg, Immediate(8)); } else if (destination.IsDoubleStackSlot() && source.IsDoubleStackSlot()) { - Exchange(destination.GetStackIndex(), source.GetStackIndex()); - Exchange(destination.GetHighStackIndex(kX86WordSize), source.GetHighStackIndex(kX86WordSize)); + ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 2); + } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) { + ExchangeMemory(destination.GetStackIndex(), source.GetStackIndex(), 4); + } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) { + Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); + } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) { + Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); } else { LOG(FATAL) << "Unimplemented: source: " << source << ", destination: " << destination; } @@ -6034,6 +6055,7 @@ HLoadClass::LoadKind CodeGeneratorX86::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -6064,13 +6086,14 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} if (load_kind == HLoadClass::LoadKind::kReferrersClass || load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadClass::LoadKind::kBootImageClassTable || load_kind == HLoadClass::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6089,12 +6112,11 @@ void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) { } Label* CodeGeneratorX86::NewJitRootClassPatch(const DexFile& dex_file, - dex::TypeIndex dex_index, + dex::TypeIndex type_index, Handle<mirror::Class> handle) { - jit_class_roots_.Overwrite(TypeReference(&dex_file, dex_index), - reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); // Add a patch entry and return the label. - jit_class_patches_.emplace_back(dex_file, dex_index.index_); + jit_class_patches_.emplace_back(&dex_file, type_index.index_); PatchInfo<Label>* info = &jit_class_patches_.back(); return &info->label; } @@ -6136,7 +6158,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); break; } case HLoadClass::LoadKind::kBootImageAddress: { @@ -6147,6 +6169,19 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE __ movl(out, Immediate(address)); break; } + case HLoadClass::LoadKind::kBootImageClassTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + Register method_address = locations->InAt(0).AsRegister<Register>(); + __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); + codegen_->RecordBootImageTypePatch(cls); + // Extract the reference from the slot data, i.e. clear the hash bits. + int32_t masked_hash = ClassTable::TableSlot::MaskHash( + ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); + if (masked_hash != 0) { + __ subl(out, Immediate(masked_hash)); + } + break; + } case HLoadClass::LoadKind::kBssEntry: { Register method_address = locations->InAt(0).AsRegister<Register>(); Address address(method_address, CodeGeneratorX86::kDummy32BitOffset); @@ -6171,7 +6206,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); codegen_->AddSlowPath(slow_path); @@ -6190,7 +6225,7 @@ void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFE void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, Location::RequiresRegister()); if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); @@ -6199,7 +6234,7 @@ void LocationsBuilderX86::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { // We assume the class to not be null. 
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86( + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86( check->GetLoadClass(), check, check->GetDexPc(), true); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, @@ -6208,9 +6243,14 @@ void InstructionCodeGeneratorX86::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorX86::GenerateClassInitializationCheck( SlowPathCode* slow_path, Register class_reg) { - __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()), - Immediate(mirror::Class::kStatusInitialized)); - __ j(kLess, slow_path->GetEntryLabel()); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); + __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); // No need for memory fence, thanks to the X86 memory model. } @@ -6219,6 +6259,7 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -6234,9 +6275,10 @@ HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind( void LocationsBuilderX86::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); HLoadString::LoadKind load_kind = load->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadString::LoadKind::kBootImageInternTable || load_kind == HLoadString::LoadKind::kBssEntry) { locations->SetInAt(0, Location::RequiresRegister()); } @@ -6259,12 +6301,11 @@ void LocationsBuilderX86::VisitLoadString(HLoadString* load) { } Label* CodeGeneratorX86::NewJitRootStringPatch(const DexFile& dex_file, - dex::StringIndex dex_index, + dex::StringIndex string_index, Handle<mirror::String> handle) { - jit_string_roots_.Overwrite( - StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); // Add a patch entry and return the label. - jit_string_patches_.emplace_back(dex_file, dex_index.index_); + jit_string_patches_.emplace_back(&dex_file, string_index.index_); PatchInfo<Label>* info = &jit_string_patches_.back(); return &info->label; } @@ -6281,15 +6322,22 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S DCHECK(codegen_->GetCompilerOptions().IsBootImage()); Register method_address = locations->InAt(0).AsRegister<Register>(); __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); - codegen_->RecordBootStringPatch(load); - return; // No dex cache slow path. 
+ codegen_->RecordBootImageStringPatch(load); + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ movl(out, Immediate(address)); - return; // No dex cache slow path. + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + Register method_address = locations->InAt(0).AsRegister<Register>(); + __ movl(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset)); + codegen_->RecordBootImageStringPatch(load); + return; } case HLoadString::LoadKind::kBssEntry: { Register method_address = locations->InAt(0).AsRegister<Register>(); @@ -6297,7 +6345,7 @@ void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) NO_THREAD_S Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::String> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86(load); codegen_->AddSlowPath(slow_path); __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); @@ -6330,7 +6378,7 @@ static Address GetExceptionTlsAddress() { void LocationsBuilderX86::VisitLoadException(HLoadException* load) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -6339,7 +6387,7 @@ void InstructionCodeGeneratorX86::VisitLoadException(HLoadException* load) { } void LocationsBuilderX86::VisitClearException(HClearException* clear) { - new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { @@ -6347,8 +6395,8 @@ void InstructionCodeGeneratorX86::VisitClearException(HClearException* clear ATT } void LocationsBuilderX86::VisitThrow(HThrow* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -6374,7 +6422,7 @@ static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) { // interface pointer, one for loading the current interface. // The other checks have one temp for loading the object's class. static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) { - if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { return 2; } return 1 + NumberOfInstanceOfTemps(type_check_kind); @@ -6388,11 +6436,12 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? 
LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -6400,7 +6449,8 @@ void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) { break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); if (baker_read_barrier_slow_path) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -6439,12 +6489,14 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, - kCompilerReadBarrierOption); + read_barrier_option); if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<Register>()); } else { @@ -6460,12 +6512,14 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. NearLabel loop; @@ -6475,7 +6529,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -6494,12 +6548,14 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. NearLabel loop, success; __ Bind(&loop); @@ -6515,7 +6571,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. 
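Editorial note, not part of the patch: the kAbstractClassCheck and kClassHierarchyCheck paths above emit a loop that repeatedly loads the super class of the object's class until it either matches the target class or reaches null. A rough C++ model of what the emitted x86 code computes, with hypothetical stand-in types rather than ART's mirror::Class, is:

  struct Klass {
    const Klass* super_class;  // stand-in for the super class field of mirror::Class
  };

  // Walk the super-class chain of the object's class; reaching null means the object
  // is not an instance of `target`. A null object reference is handled before this
  // loop in the generated code and yields false.
  bool IsInstanceOfViaHierarchyWalk(const Klass* obj_klass, const Klass* target) {
    for (const Klass* k = obj_klass; k != nullptr; k = k->super_class) {
      if (k == target) {
        return true;
      }
    }
    return false;
  }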
@@ -6529,12 +6585,14 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. NearLabel exact_check; if (cls.IsRegister()) { @@ -6550,7 +6608,7 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -6577,8 +6635,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(ESP, cls.GetStackIndex())); } DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -6609,8 +6667,8 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { // call to the runtime not using a type checking slow path). // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { @@ -6634,31 +6692,11 @@ void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) { } } -static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) { - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - return !throws_into_catch && !kEmitCompilerReadBarrier; - case TypeCheckKind::kInterfaceCheck: - return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - return false; - } - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); -} - void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) { - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - LocationSummary::CallKind call_kind = - IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch) - ? 
LocationSummary::kNoCall - : LocationSummary::kCallOnSlowPath; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kInterfaceCheck) { // Require a register for the interface check since there is a loop that compares the class to @@ -6694,15 +6732,10 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { const uint32_t object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. - bool is_type_check_slow_path_fatal = - IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock()); - + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCode* type_check_slow_path = - new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction, - is_type_check_slow_path_fatal); + new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86( + instruction, is_type_check_slow_path_fatal); codegen_->AddSlowPath(type_check_slow_path); NearLabel done; @@ -6852,44 +6885,40 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { break; case TypeCheckKind::kInterfaceCheck: { - // Fast path for the interface check. Since we compare with a memory location in the inner - // loop we would need to have cls poisoned. However unpoisoning cls would reset the - // conditional flags and cause the conditional jump to be incorrect. Therefore we just jump - // to the slow path if we are running under poisoning. - if (!kPoisonHeapReferences) { - // Try to avoid read barriers to improve the fast path. We can not get false positives by - // doing this. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - kWithoutReadBarrier); - - // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - kWithoutReadBarrier); - // Iftable is never null. - __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset)); - // Loop through the iftable and check if any class matches. - NearLabel start_loop; - __ Bind(&start_loop); - // Need to subtract first to handle the empty array case. - __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2)); - __ j(kNegative, type_check_slow_path->GetEntryLabel()); - // Go to next interface if the classes do not match. - __ cmpl(cls.AsRegister<Register>(), - CodeGeneratorX86::ArrayAddress(temp, - maybe_temp2_loc, - TIMES_4, - object_array_data_offset)); - __ j(kNotEqual, &start_loop); - } else { - __ jmp(type_check_slow_path->GetEntryLabel()); - } + // Fast path for the interface check. Try to avoid read barriers to improve the fast path. + // We can not get false positives by doing this. 
+ // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + // /* HeapReference<Class> */ temp = temp->iftable_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + temp_loc, + iftable_offset, + kWithoutReadBarrier); + // Iftable is never null. + __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset)); + // Maybe poison the `cls` for direct comparison with memory. + __ MaybePoisonHeapReference(cls.AsRegister<Register>()); + // Loop through the iftable and check if any class matches. + NearLabel start_loop; + __ Bind(&start_loop); + // Need to subtract first to handle the empty array case. + __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2)); + __ j(kNegative, type_check_slow_path->GetEntryLabel()); + // Go to next interface if the classes do not match. + __ cmpl(cls.AsRegister<Register>(), + CodeGeneratorX86::ArrayAddress(temp, + maybe_temp2_loc, + TIMES_4, + object_array_data_offset)); + __ j(kNotEqual, &start_loop); + // If `cls` was poisoned above, unpoison it. + __ MaybeUnpoisonHeapReference(cls.AsRegister<Register>()); break; } } @@ -6899,8 +6928,8 @@ void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) { } void LocationsBuilderX86::VisitMonitorOperation(HMonitorOperation* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -6923,9 +6952,9 @@ void LocationsBuilderX86::VisitXor(HXor* instruction) { HandleBitwiseOperation(i void LocationsBuilderX86::HandleBitwiseOperation(HBinaryOperation* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - DCHECK(instruction->GetResultType() == Primitive::kPrimInt - || instruction->GetResultType() == Primitive::kPrimLong); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32 + || instruction->GetResultType() == DataType::Type::kInt64); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); @@ -6949,7 +6978,7 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr Location second = locations->InAt(1); DCHECK(first.Equals(locations->Out())); - if (instruction->GetResultType() == Primitive::kPrimInt) { + if (instruction->GetResultType() == DataType::Type::kInt32) { if (second.IsRegister()) { if (instruction->IsAnd()) { __ andl(first.AsRegister<Register>(), second.AsRegister<Register>()); @@ -6982,7 +7011,7 @@ void InstructionCodeGeneratorX86::HandleBitwiseOperation(HBinaryOperation* instr } } } else { - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); if (second.IsRegisterPair()) { if (instruction->IsAnd()) { __ andl(first.AsRegisterPairLow<Register>(), second.AsRegisterPairLow<Register>()); @@ -7145,7 +7174,7 @@ void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad( "have different sizes."); // Slow path marking the GC root `root`. 
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86( instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); @@ -7275,10 +7304,10 @@ void CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* i SlowPathCode* slow_path; if (always_update_field) { DCHECK(temp != nullptr); - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86( + slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86( instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp); } else { - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86( + slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86( instruction, ref, /* unpoison_ref_before_marking */ true); } AddSlowPath(slow_path); @@ -7311,7 +7340,7 @@ void CodeGeneratorX86::GenerateReadBarrierSlow(HInstruction* instruction, // not used by the artReadBarrierSlow entry point. // // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. - SlowPathCode* slow_path = new (GetGraph()->GetArena()) + SlowPathCode* slow_path = new (GetScopedAllocator()) ReadBarrierForHeapReferenceSlowPathX86(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); @@ -7347,7 +7376,7 @@ void CodeGeneratorX86::GenerateReadBarrierForRootSlow(HInstruction* instruction, // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86(instruction, out, root); + new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86(instruction, out, root); AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); @@ -7367,7 +7396,7 @@ void InstructionCodeGeneratorX86::VisitBoundType(HBoundType* instruction ATTRIBU // Simple implementation of packed switch - generate cascaded compare/jumps. void LocationsBuilderX86::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); } @@ -7434,7 +7463,7 @@ void InstructionCodeGeneratorX86::VisitPackedSwitch(HPackedSwitch* switch_instr) void LocationsBuilderX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); // Constant area pointer. 
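Editorial note, not part of the patch: HPackedSwitch covers a dense run of case values starting at its lower bound, and as the comment above says, the generic x86 lowering is a cascade of compare/branch instructions (HX86PackedSwitch instead indexes a jump table placed in the constant area via LiteralCaseTable further below). A compact sketch of what the cascaded form computes, with hypothetical names, is:

  // Equivalent of a packed switch over [lower_bound, lower_bound + num_entries):
  // one compare/branch per case, falling through to the default handler.
  int DispatchPackedSwitch(int value,
                           int lower_bound,
                           int num_entries,
                           int (*const case_handlers[])(),
                           int (*default_handler)()) {
    for (int i = 0; i < num_entries; ++i) {
      if (value == lower_bound + i) {  // becomes cmp + je in the emitted code
        return case_handlers[i]();
      }
    }
    return default_handler();
  }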
@@ -7489,7 +7518,7 @@ void InstructionCodeGeneratorX86::VisitX86PackedSwitch(HX86PackedSwitch* switch_ void LocationsBuilderX86::VisitX86ComputeBaseMethodAddress( HX86ComputeBaseMethodAddress* insn) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -7513,7 +7542,7 @@ void InstructionCodeGeneratorX86::VisitX86ComputeBaseMethodAddress( void LocationsBuilderX86::VisitX86LoadFromConstantTable( HX86LoadFromConstantTable* insn) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(insn, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(insn, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::ConstantLocation(insn->GetConstant())); @@ -7524,12 +7553,12 @@ void LocationsBuilderX86::VisitX86LoadFromConstantTable( } switch (insn->GetType()) { - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: locations->SetOut(Location::RequiresRegister()); break; @@ -7549,19 +7578,19 @@ void InstructionCodeGeneratorX86::VisitX86LoadFromConstantTable(HX86LoadFromCons HConstant *value = insn->GetConstant(); switch (insn->GetType()) { - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: __ movss(out.AsFpuRegister<XmmRegister>(), codegen_->LiteralFloatAddress( value->AsFloatConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: __ movsd(out.AsFpuRegister<XmmRegister>(), codegen_->LiteralDoubleAddress( value->AsDoubleConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ movl(out.AsRegister<Register>(), codegen_->LiteralInt32Address( value->AsIntConstant()->GetValue(), insn->GetBaseMethodAddress(), const_area)); @@ -7673,28 +7702,31 @@ Address CodeGeneratorX86::LiteralDoubleAddress(double v, HX86ComputeBaseMethodAddress* method_base, Register reg) { AssemblerFixup* fixup = - new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddDouble(v)); + new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddDouble(v)); return Address(reg, kDummy32BitOffset, fixup); } Address CodeGeneratorX86::LiteralFloatAddress(float v, HX86ComputeBaseMethodAddress* method_base, Register reg) { - AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddFloat(v)); + AssemblerFixup* fixup = + new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddFloat(v)); return Address(reg, kDummy32BitOffset, fixup); } Address CodeGeneratorX86::LiteralInt32Address(int32_t v, HX86ComputeBaseMethodAddress* method_base, Register reg) { - AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddInt32(v)); + AssemblerFixup* fixup = + new (GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt32(v)); return Address(reg, kDummy32BitOffset, fixup); } Address CodeGeneratorX86::LiteralInt64Address(int64_t v, HX86ComputeBaseMethodAddress* method_base, Register reg) { - AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, method_base, __ AddInt64(v)); + AssemblerFixup* fixup = + new 
(GetGraph()->GetAllocator()) RIPFixup(*this, method_base, __ AddInt64(v)); return Address(reg, kDummy32BitOffset, fixup); } @@ -7744,7 +7776,7 @@ Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, Register value) { // Create a fixup to be used to create and address the jump table. JumpTableRIPFixup* table_fixup = - new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr); + new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr); // We have to populate the jump tables. fixups_to_jump_tables_.push_back(table_fixup); @@ -7754,13 +7786,13 @@ Address CodeGeneratorX86::LiteralCaseTable(HX86PackedSwitch* switch_instr, } // TODO: target as memory. -void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type type) { +void CodeGeneratorX86::MoveFromReturnRegister(Location target, DataType::Type type) { if (!target.IsValid()) { - DCHECK_EQ(type, Primitive::kPrimVoid); + DCHECK_EQ(type, DataType::Type::kVoid); return; } - DCHECK_NE(type, Primitive::kPrimVoid); + DCHECK_NE(type, DataType::Type::kVoid); Location return_loc = InvokeDexCallingConventionVisitorX86().GetReturnLocation(type); if (target.Equals(return_loc)) { @@ -7769,14 +7801,14 @@ void CodeGeneratorX86::MoveFromReturnRegister(Location target, Primitive::Type t // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged // with the else branch. - if (type == Primitive::kPrimLong) { - HParallelMove parallel_move(GetGraph()->GetArena()); - parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), Primitive::kPrimInt, nullptr); - parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), Primitive::kPrimInt, nullptr); + if (type == DataType::Type::kInt64) { + HParallelMove parallel_move(GetGraph()->GetAllocator()); + parallel_move.AddMove(return_loc.ToLow(), target.ToLow(), DataType::Type::kInt32, nullptr); + parallel_move.AddMove(return_loc.ToHigh(), target.ToHigh(), DataType::Type::kInt32, nullptr); GetMoveResolver()->EmitNativeCode(&parallel_move); } else { // Let the parallel move resolver take care of all of this.
- HParallelMove parallel_move(GetGraph()->GetArena()); + HParallelMove parallel_move(GetGraph()->GetAllocator()); parallel_move.AddMove(return_loc, target, type, nullptr); GetMoveResolver()->EmitNativeCode(&parallel_move); } @@ -7796,22 +7828,28 @@ void CodeGeneratorX86::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - const auto it = jit_string_roots_.find( - StringReference(&info.dex_file, dex::StringIndex(info.index))); - DCHECK(it != jit_string_roots_.end()); - uint64_t index_in_table = it->second; + StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index)); + uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - const auto it = jit_class_roots_.find( - TypeReference(&info.dex_file, dex::TypeIndex(info.index))); - DCHECK(it != jit_class_roots_.end()); - uint64_t index_in_table = it->second; + TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index)); + uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } } +void LocationsBuilderX86::VisitIntermediateAddress(HIntermediateAddress* instruction + ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorX86::VisitIntermediateAddress(HIntermediateAddress* instruction + ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + #undef __ } // namespace x86 diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f48753b614..51e5bca00b 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -20,7 +20,7 @@ #include "arch/x86/instruction_set_features_x86.h" #include "base/enums.h" #include "code_generator.h" -#include "dex_file_types.h" +#include "dex/dex_file_types.h" #include "driver/compiler_options.h" #include "nodes.h" #include "parallel_move_resolver.h" @@ -83,8 +83,8 @@ class InvokeDexCallingConventionVisitorX86 : public InvokeDexCallingConventionVi InvokeDexCallingConventionVisitorX86() {} virtual ~InvokeDexCallingConventionVisitorX86() {} - Location GetNextLocation(Primitive::Type type) OVERRIDE; - Location GetReturnLocation(Primitive::Type type) const OVERRIDE; + Location GetNextLocation(DataType::Type type) OVERRIDE; + Location GetReturnLocation(DataType::Type type) const OVERRIDE; Location GetMethodLocation() const OVERRIDE; private: @@ -103,13 +103,13 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const OVERRIDE { return Location::RegisterLocation(EAX); } - Location GetReturnLocation(Primitive::Type type) const OVERRIDE { - return Primitive::Is64BitType(type) + Location GetReturnLocation(DataType::Type type) const OVERRIDE { + return DataType::Is64BitType(type) ? Location::RegisterPairLocation(EAX, EDX) : Location::RegisterLocation(EAX); } - Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE { - return Primitive::Is64BitType(type) + Location GetSetValueLocation(DataType::Type type, bool is_instance) const OVERRIDE { + return DataType::Is64BitType(type) ? (is_instance ?
Location::RegisterPairLocation(EDX, EBX) : Location::RegisterPairLocation(ECX, EDX)) @@ -117,7 +117,7 @@ class FieldAccessCallingConventionX86 : public FieldAccessCallingConvention { ? Location::RegisterLocation(EDX) : Location::RegisterLocation(ECX)); } - Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return Location::FpuRegisterLocation(XMM0); } @@ -139,10 +139,10 @@ class ParallelMoveResolverX86 : public ParallelMoveResolverWithSwap { private: void Exchange(Register reg, int mem); - void Exchange(int mem1, int mem2); void Exchange32(XmmRegister reg, int mem); - void MoveMemoryToMemory32(int dst, int src); - void MoveMemoryToMemory64(int dst, int src); + void Exchange128(XmmRegister reg, int mem); + void ExchangeMemory(int mem1, int mem2, int number_of_words); + void MoveMemoryToMemory(int dst, int src, int number_of_words); CodeGeneratorX86* const codegen_; @@ -321,7 +321,7 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateFrameExit() OVERRIDE; void Bind(HBasicBlock* block) OVERRIDE; void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; @@ -414,24 +414,23 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; - void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); - Label* NewMethodBssEntryPatch(HX86ComputeBaseMethodAddress* method_address, - MethodReference target_method); - void RecordBootTypePatch(HLoadClass* load_class); + void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); + void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); + void RecordBootImageTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewJitRootStringPatch(const DexFile& dex_file, - dex::StringIndex dex_index, + dex::StringIndex string_index, Handle<mirror::String> handle); Label* NewJitRootClassPatch(const DexFile& dex_file, - dex::TypeIndex dex_index, + dex::TypeIndex type_index, Handle<mirror::Class> handle); - void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; + void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; // Emit linker patches. 
- void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, @@ -456,8 +455,8 @@ class CodeGeneratorX86 : public CodeGenerator { block_labels_ = CommonInitializeLabels<Label>(); } - bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { - return type == Primitive::kPrimLong; + bool NeedsTwoRegisters(DataType::Type type) const OVERRIDE { + return type == DataType::Type::kInt64; } bool ShouldSplitLongMoves() const OVERRIDE { return true; } @@ -610,16 +609,16 @@ class CodeGeneratorX86 : public CodeGenerator { private: struct X86PcRelativePatchInfo : PatchInfo<Label> { X86PcRelativePatchInfo(HX86ComputeBaseMethodAddress* address, - const DexFile& target_dex_file, + const DexFile* target_dex_file, uint32_t target_index) : PatchInfo(target_dex_file, target_index), method_address(address) {} HX86ComputeBaseMethodAddress* method_address; }; - template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> void EmitPcRelativeLinkerPatches(const ArenaDeque<X86PcRelativePatchInfo>& infos, - ArenaVector<LinkerPatch>* linker_patches); + ArenaVector<linker::LinkerPatch>* linker_patches); Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp); @@ -640,8 +639,10 @@ class CodeGeneratorX86 : public CodeGenerator { ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). - ArenaDeque<X86PcRelativePatchInfo> string_patches_; + // String patch locations; type depends on configuration (intern table or boot image PIC). + ArenaDeque<X86PcRelativePatchInfo> boot_image_string_patches_; + // String patch locations for kBssEntry. + ArenaDeque<X86PcRelativePatchInfo> string_bss_entry_patches_; // Patches for string root accesses in JIT compiled code. 
ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 86f6d51734..7be360536b 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -17,12 +17,15 @@ #include "code_generator_x86_64.h" #include "art_method.h" +#include "class_table.h" #include "code_generator_utils.h" #include "compiled_method.h" #include "entrypoints/quick/quick_entrypoints.h" #include "gc/accounting/card_table.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_x86_64.h" +#include "linker/linker_patch.h" #include "lock_word.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" @@ -103,12 +106,12 @@ class DivZeroCheckSlowPathX86_64 : public SlowPathCode { class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { public: - DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div) + DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div) : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {} void EmitNativeCode(CodeGenerator* codegen) OVERRIDE { __ Bind(GetEntryLabel()); - if (type_ == Primitive::kPrimInt) { + if (type_ == DataType::Type::kInt32) { if (is_div_) { __ negl(cpu_reg_); } else { @@ -116,7 +119,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { } } else { - DCHECK_EQ(Primitive::kPrimLong, type_); + DCHECK_EQ(DataType::Type::kInt64, type_); if (is_div_) { __ negq(cpu_reg_); } else { @@ -130,7 +133,7 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCode { private: const CpuRegister cpu_reg_; - const Primitive::Type type_; + const DataType::Type type_; const bool is_div_; DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64); }; @@ -192,7 +195,8 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) { // Load the array length into our temporary. - uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength()); + HArrayLength* length = array_length->AsArrayLength(); + uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length); Location array_loc = array_length->GetLocations()->InAt(0); Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset); length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1)); @@ -202,7 +206,7 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2)); } __ movl(length_loc.AsRegister<CpuRegister>(), array_len); - if (mirror::kUseStringCompression) { + if (mirror::kUseStringCompression && length->IsStringLength()) { __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1)); } } @@ -212,10 +216,10 @@ class BoundsCheckSlowPathX86_64 : public SlowPathCode { codegen->EmitParallelMoves( locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimInt, + DataType::Type::kInt32, length_loc, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt); + DataType::Type::kInt32); QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt() ? 
kQuickThrowStringBounds : kQuickThrowArrayBounds; @@ -269,15 +273,6 @@ class LoadClassSlowPathX86_64 : public SlowPathCode { } RestoreLiveRegisters(codegen, locations); - // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry. - DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_); - if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) { - DCHECK(out.IsValid()); - __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false), - locations->Out().AsRegister<CpuRegister>()); - Label* fixup_label = x86_64_codegen->NewTypeBssEntryPatch(cls_); - __ Bind(fixup_label); - } __ jmp(GetExitLabel()); } @@ -319,12 +314,6 @@ class LoadStringSlowPathX86_64 : public SlowPathCode { x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX)); RestoreLiveRegisters(codegen, locations); - // Store the resolved String to the BSS entry. - __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false), - locations->Out().AsRegister<CpuRegister>()); - Label* fixup_label = x86_64_codegen->NewStringBssEntryPatch(instruction_->AsLoadString()); - __ Bind(fixup_label); - __ jmp(GetExitLabel()); } @@ -348,7 +337,14 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen); __ Bind(GetEntryLabel()); - if (!is_fatal_) { + if (kPoisonHeapReferences && + instruction_->IsCheckCast() && + instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) { + // First, unpoison the `cls` reference that was poisoned for direct memory comparison. + __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>()); + } + + if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) { SaveLiveRegisters(codegen, locations); } @@ -357,10 +353,10 @@ class TypeCheckSlowPathX86_64 : public SlowPathCode { InvokeRuntimeCallingConvention calling_convention; codegen->EmitParallelMoves(locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot); + DataType::Type::kReference); if (instruction_->IsInstanceOf()) { x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this); CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>(); @@ -424,21 +420,21 @@ class ArraySetSlowPathX86_64 : public SlowPathCode { SaveLiveRegisters(codegen, locations); InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove( locations->InAt(0), Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove( locations->InAt(1), Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); parallel_move.AddMove( locations->InAt(2), Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); @@ -828,19 +824,19 @@ class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode { // We're moving two or three locations to locations that could // overlap, so we need a parallel move resolver.
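// --- Editorial aside (illustrative sketch, not part of the patch) ---
// The UnpoisonHeapReference call added above is needed because, when heap
// poisoning is compiled in, references held in the heap are stored in a
// negated form; the interface type check compares the poisoned word directly
// against memory, so the slow path must restore the usable form before
// calling into the runtime. Minimal model of the encoding (poisoning is its
// own inverse):
#include <cstdint>

inline uint32_t PoisonReference(uint32_t ref)   { return 0u - ref; }  // store form
inline uint32_t UnpoisonReference(uint32_t ref) { return 0u - ref; }  // usable form
// --- End editorial aside ---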
InvokeRuntimeCallingConvention calling_convention; - HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); parallel_move.AddMove(ref_, Location::RegisterLocation(calling_convention.GetRegisterAt(0)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); parallel_move.AddMove(obj_, Location::RegisterLocation(calling_convention.GetRegisterAt(1)), - Primitive::kPrimNot, + DataType::Type::kReference, nullptr); if (index.IsValid()) { parallel_move.AddMove(index, Location::RegisterLocation(calling_convention.GetRegisterAt(2)), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); codegen->GetMoveResolver()->EmitNativeCode(&parallel_move); } else { @@ -966,7 +962,7 @@ inline Condition X86_64FPCondition(IfCondition cond) { case kCondGT: return kAbove; case kCondGE: return kAboveEqual; default: break; // should not happen - }; + } LOG(FATAL) << "Unreachable"; UNREACHABLE(); } @@ -997,7 +993,7 @@ void CodeGeneratorX86_64::GenerateStaticOrDirectCall( DCHECK(GetCompilerOptions().IsBootImage()); __ leal(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); - RecordBootMethodPatch(invoke); + RecordBootImageMethodPatch(invoke); break; case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress: Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress()); @@ -1005,9 +1001,7 @@ case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: { __ movq(temp.AsRegister<CpuRegister>(), Address::Absolute(kDummy32BitOffset, /* no_rip */ false)); - // Bind a new fixup label at the end of the "movl" insn. - __ Bind(NewMethodBssEntryPatch( - MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()))); + RecordMethodBssEntryPatch(invoke); break; } case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: { @@ -1065,80 +1059,87 @@ void CodeGeneratorX86_64::GenerateVirtualCall( RecordPcInfo(invoke, invoke->GetDexPc(), slow_path); } -void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) { - boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file, - invoke->GetTargetMethod().dex_method_index); +void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) { + boot_image_method_patches_.emplace_back( + invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index); __ Bind(&boot_image_method_patches_.back().label); } -Label* CodeGeneratorX86_64::NewMethodBssEntryPatch(MethodReference target_method) { - // Add a patch entry and return the label.
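// --- Editorial aside (illustrative sketch, not part of the patch) ---
// The two PC-relative load kinds patched above differ in what the dummy
// offset ends up resolving to: for kBootImageLinkTimePcRelative the leal
// computes the ArtMethod's address directly (it is known at link time),
// whereas for kBssEntry the movq reads a .bss slot that the runtime fills
// with the resolved ArtMethod* once resolution has happened. Rough model
// (Method is a stand-in for ArtMethod):
#include <cstdint>

struct Method;

inline Method* LoadBootImageMethod(uint8_t* pc_anchor, int32_t link_time_offset) {
  return reinterpret_cast<Method*>(pc_anchor + link_time_offset);  // the address is the method itself
}

inline Method* LoadBssEntryMethod(Method* const* bss_slot) {
  return *bss_slot;  // published by the runtime after resolution
}
// --- End editorial aside ---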
- method_bss_entry_patches_.emplace_back(*target_method.dex_file, target_method.dex_method_index); - return &method_bss_entry_patches_.back().label; +void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) { + method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()); + __ Bind(&method_bss_entry_patches_.back().label); } -void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) { - boot_image_type_patches_.emplace_back(load_class->GetDexFile(), - load_class->GetTypeIndex().index_); +void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) { + boot_image_type_patches_.emplace_back( + &load_class->GetDexFile(), load_class->GetTypeIndex().index_); __ Bind(&boot_image_type_patches_.back().label); } Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) { - type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_); + type_bss_entry_patches_.emplace_back( + &load_class->GetDexFile(), load_class->GetTypeIndex().index_); return &type_bss_entry_patches_.back().label; } -void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) { - DCHECK(GetCompilerOptions().IsBootImage()); - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); - __ Bind(&string_patches_.back().label); +void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) { + boot_image_string_patches_.emplace_back( + &load_string->GetDexFile(), load_string->GetStringIndex().index_); + __ Bind(&boot_image_string_patches_.back().label); } Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) { DCHECK(!GetCompilerOptions().IsBootImage()); - string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_); - return &string_patches_.back().label; + string_bss_entry_patches_.emplace_back( + &load_string->GetDexFile(), load_string->GetStringIndex().index_); + return &string_bss_entry_patches_.back().label; } // The label points to the end of the "movl" or another instruction but the literal offset // for method patch needs to point to the embedded constant which occupies the last 4 bytes. 
constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u; -template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> +template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches( const ArenaDeque<PatchInfo<Label>>& infos, - ArenaVector<LinkerPatch>* linker_patches) { + ArenaVector<linker::LinkerPatch>* linker_patches) { for (const PatchInfo<Label>& info : infos) { uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment; linker_patches->push_back( - Factory(literal_offset, &info.dex_file, info.label.Position(), info.index)); + Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index)); } } -void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) { +void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) { DCHECK(linker_patches->empty()); size_t size = boot_image_method_patches_.size() + method_bss_entry_patches_.size() + boot_image_type_patches_.size() + type_bss_entry_patches_.size() + - string_patches_.size(); + boot_image_string_patches_.size() + + string_bss_entry_patches_.size(); linker_patches->reserve(size); if (GetCompilerOptions().IsBootImage()) { - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>( + boot_image_method_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>( + boot_image_string_patches_, linker_patches); } else { DCHECK(boot_image_method_patches_.empty()); - DCHECK(boot_image_type_patches_.empty()); - EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches); - } - EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_, - linker_patches); - EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_, - linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeClassTablePatch>( + boot_image_type_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringInternTablePatch>( + boot_image_string_patches_, linker_patches); + } + EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>( + method_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>( + type_bss_entry_patches_, linker_patches); + EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>( + string_bss_entry_patches_, linker_patches); DCHECK_EQ(size, linker_patches->size()); } @@ -1221,18 +1222,19 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), - move_resolver_(graph->GetArena(), this), - assembler_(graph->GetArena()), + move_resolver_(graph->GetAllocator(), this), + assembler_(graph->GetAllocator()), isa_features_(isa_features), constant_area_start_(0), - boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - 
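// --- Editorial aside (illustrative sketch, not part of the patch) ---
// Every patch kind emitted above shares one layout assumption, spelled out by
// kLabelPositionToLiteralOffsetAdjustment: the label is bound at the end of
// the patched instruction, and the 32-bit value to fix up occupies that
// instruction's last four bytes, so the literal offset is simply the label
// position minus four. Small model of that bookkeeping:
#include <cstdint>

struct PatchRecord {
  uint32_t label_position;  // code offset just past the patched instruction
  uint32_t target_index;    // e.g. a string, type or method index
};

inline uint32_t LiteralOffsetOf(const PatchRecord& patch) {
  constexpr uint32_t kImmediateSize = 4u;  // the embedded 32-bit immediate
  return patch.label_position - kImmediateSize;
}
// --- End editorial aside ---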
method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), - fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) { + boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)), + fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) { AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister)); } @@ -1265,9 +1267,15 @@ void CodeGeneratorX86_64::GenerateFrameEntry() { && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + if (GetCompilerOptions().CountHotnessInCompiledCode()) { + __ addw(Address(CpuRegister(kMethodRegisterArgument), + ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + } + if (!skip_overflow_check) { - __ testq(CpuRegister(RAX), Address( - CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64)))); + size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64); + __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes))); RecordPcInfo(nullptr, 0); } @@ -1433,7 +1441,7 @@ void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) { } void CodeGeneratorX86_64::MoveLocation( - Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) { + Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) { Move(dst, src); } @@ -1446,13 +1454,21 @@ void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* } void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) { - DCHECK(!successor->IsExitBlock()); + if (successor->IsExitBlock()) { + DCHECK(got->GetPrevious()->AlwaysThrows()); + return; // no code needed + } HBasicBlock* block = got->GetBlock(); HInstruction* previous = got->GetPrevious(); HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { + if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0)); + __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()), + Immediate(1)); + } GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } @@ -1508,22 +1524,23 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) Location left = locations->InAt(0); Location right = locations->InAt(1); - Primitive::Type type = condition->InputAt(0)->GetType(); + 
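// --- Editorial aside (illustrative sketch, not part of the patch) ---
// The addw instructions added above implement the CountHotnessInCompiledCode()
// option: the method's 16-bit hotness counter is bumped once on frame entry
// (the method pointer is still in the argument register) and once per loop
// back edge (the method pointer is reloaded from the bottom of the frame into
// TMP). In plain C++ the effect is simply:
#include <cstdint>

struct MethodHotness {
  uint16_t counter;  // stand-in for the ArtMethod hotness count field
};

inline void BumpHotness(MethodHotness* method) {
  method->counter = static_cast<uint16_t>(method->counter + 1);  // 16-bit wrap, like addw
}
// --- End editorial aside ---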
DataType::Type type = condition->InputAt(0)->GetType(); switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kReference: { codegen_->GenerateIntCompare(left, right); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { codegen_->GenerateLongCompare(left, right); break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (right.IsFpuRegister()) { __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); } else if (right.IsConstant()) { @@ -1537,7 +1554,7 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) } break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (right.IsFpuRegister()) { __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>()); } else if (right.IsConstant()) { @@ -1570,17 +1587,17 @@ void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* co GenerateCompareTest(condition); // Now generate the correct jump(s). - Primitive::Type type = condition->InputAt(0)->GetType(); + DataType::Type type = condition->InputAt(0)->GetType(); switch (type) { - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { __ j(X86_64IntegerCondition(condition->GetCondition()), true_target); break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { GenerateFPJumps(condition, true_target, false_target); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { GenerateFPJumps(condition, true_target, false_target); break; } @@ -1603,7 +1620,7 @@ static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) { // conditions if they are materialized due to the complex branching. return cond->IsCondition() && cond->GetNext() == branch && - !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType()); + !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()); } template<class LabelType> @@ -1667,8 +1684,8 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc // If this is a long or FP comparison that has been folded into // the HCondition, generate the comparison directly. 
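// --- Editorial aside (illustrative sketch, not part of the patch) ---
// The case-label churn in this file is the mechanical half of the
// Primitive::Type -> DataType::Type migration; the new enum distinguishes
// signedness, which is why one old label sometimes expands to two new ones.
// Correspondence as used throughout these hunks:
//   kPrimBoolean -> kBool          kPrimByte   -> kInt8 (kUint8 is new)
//   kPrimChar    -> kUint16        kPrimShort  -> kInt16
//   kPrimInt     -> kInt32         kPrimLong   -> kInt64
//   kPrimFloat   -> kFloat32       kPrimDouble -> kFloat64
//   kPrimNot     -> kReference     kPrimVoid   -> kVoid
// The 64-bit test used for CMOV sizing below (DataType::Is64BitType) reduces to:
enum class NewType { kBool, kUint8, kInt8, kUint16, kInt16, kInt32, kInt64,
                     kFloat32, kFloat64, kReference, kVoid };

inline bool Is64BitNewType(NewType type) {
  return type == NewType::kInt64 || type == NewType::kFloat64;
}
// --- End editorial aside ---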
- Primitive::Type type = condition->InputAt(0)->GetType(); - if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) { + DataType::Type type = condition->InputAt(0)->GetType(); + if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) { GenerateCompareTestAndBranch(condition, true_target, false_target); return; } @@ -1691,7 +1708,7 @@ void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruc } void LocationsBuilderX86_64::VisitIf(HIf* if_instr) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr); if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) { locations->SetInAt(0, Location::Any()); } @@ -1708,7 +1725,7 @@ void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) { } void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath); InvokeRuntimeCallingConvention calling_convention; RegisterSet caller_saves = RegisterSet::Empty(); @@ -1728,7 +1745,7 @@ void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) { } void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) { - LocationSummary* locations = new (GetGraph()->GetArena()) + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(flag, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -1740,14 +1757,14 @@ void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimize static bool SelectCanUseCMOV(HSelect* select) { // There are no conditional move instructions for XMMs. - if (Primitive::IsFloatingPointType(select->GetType())) { + if (DataType::IsFloatingPointType(select->GetType())) { return false; } // A FP condition doesn't generate the single CC that we need. HInstruction* condition = select->GetCondition(); if (condition->IsCondition() && - Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) { + DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) { return false; } @@ -1756,8 +1773,8 @@ static bool SelectCanUseCMOV(HSelect* select) { } void LocationsBuilderX86_64::VisitSelect(HSelect* select) { - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select); - if (Primitive::IsFloatingPointType(select->GetType())) { + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select); + if (DataType::IsFloatingPointType(select->GetType())) { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::Any()); } else { @@ -1816,7 +1833,7 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { // If the condition is true, overwrite the output, which already contains false. // Generate the correct sized CMOV. 
- bool is_64_bit = Primitive::Is64BitType(select->GetType()); + bool is_64_bit = DataType::Is64BitType(select->GetType()); if (value_true_loc.IsRegister()) { __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit); } else { @@ -1836,7 +1853,7 @@ void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) { } void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) { - new (GetGraph()->GetArena()) LocationSummary(info); + new (GetGraph()->GetAllocator()) LocationSummary(info); } void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) { @@ -1849,15 +1866,15 @@ void CodeGeneratorX86_64::GenerateNop() { void LocationsBuilderX86_64::HandleCondition(HCondition* cond) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall); // Handle the long/FP comparisons made in instruction simplification. switch (cond->InputAt(0)->GetType()) { - case Primitive::kPrimLong: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::Any()); break; @@ -1892,14 +1909,14 @@ void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { codegen_->GenerateIntCompare(lhs, rhs); __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); return; - case Primitive::kPrimLong: + case DataType::Type::kInt64: // Clear output register: setcc only sets the low byte. __ xorl(reg, reg); codegen_->GenerateLongCompare(lhs, rhs); __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg); return; - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); if (rhs.IsConstant()) { float value = rhs.GetConstant()->AsFloatConstant()->GetValue(); @@ -1912,7 +1929,7 @@ void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) { GenerateFPJumps(cond, &true_label, &false_label); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>(); if (rhs.IsConstant()) { double value = rhs.GetConstant()->AsDoubleConstant()->GetValue(); @@ -2023,21 +2040,22 @@ void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) { void LocationsBuilderX86_64::VisitCompare(HCompare* compare) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall); switch (compare->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case 
DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresRegister()); @@ -2055,23 +2073,24 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { Location right = locations->InAt(1); NearLabel less, greater, done; - Primitive::Type type = compare->InputAt(0)->GetType(); + DataType::Type type = compare->InputAt(0)->GetType(); Condition less_cond = kLess; switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { codegen_->GenerateIntCompare(left, right); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { codegen_->GenerateLongCompare(left, right); break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); if (right.IsConstant()) { float value = right.GetConstant()->AsFloatConstant()->GetValue(); @@ -2085,7 +2104,7 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { less_cond = kBelow; // ucomis{s,d} sets CF break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { XmmRegister left_reg = left.AsFpuRegister<XmmRegister>(); if (right.IsConstant()) { double value = right.GetConstant()->AsDoubleConstant()->GetValue(); @@ -2119,7 +2138,7 @@ void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) { void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2129,7 +2148,7 @@ void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATT void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2139,7 +2158,7 @@ void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant A void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2149,7 +2168,7 @@ void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant A void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2159,7 +2178,7 @@ void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) { LocationSummary* locations = - new 
(GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall); locations->SetOut(Location::ConstantLocation(constant)); } @@ -2195,20 +2214,21 @@ void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_ void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall); switch (ret->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: - case Primitive::kPrimLong: + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RegisterLocation(RAX)); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::FpuRegisterLocation(XMM0)); break; @@ -2220,18 +2240,19 @@ void LocationsBuilderX86_64::VisitReturn(HReturn* ret) { void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { if (kIsDebugBuild) { switch (ret->InputAt(0)->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: - case Primitive::kPrimLong: + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(), XMM0); break; @@ -2243,22 +2264,25 @@ void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) { codegen_->GenerateFrameExit(); } -Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const { +Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: - case Primitive::kPrimLong: + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kUint32: + case DataType::Type::kInt32: + case DataType::Type::kUint64: + case DataType::Type::kInt64: return Location::RegisterLocation(RAX); - case Primitive::kPrimVoid: + case DataType::Type::kVoid: return Location::NoLocation(); - case Primitive::kPrimDouble: - case Primitive::kPrimFloat: + case DataType::Type::kFloat64: + case DataType::Type::kFloat32: return Location::FpuRegisterLocation(XMM0); } @@ -2269,14 +2293,15 @@ Location 
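// --- Editorial aside (illustrative sketch, not part of the patch) ---
// GetReturnLocation above and GetNextLocation below encode the managed x86-64
// calling convention: integral and reference values come back in RAX,
// float/double in XMM0, and void has no location; parameters consume either a
// core register, an XMM register, or stack slots (two slots for 64-bit
// values). Reduced to its essentials for the return side:
enum class ReturnRegister { kRax, kXmm0, kNone };

inline ReturnRegister ReturnRegisterFor(bool is_void, bool is_floating_point) {
  if (is_void) return ReturnRegister::kNone;
  return is_floating_point ? ReturnRegister::kXmm0 : ReturnRegister::kRax;
}
// --- End editorial aside ---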
InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const { return Location::RegisterLocation(kMethodRegisterArgument); } -Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) { +Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case DataType::Type::kReference: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { uint32_t index = gp_index_++; stack_index_++; if (index < calling_convention.GetNumberOfRegisters()) { @@ -2286,7 +2311,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Typ } } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { uint32_t index = gp_index_; stack_index_ += 2; if (index < calling_convention.GetNumberOfRegisters()) { @@ -2298,7 +2323,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Typ } } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { uint32_t index = float_index_++; stack_index_++; if (index < calling_convention.GetNumberOfFpuRegisters()) { @@ -2308,7 +2333,7 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Typ } } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { uint32_t index = float_index_++; stack_index_ += 2; if (index < calling_convention.GetNumberOfFpuRegisters()) { @@ -2318,7 +2343,9 @@ Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Typ } } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unexpected parameter type " << type; break; } @@ -2457,16 +2484,16 @@ void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* void LocationsBuilderX86_64::VisitNeg(HNeg* neg) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall); switch (neg->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresFpuRegister()); @@ -2482,19 +2509,19 @@ void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) { Location out = locations->Out(); Location in = locations->InAt(0); switch (neg->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: DCHECK(in.IsRegister()); DCHECK(in.Equals(out)); __ negl(out.AsRegister<CpuRegister>()); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: DCHECK(in.IsRegister()); DCHECK(in.Equals(out)); __ negq(out.AsRegister<CpuRegister>()); break; - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { DCHECK(in.Equals(out)); XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); // Implement float negation with an 
exclusive or with value @@ -2505,7 +2532,7 @@ void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { DCHECK(in.Equals(out)); XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>(); // Implement double negation with an exclusive or with value @@ -2523,71 +2550,35 @@ void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) { void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall); - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); - DCHECK_NE(result_type, input_type); - - // The Java language does not allow treating boolean as an integral type but - // our bit representation makes it safe. + new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; switch (result_type) { - case Primitive::kPrimByte: - switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to byte is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-byte' instruction. - locations->SetInAt(0, Location::Any()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - break; - - case Primitive::kPrimShort: - switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to short is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-short' instruction. - locations->SetInAt(0, Location::Any()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + DCHECK(DataType::IsIntegralType(input_type)) << input_type; + locations->SetInAt(0, Location::Any()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: switch (input_type) { - case Primitive::kPrimLong: - // Processing a Dex `long-to-int' instruction. + case DataType::Type::kInt64: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-int' instruction. + case DataType::Type::kFloat32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-int' instruction. 
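// --- Editorial aside (illustrative sketch, not part of the patch) ---
// The float/double negation above XORs the value with a mask that has only
// the sign bit set (0x80000000 for float, 0x8000000000000000 for double);
// that flips the sign without a real FP operation and without disturbing NaN
// payloads. Bit-level equivalent:
#include <cstdint>
#include <cstring>

inline float NegateFloatBitwise(float value) {
  uint32_t bits;
  std::memcpy(&bits, &value, sizeof(bits));
  bits ^= 0x80000000u;  // flip the sign bit only
  std::memcpy(&value, &bits, sizeof(value));
  return value;
}
// --- End editorial aside ---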
+ case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); break; @@ -2598,29 +2589,26 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-long' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: // TODO: We would benefit from a (to-be-implemented) // Location::RegisterOrStackSlot requirement for this input. locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-long' instruction. + case DataType::Type::kFloat32: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-long' instruction. + case DataType::Type::kFloat64: locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); break; @@ -2631,47 +2619,24 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { } break; - case Primitive::kPrimChar: + case DataType::Type::kFloat32: switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to char is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - // Processing a Dex `int-to-char' instruction. - locations->SetInAt(0, Location::Any()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - break; - - case Primitive::kPrimFloat: - switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-float' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimLong: - // Processing a Dex `long-to-float' instruction. + case DataType::Type::kInt64: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-float' instruction. 
+ case DataType::Type::kFloat64: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -2679,30 +2644,27 @@ void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) { default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-double' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimLong: - // Processing a Dex `long-to-double' instruction. + case DataType::Type::kInt64: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-double' instruction. + case DataType::Type::kFloat32: locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); break; @@ -2723,20 +2685,42 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver LocationSummary* locations = conversion->GetLocations(); Location out = locations->Out(); Location in = locations->InAt(0); - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); - DCHECK_NE(result_type, input_type); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " -> " << result_type; switch (result_type) { - case Primitive::kPrimByte: + case DataType::Type::kUint8: switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to byte is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-byte' instruction. 
+ case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + if (in.IsRegister()) { + __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); + } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { + __ movzxb(out.AsRegister<CpuRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex())); + } else { + __ movl(out.AsRegister<CpuRegister>(), + Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant())))); + } + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case DataType::Type::kInt8: + switch (input_type) { + case DataType::Type::kUint8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: if (in.IsRegister()) { __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { @@ -2754,16 +2738,34 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimShort: + case DataType::Type::kUint16: + switch (input_type) { + case DataType::Type::kInt8: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: + if (in.IsRegister()) { + __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); + } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { + __ movzxw(out.AsRegister<CpuRegister>(), + Address(CpuRegister(RSP), in.GetStackIndex())); + } else { + __ movl(out.AsRegister<CpuRegister>(), + Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant())))); + } + break; + + default: + LOG(FATAL) << "Unexpected type conversion from " << input_type + << " to " << result_type; + } + break; + + case DataType::Type::kInt16: switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to short is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-short' instruction. + case DataType::Type::kUint16: + case DataType::Type::kInt32: + case DataType::Type::kInt64: if (in.IsRegister()) { __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { @@ -2781,10 +2783,9 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: switch (input_type) { - case Primitive::kPrimLong: - // Processing a Dex `long-to-int' instruction. + case DataType::Type::kInt64: if (in.IsRegister()) { __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); } else if (in.IsDoubleStackSlot()) { @@ -2798,8 +2799,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimFloat: { - // Processing a Dex `float-to-int' instruction. + case DataType::Type::kFloat32: { XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); NearLabel done, nan; @@ -2820,8 +2820,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver break; } - case Primitive::kPrimDouble: { - // Processing a Dex `double-to-int' instruction. 
+ case DataType::Type::kFloat64: { XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); NearLabel done, nan; @@ -2848,22 +2847,20 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: switch (input_type) { DCHECK(out.IsRegister()); - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-long' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: DCHECK(in.IsRegister()); __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); break; - case Primitive::kPrimFloat: { - // Processing a Dex `float-to-long' instruction. + case DataType::Type::kFloat32: { XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); NearLabel done, nan; @@ -2884,8 +2881,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver break; } - case Primitive::kPrimDouble: { - // Processing a Dex `double-to-long' instruction. + case DataType::Type::kFloat64: { XmmRegister input = in.AsFpuRegister<XmmRegister>(); CpuRegister output = out.AsRegister<CpuRegister>(); NearLabel done, nan; @@ -2912,42 +2908,14 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimChar: + case DataType::Type::kFloat32: switch (input_type) { - case Primitive::kPrimLong: - // Type conversion from long to char is a result of code transformations. - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - // Processing a Dex `int-to-char' instruction. - if (in.IsRegister()) { - __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>()); - } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) { - __ movzxw(out.AsRegister<CpuRegister>(), - Address(CpuRegister(RSP), in.GetStackIndex())); - } else { - __ movl(out.AsRegister<CpuRegister>(), - Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant())))); - } - break; - - default: - LOG(FATAL) << "Unexpected type conversion from " << input_type - << " to " << result_type; - } - break; - - case Primitive::kPrimFloat: - switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-float' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: if (in.IsRegister()) { __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); } else if (in.IsConstant()) { @@ -2960,8 +2928,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimLong: - // Processing a Dex `long-to-float' instruction. 
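// --- Editorial aside (illustrative sketch, not part of the patch) ---
// The nan/done labels in the float->int and float->long paths above exist
// because cvttss2si/cvttsd2si alone do not give Java semantics: the
// conversion has to produce 0 for NaN and saturate at the integer bounds
// rather than yield the x86 "integer indefinite" value. Reference behaviour
// the emitted sequence must match:
#include <cmath>
#include <cstdint>
#include <limits>

inline int32_t JavaFloatToInt32(float value) {
  if (std::isnan(value)) return 0;
  if (value >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();
  }
  if (value <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();
  }
  return static_cast<int32_t>(value);
}
// --- End editorial aside ---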
+ case DataType::Type::kInt64: if (in.IsRegister()) { __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); } else if (in.IsConstant()) { @@ -2974,8 +2941,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimDouble: - // Processing a Dex `double-to-float' instruction. + case DataType::Type::kFloat64: if (in.IsFpuRegister()) { __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); } else if (in.IsConstant()) { @@ -2991,18 +2957,17 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: switch (input_type) { - case Primitive::kPrimBoolean: - // Boolean input is a result of code transformations. - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - // Processing a Dex `int-to-double' instruction. + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: if (in.IsRegister()) { __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false); } else if (in.IsConstant()) { @@ -3015,8 +2980,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimLong: - // Processing a Dex `long-to-double' instruction. + case DataType::Type::kInt64: if (in.IsRegister()) { __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true); } else if (in.IsConstant()) { @@ -3029,8 +2993,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver } break; - case Primitive::kPrimFloat: - // Processing a Dex `float-to-double' instruction. + case DataType::Type::kFloat32: if (in.IsFpuRegister()) { __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>()); } else if (in.IsConstant()) { @@ -3046,7 +3009,7 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver default: LOG(FATAL) << "Unexpected type conversion from " << input_type << " to " << result_type; - }; + } break; default: @@ -3057,16 +3020,16 @@ void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conver void LocationsBuilderX86_64::VisitAdd(HAdd* add) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall); switch (add->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1))); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); // We can use a leaq or addq if the constant can fit in an immediate. 
locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1))); @@ -3074,8 +3037,8 @@ void LocationsBuilderX86_64::VisitAdd(HAdd* add) { break; } - case Primitive::kPrimDouble: - case Primitive::kPrimFloat: { + case DataType::Type::kFloat64: + case DataType::Type::kFloat32: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); @@ -3094,7 +3057,7 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { Location out = locations->Out(); switch (add->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { if (second.IsRegister()) { if (out.AsRegister<Register>() == first.AsRegister<Register>()) { __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); @@ -3119,7 +3082,7 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (second.IsRegister()) { if (out.AsRegister<Register>() == first.AsRegister<Register>()) { __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); @@ -3144,7 +3107,7 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (second.IsFpuRegister()) { __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { @@ -3159,7 +3122,7 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (second.IsFpuRegister()) { __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { @@ -3181,22 +3144,22 @@ void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) { void LocationsBuilderX86_64::VisitSub(HSub* sub) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall); switch (sub->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); @@ -3213,7 +3176,7 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { Location second = locations->InAt(1); DCHECK(first.Equals(locations->Out())); switch (sub->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { if (second.IsRegister()) { __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); } else if (second.IsConstant()) { @@ -3224,7 +3187,7 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (second.IsConstant()) { int64_t value = second.GetConstant()->AsLongConstant()->GetValue(); DCHECK(IsInt<32>(value)); @@ -3235,7 +3198,7 @@ void 
InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (second.IsFpuRegister()) { __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { @@ -3250,7 +3213,7 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (second.IsFpuRegister()) { __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { @@ -3272,9 +3235,9 @@ void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) { void LocationsBuilderX86_64::VisitMul(HMul* mul) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall); switch (mul->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); if (mul->InputAt(1)->IsIntConstant()) { @@ -3285,7 +3248,7 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); if (mul->InputAt(1)->IsLongConstant() && @@ -3297,8 +3260,8 @@ void LocationsBuilderX86_64::VisitMul(HMul* mul) { } break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); @@ -3316,7 +3279,7 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { Location second = locations->InAt(1); Location out = locations->Out(); switch (mul->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: // The constant may have ended up in a register, so test explicitly to avoid // problems where the output may not be the same as the first operand. if (mul->InputAt(1)->IsIntConstant()) { @@ -3332,7 +3295,7 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { Address(CpuRegister(RSP), second.GetStackIndex())); } break; - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { // The constant may have ended up in a register, so test explicitly to avoid // problems where the output may not be the same as the first operand. 
if (mul->InputAt(1)->IsLongConstant()) { @@ -3357,7 +3320,7 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { DCHECK(first.Equals(out)); if (second.IsFpuRegister()) { __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); @@ -3373,7 +3336,7 @@ void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { DCHECK(first.Equals(out)); if (second.IsFpuRegister()) { __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); @@ -3417,9 +3380,9 @@ void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t t } void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) { - Primitive::Type type = rem->GetResultType(); - bool is_float = type == Primitive::kPrimFloat; - size_t elem_size = Primitive::ComponentSize(type); + DataType::Type type = rem->GetResultType(); + bool is_float = type == DataType::Type::kFloat32; + size_t elem_size = DataType::Size(type); LocationSummary* locations = rem->GetLocations(); Location first = locations->InAt(0); Location second = locations->InAt(1); @@ -3483,7 +3446,7 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr DCHECK(imm == 1 || imm == -1); switch (instruction->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { if (instruction->IsRem()) { __ xorl(output_register, output_register); } else { @@ -3495,7 +3458,7 @@ void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instr break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (instruction->IsRem()) { __ xorl(output_register, output_register); } else { @@ -3525,7 +3488,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>(); - if (instruction->GetResultType() == Primitive::kPrimInt) { + if (instruction->GetResultType() == DataType::Type::kInt32) { __ leal(tmp, Address(numerator, abs_imm - 1)); __ testl(numerator, numerator); __ cmov(kGreaterEqual, tmp, numerator); @@ -3538,7 +3501,7 @@ void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) { __ movl(output_register, tmp); } else { - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>(); codegen_->Load64BitValue(rdx, abs_imm - 1); @@ -3581,7 +3544,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat int shift; // TODO: can these branches be written as one? 
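The DivByPowerOfTwo hunk above keeps the existing add/test/cmov/shift pattern and only swaps the type names; as a reminder of what that instruction sequence computes, here is a minimal C++ sketch of the 32-bit case (illustration only, not ART code; it assumes abs_imm is a power of two and leaves the INT_MIN divisor out):

// Round-toward-zero division by +/-2^shift without idiv, mirroring
// leal(tmp, [numerator + abs_imm - 1]); testl; cmov(kGreaterEqual); sar; neg.
int32_t DivByPowerOfTwoSketch(int32_t numerator, int32_t imm) {
  int32_t abs_imm = (imm < 0) ? -imm : imm;      // power of two by precondition
  int shift = 0;
  while ((abs_imm >> shift) > 1) ++shift;
  // Bias negative dividends so the arithmetic shift truncates toward zero.
  int32_t tmp = (numerator >= 0) ? numerator : numerator + (abs_imm - 1);
  int32_t result = tmp >> shift;                 // sarl: arithmetic shift
  return (imm > 0) ? result : -result;           // negl for negative divisors
}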
- if (instruction->GetResultType() == Primitive::kPrimInt) { + if (instruction->GetResultType() == DataType::Type::kInt32) { int imm = second.GetConstant()->AsIntConstant()->GetValue(); CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift); @@ -3616,7 +3579,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat } else { int64_t imm = second.GetConstant()->AsLongConstant()->GetValue(); - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); CpuRegister rax = eax; CpuRegister rdx = edx; @@ -3669,8 +3632,8 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperat void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) { DCHECK(instruction->IsDiv() || instruction->IsRem()); - Primitive::Type type = instruction->GetResultType(); - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DataType::Type type = instruction->GetResultType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); bool is_div = instruction->IsDiv(); LocationSummary* locations = instruction->GetLocations(); @@ -3696,7 +3659,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in } } else { SlowPathCode* slow_path = - new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64( + new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64( instruction, out.AsRegister(), type, is_div); codegen_->AddSlowPath(slow_path); @@ -3704,7 +3667,7 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in // 0x80000000(00000000)/-1 triggers an arithmetic exception! // Dividing by -1 is actually negation and -0x800000000(00000000) = 0x80000000(00000000) // so it's safe to just use negl instead of more complex comparisons. 
- if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { __ cmpl(second_reg, Immediate(-1)); __ j(kEqual, slow_path->GetEntryLabel()); // edx:eax <- sign-extended of eax @@ -3725,10 +3688,10 @@ void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* in void LocationsBuilderX86_64::VisitDiv(HDiv* div) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall); switch (div->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RegisterLocation(RAX)); locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1))); locations->SetOut(Location::SameAsFirstInput()); @@ -3743,8 +3706,8 @@ void LocationsBuilderX86_64::VisitDiv(HDiv* div) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); @@ -3762,15 +3725,15 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { Location second = locations->InAt(1); DCHECK(first.Equals(locations->Out())); - Primitive::Type type = div->GetResultType(); + DataType::Type type = div->GetResultType(); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GenerateDivRemIntegral(div); break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (second.IsFpuRegister()) { __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { @@ -3785,7 +3748,7 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (second.IsFpuRegister()) { __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>()); } else if (second.IsConstant()) { @@ -3806,13 +3769,13 @@ void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) { } void LocationsBuilderX86_64::VisitRem(HRem* rem) { - Primitive::Type type = rem->GetResultType(); + DataType::Type type = rem->GetResultType(); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall); switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RegisterLocation(RAX)); locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1))); // Intel uses rdx:rax as the dividend and puts the remainder in rdx @@ -3826,8 +3789,8 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { locations->SetInAt(0, Location::Any()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::RequiresFpuRegister()); @@ -3841,15 +3804,15 @@ void LocationsBuilderX86_64::VisitRem(HRem* rem) { } void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) { - Primitive::Type type = rem->GetResultType(); + DataType::Type type = rem->GetResultType(); switch (type) { - case Primitive::kPrimInt: - case 
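As the comments above note, idivl/idivq fault on INT_MIN / -1 (a zero divisor is already rejected by the separate HDivZeroCheck), so the generated code compares the divisor against -1 and lets the slow path negate instead of dividing. A tiny sketch of the equivalent scalar logic (illustration, not ART code):

#include <cstdint>

int32_t DivWithMinusOneGuard(int32_t dividend, int32_t divisor) {
  // divisor == 0 is assumed to have been rejected earlier (HDivZeroCheck).
  if (divisor == -1) {
    // Negate via unsigned arithmetic: INT_MIN / -1 wraps back to INT_MIN,
    // matching the negl emitted on the DivRemMinusOne slow path.
    return static_cast<int32_t>(0u - static_cast<uint32_t>(dividend));
  }
  return dividend / divisor;  // the ordinary cdq + idivl path
}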
Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { GenerateDivRemIntegral(rem); break; } - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: { + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: { GenerateRemFP(rem); break; } @@ -3865,18 +3828,19 @@ void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) { SlowPathCode* slow_path = - new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction); + new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); Location value = locations->InAt(0); switch (instruction->GetType()) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { if (value.IsRegister()) { __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); __ j(kEqual, slow_path->GetEntryLabel()); @@ -3891,7 +3855,7 @@ void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instructio } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (value.IsRegister()) { __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>()); __ j(kEqual, slow_path->GetEntryLabel()); @@ -3915,11 +3879,11 @@ void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) { DCHECK(op->IsShl() || op->IsShr() || op->IsUShr()); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall); switch (op->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); // The shift count needs to be in CL. locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1))); @@ -3939,7 +3903,7 @@ void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) { Location second = locations->InAt(1); switch (op->GetResultType()) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { if (second.IsRegister()) { CpuRegister second_reg = second.AsRegister<CpuRegister>(); if (op->IsShl()) { @@ -3961,7 +3925,7 @@ void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) { } break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (second.IsRegister()) { CpuRegister second_reg = second.AsRegister<CpuRegister>(); if (op->IsShl()) { @@ -3991,11 +3955,11 @@ void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) { void LocationsBuilderX86_64::VisitRor(HRor* ror) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall); switch (ror->GetResultType()) { - case Primitive::kPrimInt: - case Primitive::kPrimLong: { + case DataType::Type::kInt32: + case DataType::Type::kInt64: { locations->SetInAt(0, Location::RequiresRegister()); // The shift count needs to be in CL (unless it is a constant). 
locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1))); @@ -4014,7 +3978,7 @@ void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) { Location second = locations->InAt(1); switch (ror->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: if (second.IsRegister()) { CpuRegister second_reg = second.AsRegister<CpuRegister>(); __ rorl(first_reg, second_reg); @@ -4023,7 +3987,7 @@ void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) { __ rorl(first_reg, imm); } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (second.IsRegister()) { CpuRegister second_reg = second.AsRegister<CpuRegister>(); __ rorq(first_reg, second_reg); @@ -4063,8 +4027,8 @@ void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) { } void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; if (instruction->IsStringAlloc()) { locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument)); @@ -4092,8 +4056,8 @@ void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) } void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetOut(Location::RegisterLocation(RAX)); locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); @@ -4112,7 +4076,7 @@ void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) { void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); Location location = parameter_visitor_.GetNextLocation(instruction->GetType()); if (location.IsStackSlot()) { location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize()); @@ -4129,7 +4093,7 @@ void InstructionCodeGeneratorX86_64::VisitParameterValue( void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument)); } @@ -4140,7 +4104,7 @@ void InstructionCodeGeneratorX86_64::VisitCurrentMethod( void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } @@ -4165,7 +4129,7 @@ void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruct void LocationsBuilderX86_64::VisitNot(HNot* not_) { 
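VisitRor above lowers HRor to a single rorl/rorq with the count either in CL or as an immediate; the operation itself is the usual rotate-right, e.g. for 32 bits (sketch, not ART code):

#include <cstdint>

uint32_t Ror32(uint32_t value, uint32_t distance) {
  distance &= 31u;  // x86 masks the rotate count to the operand width
  return (value >> distance) | (value << ((32u - distance) & 31u));
}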
LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); } @@ -4176,11 +4140,11 @@ void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) { locations->Out().AsRegister<CpuRegister>().AsRegister()); Location out = locations->Out(); switch (not_->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ notl(out.AsRegister<CpuRegister>()); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ notq(out.AsRegister<CpuRegister>()); break; @@ -4191,7 +4155,7 @@ void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) { void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); } @@ -4206,7 +4170,7 @@ void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) { void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) { locations->SetInAt(i, Location::Any()); } @@ -4245,17 +4209,17 @@ void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) { DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); bool object_field_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, - object_field_get_with_read_barrier ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + object_field_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_field_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister()); } else { // The output overlaps for an object field get when read barriers @@ -4276,36 +4240,38 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, CpuRegister base = base_loc.AsRegister<CpuRegister>(); Location out = locations->Out(); bool is_volatile = field_info.IsVolatile(); - Primitive::Type field_type = field_info.GetFieldType(); + DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType())); + DataType::Type load_type = instruction->GetType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); - switch (field_type) { - case Primitive::kPrimBoolean: { + switch (load_type) { + case DataType::Type::kBool: + case DataType::Type::kUint8: { __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimByte: { + case DataType::Type::kInt8: { __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimShort: { - __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset)); + case DataType::Type::kUint16: { + __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimChar: { - __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset)); + case DataType::Type::kInt16: { + __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { __ movl(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { // /* HeapReference<Object> */ out = *(base + offset) if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Note that a potential implicit null check is handled in this @@ -4329,27 +4295,29 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { __ movq(out.AsRegister<CpuRegister>(), Address(base, offset)); break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset)); break; } - case Primitive::kPrimVoid: - LOG(FATAL) << "Unreachable type " << field_type; + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: + LOG(FATAL) << "Unreachable type " << load_type; UNREACHABLE(); } - if (field_type == Primitive::kPrimNot) { + if (load_type == DataType::Type::kReference) { // Potential implicit null checks, in the case of reference // fields, are handled in the previous switch statement. } else { @@ -4357,7 +4325,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction, } if (is_volatile) { - if (field_type == Primitive::kPrimNot) { + if (load_type == DataType::Type::kReference) { // Memory barriers, in the case of references, are also handled // in the previous switch statement. 
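The HandleFieldGet hunk above switches on the load type and pairs unsigned types with zero-extending loads (movzxb/movzxw) and signed types with sign-extending ones (movsxb/movsxw); note that the old kPrimShort/kPrimChar cases keep their instructions under the new kInt16/kUint16 names. A scalar sketch of the same extension rules (illustration only):

#include <cstdint>
#include <cstring>

enum class LoadType { kBool, kUint8, kInt8, kUint16, kInt16, kInt32 };

int32_t LoadAndExtend(const void* field, LoadType type) {
  switch (type) {
    case LoadType::kBool:
    case LoadType::kUint8:  { uint8_t v;  std::memcpy(&v, field, 1); return v; }  // movzxb
    case LoadType::kInt8:   { int8_t v;   std::memcpy(&v, field, 1); return v; }  // movsxb
    case LoadType::kUint16: { uint16_t v; std::memcpy(&v, field, 2); return v; }  // movzxw
    case LoadType::kInt16:  { int16_t v;  std::memcpy(&v, field, 2); return v; }  // movsxw
    case LoadType::kInt32:  { int32_t v;  std::memcpy(&v, field, 4); return v; }  // movl
  }
  return 0;
}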
} else { @@ -4371,14 +4339,14 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - Primitive::Type field_type = field_info.GetFieldType(); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + DataType::Type field_type = field_info.GetFieldType(); bool is_volatile = field_info.IsVolatile(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); locations->SetInAt(0, Location::RequiresRegister()); - if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) { + if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) { if (is_volatile) { // In order to satisfy the semantics of volatile, this must be a single instruction store. locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1))); @@ -4397,7 +4365,7 @@ void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction, // Temporary registers for the write barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. locations->AddTemp(Location::RequiresRegister()); - } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) { + } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { // Temporary register for the reference poisoning. locations->AddTemp(Location::RequiresRegister()); } @@ -4412,7 +4380,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>(); Location value = locations->InAt(1); bool is_volatile = field_info.IsVolatile(); - Primitive::Type field_type = field_info.GetFieldType(); + DataType::Type field_type = field_info.GetFieldType(); uint32_t offset = field_info.GetFieldOffset().Uint32Value(); if (is_volatile) { @@ -4422,39 +4390,40 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, bool maybe_record_implicit_null_check_done = false; switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: { if (value.IsConstant()) { - int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); - __ movb(Address(base, offset), Immediate(v)); + __ movb(Address(base, offset), + Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); } else { __ movb(Address(base, offset), value.AsRegister<CpuRegister>()); } break; } - case Primitive::kPrimShort: - case Primitive::kPrimChar: { + case DataType::Type::kUint16: + case DataType::Type::kInt16: { if (value.IsConstant()) { - int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); - __ movw(Address(base, offset), Immediate(v)); + __ movw(Address(base, offset), + Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); } else { __ movw(Address(base, offset), value.AsRegister<CpuRegister>()); } break; } - case Primitive::kPrimInt: - case Primitive::kPrimNot: { + case DataType::Type::kInt32: + case DataType::Type::kReference: { if (value.IsConstant()) { int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant()); - // `field_type == Primitive::kPrimNot` implies `v == 0`. - DCHECK((field_type != Primitive::kPrimNot) || (v == 0)); + // `field_type == DataType::Type::kReference` implies `v == 0`. 
+ DCHECK((field_type != DataType::Type::kReference) || (v == 0)); // Note: if heap poisoning is enabled, no need to poison // (negate) `v` if it is a reference, as it would be null. __ movl(Address(base, offset), Immediate(v)); } else { - if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && field_type == DataType::Type::kReference) { CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); __ movl(temp, value.AsRegister<CpuRegister>()); __ PoisonHeapReference(temp); @@ -4466,7 +4435,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (value.IsConstant()) { int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); codegen_->MoveInt64ToAddress(Address(base, offset), @@ -4480,7 +4449,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (value.IsConstant()) { int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue()); @@ -4491,7 +4460,7 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (value.IsConstant()) { int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue()); @@ -4506,7 +4475,9 @@ void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction, break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << field_type; UNREACHABLE(); } @@ -4646,7 +4617,7 @@ void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) { } void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) { - SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction); + SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction); AddSlowPath(slow_path); LocationSummary* locations = instruction->GetLocations(); @@ -4671,18 +4642,18 @@ void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) { void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) { bool object_array_get_with_read_barrier = - kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot); + kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference); LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, - object_array_get_with_read_barrier ? - LocationSummary::kCallOnSlowPath : - LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, + object_array_get_with_read_barrier + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall); if (object_array_get_with_read_barrier && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(instruction->GetType())) { + if (DataType::IsFloatingPointType(instruction->GetType())) { locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } else { // The output overlaps for an object array get when read barriers @@ -4702,27 +4673,22 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { Location out_loc = locations->Out(); uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); switch (type) { - case Primitive::kPrimBoolean: { + case DataType::Type::kBool: + case DataType::Type::kUint8: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); break; } - case Primitive::kPrimByte: { + case DataType::Type::kInt8: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset)); break; } - case Primitive::kPrimShort: { - CpuRegister out = out_loc.AsRegister<CpuRegister>(); - __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); - break; - } - - case Primitive::kPrimChar: { + case DataType::Type::kUint16: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); if (mirror::kUseStringCompression && instruction->IsStringCharAt()) { // Branch cases into compressed and uncompressed for each index's type. @@ -4744,13 +4710,19 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt16: { + CpuRegister out = out_loc.AsRegister<CpuRegister>(); + __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset)); + break; + } + + case DataType::Type::kInt32: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { static_assert( sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t), "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes."); @@ -4780,30 +4752,32 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { CpuRegister out = out_loc.AsRegister<CpuRegister>(); __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset)); break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { XmmRegister out = out_loc.AsFpuRegister<XmmRegister>(); __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset)); break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << type; UNREACHABLE(); } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // Potential implicit null checks, in the case of reference // arrays, are handled in the previous switch statement. 
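The kUint16 array-get case above is also the String.charAt() path: with string compression enabled it branches between a one-byte-per-character and a two-byte-per-character load. Roughly (sketch only; the compression flag and length encoding are not shown in this hunk and are not assumed here):

#include <cstdint>

uint16_t CharAtSketch(const uint8_t* data8, const uint16_t* data16,
                      bool compressed, uint32_t index) {
  return compressed ? static_cast<uint16_t>(data8[index])  // TIMES_1 addressing
                    : data16[index];                        // TIMES_2 addressing
}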
} else { @@ -4812,13 +4786,13 @@ void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) { } void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary( + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( instruction, may_need_runtime_call_for_type_check ? LocationSummary::kCallOnSlowPath : @@ -4826,7 +4800,7 @@ void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) { locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1))); - if (Primitive::IsFloatingPointType(value_type)) { + if (DataType::IsFloatingPointType(value_type)) { locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2))); } else { locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2))); @@ -4845,7 +4819,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { CpuRegister array = array_loc.AsRegister<CpuRegister>(); Location index = locations->InAt(1); Location value = locations->InAt(2); - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue()); @@ -4854,34 +4828,35 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value(); switch (value_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: { uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value(); Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset); if (value.IsRegister()) { __ movb(address, value.AsRegister<CpuRegister>()); } else { - __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant()))); } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } - case Primitive::kPrimShort: - case Primitive::kPrimChar: { + case DataType::Type::kUint16: + case DataType::Type::kInt16: { uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value(); Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset); if (value.IsRegister()) { __ movw(address, value.AsRegister<CpuRegister>()); } else { DCHECK(value.IsConstant()) << value; - __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue())); + __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant()))); } codegen_->MaybeRecordImplicitNullCheck(instruction); break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); @@ -4906,7 +4881,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) 
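Both the field-set and array-set paths above gate the card-marking code on CodeGenerator::StoreNeedsWriteBarrier. The predicate itself is not part of this diff; its intent is roughly the following (assumption, sketched for illustration):

// A write barrier is only needed when a reference that is not statically
// null is stored into the heap; primitive stores never need one.
bool StoreNeedsWriteBarrierSketch(bool value_is_reference, bool value_is_null_constant) {
  return value_is_reference && !value_is_null_constant;
}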
{ Location temp_loc = locations->GetTemp(0); CpuRegister temp = temp_loc.AsRegister<CpuRegister>(); if (may_need_runtime_call_for_type_check) { - slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction); + slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction); codegen_->AddSlowPath(slow_path); if (instruction->GetValueCanBeNull()) { __ testl(register_value, register_value); @@ -4977,7 +4952,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value(); Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); if (value.IsRegister()) { @@ -4991,7 +4966,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value(); Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset); if (value.IsRegister()) { @@ -5006,7 +4981,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value(); Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset); if (value.IsFpuRegister()) { @@ -5020,7 +4995,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value(); Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset); if (value.IsFpuRegister()) { @@ -5036,7 +5011,9 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unreachable type " << instruction->GetType(); UNREACHABLE(); } @@ -5044,7 +5021,7 @@ void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) { void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); if (!instruction->IsEmittedAtUseSite()) { locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -5085,7 +5062,8 @@ void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) LocationSummary* locations = instruction->GetLocations(); Location index_loc = locations->InAt(0); Location length_loc = locations->InAt(1); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction); + SlowPathCode* slow_path = + new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction); if (length_loc.IsConstant()) { int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant()); @@ -5167,12 +5145,19 @@ void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIB } void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) { + if (instruction->GetNext()->IsSuspendCheck() && + 
instruction->GetBlock()->GetLoopInformation() != nullptr) { + HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck(); + // The back edge will generate the suspend check. + codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction); + } + codegen_->GetMoveResolver()->EmitNativeCode(instruction); } void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnSlowPath); // In suspend check slow path, usually there are no caller-save registers at all. // If SIMD instructions are present, however, we force spilling all live SIMD // registers in full width (since the runtime only saves/restores lower part). @@ -5199,12 +5184,12 @@ void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruc SuspendCheckSlowPathX86_64* slow_path = down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath()); if (slow_path == nullptr) { - slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor); + slow_path = + new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor); instruction->SetSlowPath(slow_path); codegen_->AddSlowPath(slow_path); if (successor != nullptr) { DCHECK(successor->IsLoopHeader()); - codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction); } } else { DCHECK_EQ(slow_path->GetSuccessor(), successor); @@ -5267,9 +5252,17 @@ void ParallelMoveResolverX86_64::EmitMove(size_t index) { __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); } } else if (source.IsSIMDStackSlot()) { - DCHECK(destination.IsFpuRegister()); - __ movups(destination.AsFpuRegister<XmmRegister>(), - Address(CpuRegister(RSP), source.GetStackIndex())); + if (destination.IsFpuRegister()) { + __ movups(destination.AsFpuRegister<XmmRegister>(), + Address(CpuRegister(RSP), source.GetStackIndex())); + } else { + DCHECK(destination.IsSIMDStackSlot()); + size_t high = kX86_64WordSize; + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex())); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP)); + __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high)); + __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP)); + } } else if (source.IsConstant()) { HConstant* constant = source.GetConstant(); if (constant->IsIntConstant() || constant->IsNullConstant()) { @@ -5337,19 +5330,6 @@ void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) { __ movl(reg, CpuRegister(TMP)); } -void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) { - ScratchRegisterScope ensure_scratch( - this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); - - int stack_offset = ensure_scratch.IsSpilled() ? 
kX86_64WordSize : 0; - __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); - __ movl(CpuRegister(ensure_scratch.GetRegister()), - Address(CpuRegister(RSP), mem2 + stack_offset)); - __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); - __ movl(Address(CpuRegister(RSP), mem1 + stack_offset), - CpuRegister(ensure_scratch.GetRegister())); -} - void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) { __ movq(CpuRegister(TMP), reg1); __ movq(reg1, reg2); @@ -5362,19 +5342,6 @@ void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) { __ movq(reg, CpuRegister(TMP)); } -void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) { - ScratchRegisterScope ensure_scratch( - this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); - - int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); - __ movq(CpuRegister(ensure_scratch.GetRegister()), - Address(CpuRegister(RSP), mem2 + stack_offset)); - __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); - __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), - CpuRegister(ensure_scratch.GetRegister())); -} - void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) { __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem)); __ movss(Address(CpuRegister(RSP), mem), reg); @@ -5387,6 +5354,48 @@ void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { __ movd(reg, CpuRegister(TMP)); } +void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) { + size_t extra_slot = 2 * kX86_64WordSize; + __ subq(CpuRegister(RSP), Immediate(extra_slot)); + __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg)); + ExchangeMemory64(0, mem + extra_slot, 2); + __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0)); + __ addq(CpuRegister(RSP), Immediate(extra_slot)); +} + +void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) { + ScratchRegisterScope ensure_scratch( + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; + __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movl(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP)); + __ movl(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); +} + +void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) { + ScratchRegisterScope ensure_scratch( + this, TMP, RAX, codegen_->GetNumberOfCoreRegisters()); + + int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0; + + // Now that temp registers are available (possibly spilled), exchange blocks of memory. 
+ for (int i = 0; i < num_of_qwords; i++) { + __ movq(CpuRegister(TMP), + Address(CpuRegister(RSP), mem1 + stack_offset)); + __ movq(CpuRegister(ensure_scratch.GetRegister()), + Address(CpuRegister(RSP), mem2 + stack_offset)); + __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), + CpuRegister(TMP)); + __ movq(Address(CpuRegister(RSP), mem1 + stack_offset), + CpuRegister(ensure_scratch.GetRegister())); + stack_offset += kX86_64WordSize; + } +} + void ParallelMoveResolverX86_64::EmitSwap(size_t index) { MoveOperands* move = moves_[index]; Location source = move->GetSource(); @@ -5399,13 +5408,13 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { } else if (source.IsStackSlot() && destination.IsRegister()) { Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); } else if (source.IsStackSlot() && destination.IsStackSlot()) { - Exchange32(destination.GetStackIndex(), source.GetStackIndex()); + ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex()); } else if (source.IsRegister() && destination.IsDoubleStackSlot()) { Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsRegister()) { Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) { - Exchange64(destination.GetStackIndex(), source.GetStackIndex()); + ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1); } else if (source.IsFpuRegister() && destination.IsFpuRegister()) { __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>()); __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>()); @@ -5418,6 +5427,12 @@ void ParallelMoveResolverX86_64::EmitSwap(size_t index) { Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) { Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); + } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) { + ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2); + } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) { + Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex()); + } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) { + Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex()); } else { LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination; } @@ -5435,9 +5450,14 @@ void ParallelMoveResolverX86_64::RestoreScratch(int reg) { void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck( SlowPathCode* slow_path, CpuRegister class_reg) { - __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()), - Immediate(mirror::Class::kStatusInitialized)); - __ j(kLess, slow_path->GetEntryLabel()); + constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf(); + const size_t status_byte_offset = + mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte); + constexpr uint32_t shifted_initialized_value = + enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte); + + __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value)); + __ j(kBelow, slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); // No need for memory fence, thanks to the x86-64 memory model. 
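The GenerateClassInitializationCheck rewrite above stops comparing a full 32-bit status word against kStatusInitialized: the status now shares its word with SubtypeCheckBits, so only the byte holding the status is compared (cmpb) against the status value pre-shifted into that byte, branching to the slow path on kBelow. A sketch of the computation (illustration, not ART code; it assumes the shifted value fits in one byte, as the real code arranges):

#include <cstddef>
#include <cstdint>

bool IsInitializedSketch(const uint8_t* klass,
                         size_t status_offset,        // mirror::Class::StatusOffset()
                         size_t status_lsb_position,  // SubtypeCheckBits::BitStructSizeOf()
                         uint32_t initialized_value)  // ClassStatus::kInitialized
{
  size_t status_byte_offset = status_offset + status_lsb_position / 8;
  uint8_t shifted_initialized_value =
      static_cast<uint8_t>(initialized_value << (status_lsb_position % 8));
  // cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
  // j(kBelow, slow_path) is taken when the class is not yet initialized.
  return klass[status_byte_offset] >= shifted_initialized_value;
}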
} @@ -5451,6 +5471,7 @@ HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind( case HLoadClass::LoadKind::kReferrersClass: break; case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5480,7 +5501,7 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier) ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind); if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -5503,12 +5524,11 @@ void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) { } Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file, - dex::TypeIndex dex_index, + dex::TypeIndex type_index, Handle<mirror::Class> handle) { - jit_class_roots_.Overwrite( - TypeReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle); // Add a patch entry and return the label. - jit_class_patches_.emplace_back(dex_file, dex_index.index_); + jit_class_patches_.emplace_back(&dex_file, type_index.index_); PatchInfo<Label>* info = &jit_class_patches_.back(); return &info->label; } @@ -5549,7 +5569,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S DCHECK(codegen_->GetCompilerOptions().IsBootImage()); DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootTypePatch(cls); + codegen_->RecordBootImageTypePatch(cls); break; case HLoadClass::LoadKind::kBootImageAddress: { DCHECK_EQ(read_barrier_option, kWithoutReadBarrier); @@ -5559,6 +5579,18 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. break; } + case HLoadClass::LoadKind::kBootImageClassTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + codegen_->RecordBootImageTypePatch(cls); + // Extract the reference from the slot data, i.e. clear the hash bits. 
+ int32_t masked_hash = ClassTable::TableSlot::MaskHash( + ComputeModifiedUtf8Hash(cls->GetDexFile().StringByTypeIdx(cls->GetTypeIndex()))); + if (masked_hash != 0) { + __ subl(out, Immediate(masked_hash)); + } + break; + } case HLoadClass::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false); @@ -5584,7 +5616,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S if (generate_null_check || cls->MustGenerateClinitCheck()) { DCHECK(cls->CanCallRuntime()); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck()); codegen_->AddSlowPath(slow_path); if (generate_null_check) { @@ -5601,7 +5633,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_S void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath); + new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath); locations->SetInAt(0, Location::RequiresRegister()); if (check->HasUses()) { locations->SetOut(Location::SameAsFirstInput()); @@ -5610,7 +5642,7 @@ void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) { void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) { // We assume the class to not be null. - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64( + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64( check->GetLoadClass(), check, check->GetDexPc(), true); codegen_->AddSlowPath(slow_path); GenerateClassInitializationCheck(slow_path, @@ -5621,6 +5653,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( HLoadString::LoadKind desired_string_load_kind) { switch (desired_string_load_kind) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: DCHECK(!Runtime::Current()->UseJitCompilation()); break; @@ -5636,7 +5669,7 @@ HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind( void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load); - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind); if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) { locations->SetOut(Location::RegisterLocation(RAX)); } else { @@ -5656,12 +5689,11 @@ void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) { } Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file, - dex::StringIndex dex_index, + dex::StringIndex string_index, Handle<mirror::String> handle) { - jit_string_roots_.Overwrite( - StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference())); + ReserveJitStringRoot(StringReference(&dex_file, string_index), handle); // Add a patch entry and return the label. 
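For the new kBootImageClassTable load kind above, the loaded table slot packs a precomputed hash into the low bits of the (aligned) class reference, so the compile-time-known masked hash of the type's descriptor can be removed with a single subl. A rough sketch (illustration only; the hash width and MaskHash behaviour are assumptions, not taken from this diff):

#include <cstdint>

constexpr uint32_t kAssumedHashMask = (1u << 3) - 1;  // low bits reserved for the hash

uint32_t DecodeClassTableSlotSketch(uint32_t slot_data, uint32_t descriptor_hash) {
  uint32_t masked_hash = descriptor_hash & kAssumedHashMask;  // ClassTable::TableSlot::MaskHash
  return slot_data - masked_hash;  // subl(out, Immediate(masked_hash)) clears the hash bits
}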
- jit_string_patches_.emplace_back(dex_file, dex_index.index_); + jit_string_patches_.emplace_back(&dex_file, string_index.index_); PatchInfo<Label>* info = &jit_string_patches_.back(); return &info->label; } @@ -5677,15 +5709,21 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA case HLoadString::LoadKind::kBootImageLinkTimePcRelative: { DCHECK(codegen_->GetCompilerOptions().IsBootImage()); __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); - codegen_->RecordBootStringPatch(load); - return; // No dex cache slow path. + codegen_->RecordBootImageStringPatch(load); + return; } case HLoadString::LoadKind::kBootImageAddress: { uint32_t address = dchecked_integral_cast<uint32_t>( reinterpret_cast<uintptr_t>(load->GetString().Get())); DCHECK_NE(address, 0u); __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended. - return; // No dex cache slow path. + return; + } + case HLoadString::LoadKind::kBootImageInternTable: { + DCHECK(!codegen_->GetCompilerOptions().IsBootImage()); + __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false)); + codegen_->RecordBootImageStringPatch(load); + return; } case HLoadString::LoadKind::kBssEntry: { Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, @@ -5693,7 +5731,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREA Label* fixup_label = codegen_->NewStringBssEntryPatch(load); // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */ GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption); - SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load); codegen_->AddSlowPath(slow_path); __ testl(out, out); __ j(kEqual, slow_path->GetEntryLabel()); @@ -5729,7 +5767,7 @@ static Address GetExceptionTlsAddress() { void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); } @@ -5738,7 +5776,7 @@ void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) { } void LocationsBuilderX86_64::VisitClearException(HClearException* clear) { - new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall); } void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) { @@ -5746,8 +5784,8 @@ void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear } void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -5758,7 +5796,7 @@ void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) { } static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) { - if (type_check_kind == 
TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) { + if (type_check_kind == TypeCheckKind::kInterfaceCheck) { // We need a temporary for holding the iftable length. return true; } @@ -5785,11 +5823,12 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { case TypeCheckKind::kExactCheck: case TypeCheckKind::kAbstractClassCheck: case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - call_kind = - kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; - baker_read_barrier_slow_path = kUseBakerReadBarrier; + case TypeCheckKind::kArrayObjectCheck: { + bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction); + call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall; + baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier; break; + } case TypeCheckKind::kArrayCheck: case TypeCheckKind::kUnresolvedCheck: case TypeCheckKind::kInterfaceCheck: @@ -5797,7 +5836,8 @@ void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) { break; } - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); if (baker_read_barrier_slow_path) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -5839,12 +5879,14 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { switch (type_check_kind) { case TypeCheckKind::kExactCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, - kCompilerReadBarrierOption); + read_barrier_option); if (cls.IsRegister()) { __ cmpl(out, cls.AsRegister<CpuRegister>()); } else { @@ -5865,12 +5907,14 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kAbstractClassCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, - kCompilerReadBarrierOption); + read_barrier_option); // If the class is abstract, we eagerly fetch the super class of the // object to avoid doing a comparison we know will fail. NearLabel loop, success; @@ -5880,7 +5924,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5899,12 +5943,14 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kClassHierarchyCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, - kCompilerReadBarrierOption); + read_barrier_option); // Walk over the class hierarchy to find a match. 
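The abstract-class and class-hierarchy instance-of paths above boil down to loading obj->klass_ and then walking the super-class chain until either the target class or null is reached. A minimal stand-alone model of that loop follows; the structs are stand-ins for the real mirror::Class layout, and the read-barrier plumbing is deliberately left out.

#include <cstddef>

// Minimal model of the hierarchy walk emitted for kAbstractClassCheck /
// kClassHierarchyCheck: follow super_ until we hit the target class (match)
// or null (no match). Field names mirror the generated loads, but the types
// are simple stand-ins, not the real mirror::Class.
struct Klass {
  const Klass* super_ = nullptr;
};

struct Object {
  const Klass* klass_ = nullptr;
};

bool InstanceOfHierarchy(const Object* obj, const Klass* cls) {
  if (obj == nullptr) {
    return false;  // null is not an instance of anything.
  }
  // out = obj->klass_
  const Klass* out = obj->klass_;
  // Loop: compare against cls, otherwise out = out->super_ until null.
  while (out != nullptr) {
    if (out == cls) {
      return true;
    }
    out = out->super_;
  }
  return false;
}

int main() {
  Klass object_class;
  Klass abstract_base{&object_class};
  Klass concrete{&abstract_base};
  Object o{&concrete};
  return InstanceOfHierarchy(&o, &abstract_base) ? 0 : 1;
}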
NearLabel loop, success; __ Bind(&loop); @@ -5920,7 +5966,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, super_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ testl(out, out); __ j(kNotEqual, &loop); // If `out` is null, we use it for the result, and jump to `done`. @@ -5934,12 +5980,14 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } case TypeCheckKind::kArrayObjectCheck: { + ReadBarrierOption read_barrier_option = + CodeGenerator::ReadBarrierOptionForInstanceOf(instruction); // /* HeapReference<Class> */ out = obj->klass_ GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, - kCompilerReadBarrierOption); + read_barrier_option); // Do an exact check. NearLabel exact_check; if (cls.IsRegister()) { @@ -5955,7 +6003,7 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { out_loc, component_offset, maybe_temp_loc, - kCompilerReadBarrierOption); + read_barrier_option); __ testl(out, out); // If `out` is null, we use it for the result, and jump to `done`. __ j(kEqual, &done); @@ -5982,8 +6030,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex())); } DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ j(kNotEqual, slow_path->GetEntryLabel()); __ movl(out, Immediate(1)); @@ -6014,8 +6062,8 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { // call to the runtime not using a type checking slow path). // This should also be beneficial for the other cases above. DCHECK(locations->OnlyCallsOnSlowPath()); - slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, - /* is_fatal */ false); + slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( + instruction, /* is_fatal */ false); codegen_->AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); if (zero.IsLinked()) { @@ -6039,31 +6087,11 @@ void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) { } } -static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) { - switch (type_check_kind) { - case TypeCheckKind::kExactCheck: - case TypeCheckKind::kAbstractClassCheck: - case TypeCheckKind::kClassHierarchyCheck: - case TypeCheckKind::kArrayObjectCheck: - return !throws_into_catch && !kEmitCompilerReadBarrier; - case TypeCheckKind::kInterfaceCheck: - return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences; - case TypeCheckKind::kArrayCheck: - case TypeCheckKind::kUnresolvedCheck: - return false; - } - LOG(FATAL) << "Unreachable"; - UNREACHABLE(); -} - void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) { - bool throws_into_catch = instruction->CanThrowIntoCatchBlock(); TypeCheckKind type_check_kind = instruction->GetTypeCheckKind(); - bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch); - LocationSummary::CallKind call_kind = is_fatal_slow_path - ? 
LocationSummary::kNoCall - : LocationSummary::kCallOnSlowPath; - LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind); + LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction); + LocationSummary* locations = + new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind); locations->SetInAt(0, Location::RequiresRegister()); if (type_check_kind == TypeCheckKind::kInterfaceCheck) { // Require a register for the interface check since there is a loop that compares the class to @@ -6102,14 +6130,10 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { const uint32_t object_array_data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); - // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases - // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding - // read barriers is done for performance and code size reasons. - bool is_type_check_slow_path_fatal = - IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock()); + bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction); SlowPathCode* type_check_slow_path = - new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction, - is_type_check_slow_path_fatal); + new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64( + instruction, is_type_check_slow_path_fatal); codegen_->AddSlowPath(type_check_slow_path); @@ -6260,42 +6284,40 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } case TypeCheckKind::kInterfaceCheck: - // Fast path for the interface check. We always go slow path for heap poisoning since - // unpoisoning cls would require an extra temp. - if (!kPoisonHeapReferences) { - // Try to avoid read barriers to improve the fast path. We can not get false positives by - // doing this. - // /* HeapReference<Class> */ temp = obj->klass_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - obj_loc, - class_offset, - kWithoutReadBarrier); - - // /* HeapReference<Class> */ temp = temp->iftable_ - GenerateReferenceLoadTwoRegisters(instruction, - temp_loc, - temp_loc, - iftable_offset, - kWithoutReadBarrier); - // Iftable is never null. - __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); - // Loop through the iftable and check if any class matches. - NearLabel start_loop; - __ Bind(&start_loop); - // Need to subtract first to handle the empty array case. - __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2)); - __ j(kNegative, type_check_slow_path->GetEntryLabel()); - // Go to next interface if the classes do not match. - __ cmpl(cls.AsRegister<CpuRegister>(), - CodeGeneratorX86_64::ArrayAddress(temp, - maybe_temp2_loc, - TIMES_4, - object_array_data_offset)); - __ j(kNotEqual, &start_loop); // Return if same class. - } else { - __ jmp(type_check_slow_path->GetEntryLabel()); - } + // Fast path for the interface check. Try to avoid read barriers to improve the fast path. + // We can not get false positives by doing this. + // /* HeapReference<Class> */ temp = obj->klass_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + obj_loc, + class_offset, + kWithoutReadBarrier); + + // /* HeapReference<Class> */ temp = temp->iftable_ + GenerateReferenceLoadTwoRegisters(instruction, + temp_loc, + temp_loc, + iftable_offset, + kWithoutReadBarrier); + // Iftable is never null. 
+ __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset)); + // Maybe poison the `cls` for direct comparison with memory. + __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>()); + // Loop through the iftable and check if any class matches. + NearLabel start_loop; + __ Bind(&start_loop); + // Need to subtract first to handle the empty array case. + __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2)); + __ j(kNegative, type_check_slow_path->GetEntryLabel()); + // Go to next interface if the classes do not match. + __ cmpl(cls.AsRegister<CpuRegister>(), + CodeGeneratorX86_64::ArrayAddress(temp, + maybe_temp2_loc, + TIMES_4, + object_array_data_offset)); + __ j(kNotEqual, &start_loop); // Return if same class. + // If `cls` was poisoned above, unpoison it. + __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>()); break; } @@ -6307,8 +6329,8 @@ void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) { } void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) { - LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly); + LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary( + instruction, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -6330,9 +6352,9 @@ void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperatio void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall); - DCHECK(instruction->GetResultType() == Primitive::kPrimInt - || instruction->GetResultType() == Primitive::kPrimLong); + new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall); + DCHECK(instruction->GetResultType() == DataType::Type::kInt32 + || instruction->GetResultType() == DataType::Type::kInt64); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::Any()); locations->SetOut(Location::SameAsFirstInput()); @@ -6356,7 +6378,7 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in Location second = locations->InAt(1); DCHECK(first.Equals(locations->Out())); - if (instruction->GetResultType() == Primitive::kPrimInt) { + if (instruction->GetResultType() == DataType::Type::kInt32) { if (second.IsRegister()) { if (instruction->IsAnd()) { __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>()); @@ -6388,7 +6410,7 @@ void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* in } } } else { - DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64); CpuRegister first_reg = first.AsRegister<CpuRegister>(); bool second_is_constant = false; int64_t value = 0; @@ -6534,7 +6556,7 @@ void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad( "have different sizes."); // Slow path marking the GC root `root`. 
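The interface-check fast path above scans the object's iftable two slots at a time (each entry being an interface class followed by its method array) and now poisons `cls` once up front so every comparison can go straight against the poisoned entries in memory, which is what removes the old kPoisonHeapReferences bail-out. A self-contained sketch of the scan; a trivial XOR stands in for the real heap-reference poisoning scheme.

#include <cstdint>
#include <vector>

// Stand-in for heap reference poisoning: the sketch only needs something
// cheap and reversible, so XOR with all-ones is used here as an assumption.
uint32_t Poison(uint32_t ref) { return ref ^ 0xFFFFFFFFu; }

// The iftable is modelled as a flat array where even slots hold the
// interface class and odd slots hold its method array, so the scan steps
// by two, exactly like the subl(..., Immediate(2)) loop above.
bool ImplementsInterface(const std::vector<uint32_t>& poisoned_iftable,
                         uint32_t cls_ref) {
  // Poison `cls` once so each comparison is done directly against the
  // poisoned table entries, avoiding an unpoison per iteration.
  const uint32_t poisoned_cls = Poison(cls_ref);
  // Count down by two; subtract first so an empty table exits immediately.
  for (size_t i = poisoned_iftable.size(); i >= 2; i -= 2) {
    if (poisoned_iftable[i - 2] == poisoned_cls) {
      return true;  // Same class: the check succeeds.
    }
  }
  return false;  // Fell off the table: real code jumps to the slow path.
}

int main() {
  // Two interfaces (0x100, 0x200), each followed by a dummy method array ref.
  std::vector<uint32_t> iftable = {Poison(0x100), Poison(0x111),
                                   Poison(0x200), Poison(0x222)};
  return ImplementsInterface(iftable, 0x200) ? 0 : 1;
}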
- SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( instruction, root, /* unpoison_ref_before_marking */ false); codegen_->AddSlowPath(slow_path); @@ -6666,10 +6688,10 @@ void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction if (always_update_field) { DCHECK(temp1 != nullptr); DCHECK(temp2 != nullptr); - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( + slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64( instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2); } else { - slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64( + slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64( instruction, ref, /* unpoison_ref_before_marking */ true); } AddSlowPath(slow_path); @@ -6702,7 +6724,7 @@ void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction, // not used by the artReadBarrierSlow entry point. // // TODO: Unpoison `ref` when it is used by artReadBarrierSlow. - SlowPathCode* slow_path = new (GetGraph()->GetArena()) + SlowPathCode* slow_path = new (GetScopedAllocator()) ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index); AddSlowPath(slow_path); @@ -6738,7 +6760,7 @@ void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instructi // Note that GC roots are not affected by heap poisoning, so we do // not need to do anything special for this here. SlowPathCode* slow_path = - new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); + new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root); AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); @@ -6758,7 +6780,7 @@ void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTR // Simple implementation of packed switch - generate cascaded compare/jumps. 
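Most of the hunks above swap `new (GetGraph()->GetArena())` for `new (codegen_->GetScopedAllocator())` when creating slow paths. Both forms rely on a placement-style operator new that carves the object out of an arena whose lifetime is tied to the compilation, instead of the C++ heap. Below is a minimal, self-contained sketch of that idiom; the allocator is a toy bump-style arena, not ART's ArenaAllocator or ScopedArenaAllocator.

#include <cstddef>
#include <cstdint>
#include <vector>

// Toy arena standing in for ArenaAllocator / ScopedArenaAllocator:
// every allocation lives until the arena itself is destroyed.
class Arena {
 public:
  void* Alloc(size_t bytes) {
    blocks_.emplace_back(bytes);
    return blocks_.back().data();
  }
 private:
  std::vector<std::vector<uint8_t>> blocks_;
};

// The placement operator new that makes `new (arena) SlowPath(...)` work.
void* operator new(size_t bytes, Arena& arena) { return arena.Alloc(bytes); }
// Matching placement delete, called only if the constructor throws; the
// memory is reclaimed with the arena, so there is nothing to do here.
void operator delete(void*, Arena&) {}

struct SlowPath {
  explicit SlowPath(int dex_pc) : dex_pc_(dex_pc) {}
  int dex_pc_;
};

int main() {
  Arena arena;                                // lives for one compilation
  SlowPath* path = new (arena) SlowPath(42);  // no heap allocation, no delete
  return path->dex_pc_ == 42 ? 0 : 1;
}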
void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) { LocationSummary* locations = - new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall); + new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); @@ -6845,6 +6867,16 @@ void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_ins __ jmp(temp_reg); } +void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction + ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + +void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction + ATTRIBUTE_UNUSED) { + LOG(FATAL) << "Unreachable"; +} + void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) { if (value == 0) { __ xorl(dest, dest); @@ -7046,33 +7078,33 @@ void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) { } Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) { - AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v)); + AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v)); return Address::RIP(fixup); } Address CodeGeneratorX86_64::LiteralFloatAddress(float v) { - AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v)); + AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v)); return Address::RIP(fixup); } Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) { - AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v)); + AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v)); return Address::RIP(fixup); } Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) { - AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v)); + AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v)); return Address::RIP(fixup); } // TODO: trg as memory. -void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) { +void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) { if (!trg.IsValid()) { - DCHECK_EQ(type, Primitive::kPrimVoid); + DCHECK_EQ(type, DataType::Type::kVoid); return; } - DCHECK_NE(type, Primitive::kPrimVoid); + DCHECK_NE(type, DataType::Type::kVoid); Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type); if (trg.Equals(return_loc)) { @@ -7080,7 +7112,7 @@ void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type t } // Let the parallel move resolver take care of all of this. - HParallelMove parallel_move(GetGraph()->GetArena()); + HParallelMove parallel_move(GetGraph()->GetAllocator()); parallel_move.AddMove(return_loc, trg, type, nullptr); GetMoveResolver()->EmitNativeCode(¶llel_move); } @@ -7088,7 +7120,7 @@ void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type t Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) { // Create a fixup to be used to create and address the jump table. JumpTableRIPFixup* table_fixup = - new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr); + new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr); // We have to populate the jump tables. 
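The packed-switch handling above either emits cascaded compare/branches or, via LiteralCaseTable and JumpTableRIPFixup, a RIP-addressed jump table indexed by `value - lower_bound`. A compact C++ model of the table-based lowering follows; the case count and targets are illustrative assumptions, and the single unsigned comparison stands in for the generated bounds check.

#include <cstdint>
#include <cstdio>

// Model of a packed switch lowered to a table lookup: normalize the value
// against the lowest case, bounds-check once, then index the table.
// Falling outside the range goes to the default target, mirroring the
// guard emitted before the indexed jump.
enum Target { kCase0, kCase1, kCase2, kDefault };

Target PackedSwitch(int32_t value, int32_t lower_bound) {
  static const Target kTable[] = {kCase0, kCase1, kCase2};
  // Casting to unsigned lets one comparison cover both "below lower_bound"
  // and "above the last case".
  const uint32_t index = static_cast<uint32_t>(value - lower_bound);
  if (index >= sizeof(kTable) / sizeof(kTable[0])) {
    return kDefault;
  }
  return kTable[index];  // in generated code: jmp through the RIP-relative table
}

int main() {
  std::printf("%d %d %d\n",
              PackedSwitch(10, 10),   // kCase0
              PackedSwitch(12, 10),   // kCase2
              PackedSwitch(99, 10));  // kDefault
  return 0;
}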
fixups_to_jump_tables_.push_back(table_fixup); @@ -7127,18 +7159,14 @@ void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code, void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) { for (const PatchInfo<Label>& info : jit_string_patches_) { - const auto it = jit_string_roots_.find( - StringReference(&info.dex_file, dex::StringIndex(info.index))); - DCHECK(it != jit_string_roots_.end()); - uint64_t index_in_table = it->second; + StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index)); + uint64_t index_in_table = GetJitStringRootIndex(string_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } for (const PatchInfo<Label>& info : jit_class_patches_) { - const auto it = jit_class_roots_.find( - TypeReference(&info.dex_file, dex::TypeIndex(info.index))); - DCHECK(it != jit_class_roots_.end()); - uint64_t index_in_table = it->second; + TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index)); + uint64_t index_in_table = GetJitClassRootIndex(type_reference); PatchJitRootUse(code, roots_data, info, index_in_table); } } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 33c64290d4..1079e94dfc 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -89,16 +89,16 @@ class FieldAccessCallingConventionX86_64 : public FieldAccessCallingConvention { Location GetFieldIndexLocation() const OVERRIDE { return Location::RegisterLocation(RDI); } - Location GetReturnLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetReturnLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return Location::RegisterLocation(RAX); } - Location GetSetValueLocation(Primitive::Type type ATTRIBUTE_UNUSED, bool is_instance) + Location GetSetValueLocation(DataType::Type type ATTRIBUTE_UNUSED, bool is_instance) const OVERRIDE { return is_instance ? 
Location::RegisterLocation(RDX) : Location::RegisterLocation(RSI); } - Location GetFpuLocation(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + Location GetFpuLocation(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return Location::FpuRegisterLocation(XMM0); } @@ -112,8 +112,8 @@ class InvokeDexCallingConventionVisitorX86_64 : public InvokeDexCallingConventio InvokeDexCallingConventionVisitorX86_64() {} virtual ~InvokeDexCallingConventionVisitorX86_64() {} - Location GetNextLocation(Primitive::Type type) OVERRIDE; - Location GetReturnLocation(Primitive::Type type) const OVERRIDE; + Location GetNextLocation(DataType::Type type) OVERRIDE; + Location GetReturnLocation(DataType::Type type) const OVERRIDE; Location GetMethodLocation() const OVERRIDE; private: @@ -139,11 +139,12 @@ class ParallelMoveResolverX86_64 : public ParallelMoveResolverWithSwap { private: void Exchange32(CpuRegister reg, int mem); void Exchange32(XmmRegister reg, int mem); - void Exchange32(int mem1, int mem2); void Exchange64(CpuRegister reg1, CpuRegister reg2); void Exchange64(CpuRegister reg, int mem); void Exchange64(XmmRegister reg, int mem); - void Exchange64(int mem1, int mem2); + void Exchange128(XmmRegister reg, int mem); + void ExchangeMemory32(int mem1, int mem2); + void ExchangeMemory64(int mem1, int mem2, int num_of_qwords); CodeGeneratorX86_64* const codegen_; @@ -299,7 +300,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateFrameExit() OVERRIDE; void Bind(HBasicBlock* block) OVERRIDE; void MoveConstant(Location destination, int32_t value) OVERRIDE; - void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE; + void MoveLocation(Location dst, Location src, DataType::Type dst_type) OVERRIDE; void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE; size_t SaveCoreRegister(size_t stack_index, uint32_t reg_id) OVERRIDE; @@ -384,7 +385,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { block_labels_ = CommonInitializeLabels<Label>(); } - bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { + bool NeedsTwoRegisters(DataType::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; } @@ -409,22 +410,22 @@ class CodeGeneratorX86_64 : public CodeGenerator { void GenerateVirtualCall( HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE; - void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke); - Label* NewMethodBssEntryPatch(MethodReference target_method); - void RecordBootTypePatch(HLoadClass* load_class); + void RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke); + void RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke); + void RecordBootImageTypePatch(HLoadClass* load_class); Label* NewTypeBssEntryPatch(HLoadClass* load_class); - void RecordBootStringPatch(HLoadString* load_string); + void RecordBootImageStringPatch(HLoadString* load_string); Label* NewStringBssEntryPatch(HLoadString* load_string); Label* NewJitRootStringPatch(const DexFile& dex_file, - dex::StringIndex dex_index, + dex::StringIndex string_index, Handle<mirror::String> handle); Label* NewJitRootClassPatch(const DexFile& dex_file, - dex::TypeIndex dex_index, + dex::TypeIndex type_index, Handle<mirror::Class> handle); - void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE; + void MoveFromReturnRegister(Location trg, DataType::Type type) OVERRIDE; - void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE; + void 
EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) OVERRIDE; void PatchJitRootUse(uint8_t* code, const uint8_t* roots_data, @@ -586,9 +587,9 @@ class CodeGeneratorX86_64 : public CodeGenerator { static constexpr int32_t kDummy32BitOffset = 256; private: - template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> + template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)> static void EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>>& infos, - ArenaVector<LinkerPatch>* linker_patches); + ArenaVector<linker::LinkerPatch>* linker_patches); // Labels for each block that will be compiled. Label* block_labels_; // Indexed by block id. @@ -611,8 +612,10 @@ class CodeGeneratorX86_64 : public CodeGenerator { ArenaDeque<PatchInfo<Label>> boot_image_type_patches_; // Type patch locations for kBssEntry. ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_; - // String patch locations; type depends on configuration (app .bss or boot image). - ArenaDeque<PatchInfo<Label>> string_patches_; + // String patch locations; type depends on configuration (intern table or boot image PIC). + ArenaDeque<PatchInfo<Label>> boot_image_string_patches_; + // String patch locations for kBssEntry. + ArenaDeque<PatchInfo<Label>> string_bss_entry_patches_; // Patches for string literals in JIT compiled code. ArenaDeque<PatchInfo<Label>> jit_string_patches_; diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc index e598e19b67..2e31d35584 100644 --- a/compiler/optimizing/code_sinking.cc +++ b/compiler/optimizing/code_sinking.cc @@ -16,6 +16,10 @@ #include "code_sinking.h" +#include "base/arena_bit_vector.h" +#include "base/bit_vector-inl.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "common_dominator.h" #include "nodes.h" @@ -30,7 +34,9 @@ void CodeSinking::Run() { // TODO(ngeoffray): we do not profile branches yet, so use throw instructions // as an indicator of an uncommon branch. for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) { - if (exit_predecessor->GetLastInstruction()->IsThrow()) { + HInstruction* last = exit_predecessor->GetLastInstruction(); + // Any predecessor of the exit that does not return, throws an exception. + if (!last->IsReturn() && !last->IsReturnVoid()) { SinkCodeToUncommonBranch(exit_predecessor); } } @@ -64,6 +70,11 @@ static bool IsInterestingInstruction(HInstruction* instruction) { // A fence with "0" inputs is dead and should've been removed in a prior pass. DCHECK_NE(0u, ctor_fence->InputCount()); + // TODO: this should be simplified to 'return true' since it's + // potentially pessimizing any code sinking for inlined constructors with final fields. + // TODO: double check that if the final field assignments are not moved, + // then the fence is not moved either. + return ctor_fence->GetAssociatedAllocation() != nullptr; } @@ -110,7 +121,7 @@ static bool IsInterestingInstruction(HInstruction* instruction) { static void AddInstruction(HInstruction* instruction, const ArenaBitVector& processed_instructions, const ArenaBitVector& discard_blocks, - ArenaVector<HInstruction*>* worklist) { + ScopedArenaVector<HInstruction*>* worklist) { // Add to the work list if the instruction is not in the list of blocks // to discard, hasn't been already processed and is of interest. 
if (!discard_blocks.IsBitSet(instruction->GetBlock()->GetBlockId()) && @@ -123,7 +134,7 @@ static void AddInstruction(HInstruction* instruction, static void AddInputs(HInstruction* instruction, const ArenaBitVector& processed_instructions, const ArenaBitVector& discard_blocks, - ArenaVector<HInstruction*>* worklist) { + ScopedArenaVector<HInstruction*>* worklist) { for (HInstruction* input : instruction->GetInputs()) { AddInstruction(input, processed_instructions, discard_blocks, worklist); } @@ -132,7 +143,7 @@ static void AddInputs(HInstruction* instruction, static void AddInputs(HBasicBlock* block, const ArenaBitVector& processed_instructions, const ArenaBitVector& discard_blocks, - ArenaVector<HInstruction*>* worklist) { + ScopedArenaVector<HInstruction*>* worklist) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { AddInputs(it.Current(), processed_instructions, discard_blocks, worklist); } @@ -203,6 +214,11 @@ static HInstruction* FindIdealPosition(HInstruction* instruction, DCHECK(target_block != nullptr); } + // Bail if the instruction can throw and we are about to move into a catch block. + if (instruction->CanThrow() && target_block->GetTryCatchInformation() != nullptr) { + return nullptr; + } + // Find insertion position. No need to filter anymore, as we have found a // target block. HInstruction* insert_pos = nullptr; @@ -237,17 +253,19 @@ static HInstruction* FindIdealPosition(HInstruction* instruction, void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { - // Local allocator to discard data structures created below at the end of - // this optimization. - ArenaAllocator allocator(graph_->GetArena()->GetArenaPool()); + // Local allocator to discard data structures created below at the end of this optimization. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); size_t number_of_instructions = graph_->GetCurrentInstructionId(); - ArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc)); + ScopedArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc)); ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false); + processed_instructions.ClearAllBits(); ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false); + post_dominated.ClearAllBits(); ArenaBitVector instructions_that_can_move( &allocator, number_of_instructions, /* expandable */ false); - ArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); + instructions_that_can_move.ClearAllBits(); + ScopedArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc)); // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`. 
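SinkCodeToUncommonBranch now builds its scratch state (the worklist and the processed/post-dominated bit vectors) in a ScopedArenaAllocator and clears the bit vectors explicitly. The driver is a standard worklist walk: seed it with interesting instructions, mark each one processed in a bit vector keyed by instruction id, and keep pulling inputs until the list drains. A self-contained approximation of that pattern, using plain std containers in place of the arena-backed ones:

#include <cstddef>
#include <vector>

// Simplified stand-in for HInstruction: each node has a stable id and a
// list of input ids, which is all the worklist walk below needs.
struct Node {
  size_t id;
  std::vector<size_t> inputs;
};

// Worklist pattern used by the sinking pass: visit each node at most once,
// tracked by a bit vector indexed by instruction id, and keep pulling the
// inputs of processed nodes until the list drains.
std::vector<bool> CollectReachable(const std::vector<Node>& nodes,
                                   size_t seed_id) {
  std::vector<bool> processed(nodes.size(), false);  // ~ ArenaBitVector
  std::vector<size_t> worklist;                      // ~ ScopedArenaVector
  worklist.push_back(seed_id);
  processed[seed_id] = true;
  while (!worklist.empty()) {
    const size_t current = worklist.back();
    worklist.pop_back();
    for (size_t input : nodes[current].inputs) {
      if (!processed[input]) {   // like AddInstruction(): skip seen nodes
        processed[input] = true;
        worklist.push_back(input);
      }
    }
  }
  return processed;
}

int main() {
  // 0 <- 1 <- 2 form a chain; 3 is unrelated and must stay unmarked.
  std::vector<Node> nodes = {{0, {}}, {1, {0}}, {2, {1}}, {3, {}}};
  std::vector<bool> reachable = CollectReachable(nodes, 2);
  return (reachable[0] && reachable[1] && reachable[2] && !reachable[3]) ? 0 : 1;
}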
// TODO(ngeoffray): Getting the full set of post-dominated shoud be done by @@ -414,7 +432,7 @@ void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) { if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) { continue; } - MaybeRecordStat(MethodCompilationStat::kInstructionSunk); + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSunk); instruction->MoveBefore(position, /* ensure_safety */ false); } } diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h index 59cda52a8c..836d9d4f67 100644 --- a/compiler/optimizing/code_sinking.h +++ b/compiler/optimizing/code_sinking.h @@ -28,8 +28,10 @@ namespace art { */ class CodeSinking : public HOptimization { public: - CodeSinking(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kCodeSinkingPassName, stats) {} + CodeSinking(HGraph* graph, + OptimizingCompilerStats* stats, + const char* name = kCodeSinkingPassName) + : HOptimization(graph, name, stats) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc index 0a8e97cf0d..a0fd5ffcb1 100644 --- a/compiler/optimizing/codegen_test.cc +++ b/compiler/optimizing/codegen_test.cc @@ -18,15 +18,15 @@ #include <memory> #include "base/macros.h" +#include "base/utils.h" #include "builder.h" #include "codegen_test_utils.h" -#include "dex_file.h" -#include "dex_instruction.h" +#include "dex/dex_file.h" +#include "dex/dex_instruction.h" #include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "register_allocator_linear_scan.h" -#include "utils.h" #include "utils/arm/assembler_arm_vixl.h" #include "utils/arm/managed_register_arm.h" #include "utils/mips/managed_register_mips.h" @@ -44,22 +44,22 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { ::std::vector<CodegenTargetConfig> test_config_candidates = { #ifdef ART_ENABLE_CODEGEN_arm // TODO: Should't this be `kThumb2` instead of `kArm` here? 
- CodegenTargetConfig(kArm, create_codegen_arm_vixl32), + CodegenTargetConfig(InstructionSet::kArm, create_codegen_arm_vixl32), #endif #ifdef ART_ENABLE_CODEGEN_arm64 - CodegenTargetConfig(kArm64, create_codegen_arm64), + CodegenTargetConfig(InstructionSet::kArm64, create_codegen_arm64), #endif #ifdef ART_ENABLE_CODEGEN_x86 - CodegenTargetConfig(kX86, create_codegen_x86), + CodegenTargetConfig(InstructionSet::kX86, create_codegen_x86), #endif #ifdef ART_ENABLE_CODEGEN_x86_64 - CodegenTargetConfig(kX86_64, create_codegen_x86_64), + CodegenTargetConfig(InstructionSet::kX86_64, create_codegen_x86_64), #endif #ifdef ART_ENABLE_CODEGEN_mips - CodegenTargetConfig(kMips, create_codegen_mips), + CodegenTargetConfig(InstructionSet::kMips, create_codegen_mips), #endif #ifdef ART_ENABLE_CODEGEN_mips64 - CodegenTargetConfig(kMips64, create_codegen_mips64) + CodegenTargetConfig(InstructionSet::kMips64, create_codegen_mips64) #endif }; @@ -72,41 +72,45 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { return v; } -static void TestCode(const uint16_t* data, - bool has_result = false, - int32_t expected = 0) { +class CodegenTest : public OptimizingUnitTest { + protected: + void TestCode(const std::vector<uint16_t>& data, bool has_result = false, int32_t expected = 0); + void TestCodeLong(const std::vector<uint16_t>& data, bool has_result, int64_t expected); + void TestComparison(IfCondition condition, + int64_t i, + int64_t j, + DataType::Type type, + const CodegenTargetConfig target_config); +}; + +void CodegenTest::TestCode(const std::vector<uint16_t>& data, bool has_result, int32_t expected) { for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateCFG(&arena, data); + ResetPoolAndAllocator(); + HGraph* graph = CreateCFG(data); // Remove suspend checks, they cannot be executed in this context. RemoveSuspendChecks(graph); RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); } } -static void TestCodeLong(const uint16_t* data, - bool has_result, - int64_t expected) { +void CodegenTest::TestCodeLong(const std::vector<uint16_t>& data, + bool has_result, int64_t expected) { for (const CodegenTargetConfig& target_config : GetTargetConfigs()) { - ArenaPool pool; - ArenaAllocator arena(&pool); - HGraph* graph = CreateCFG(&arena, data, Primitive::kPrimLong); + ResetPoolAndAllocator(); + HGraph* graph = CreateCFG(data, DataType::Type::kInt64); // Remove suspend checks, they cannot be executed in this context. 
RemoveSuspendChecks(graph); RunCode(target_config, graph, [](HGraph*) {}, has_result, expected); } } -class CodegenTest : public CommonCompilerTest {}; - TEST_F(CodegenTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM(Instruction::RETURN_VOID); TestCode(data); } TEST_F(CodegenTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -114,7 +118,7 @@ TEST_F(CodegenTest, CFG1) { } TEST_F(CodegenTest, CFG2) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -123,21 +127,21 @@ TEST_F(CodegenTest, CFG2) { } TEST_F(CodegenTest, CFG3) { - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); TestCode(data1); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); TestCode(data2); - const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); @@ -146,7 +150,7 @@ TEST_F(CodegenTest, CFG3) { } TEST_F(CodegenTest, CFG4) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); @@ -155,7 +159,7 @@ TEST_F(CodegenTest, CFG4) { } TEST_F(CodegenTest, CFG5) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -165,7 +169,7 @@ TEST_F(CodegenTest, CFG5) { } TEST_F(CodegenTest, IntConstant) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -173,7 +177,7 @@ TEST_F(CodegenTest, IntConstant) { } TEST_F(CodegenTest, Return1) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN | 0); @@ -181,7 +185,7 @@ TEST_F(CodegenTest, Return1) { } TEST_F(CodegenTest, Return2) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 0 | 1 << 8, Instruction::RETURN | 1 << 8); @@ -190,7 +194,7 @@ TEST_F(CodegenTest, Return2) { } TEST_F(CodegenTest, Return3) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::RETURN | 1 << 8); @@ -199,7 +203,7 @@ TEST_F(CodegenTest, Return3) { } TEST_F(CodegenTest, ReturnIf1) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::IF_EQ, 3, @@ -210,7 +214,7 @@ TEST_F(CodegenTest, ReturnIf1) { } 
TEST_F(CodegenTest, ReturnIf2) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::IF_EQ | 0 << 4 | 1 << 8, 3, @@ -221,17 +225,17 @@ TEST_F(CodegenTest, ReturnIf2) { } // Exercise bit-wise (one's complement) not-int instruction. -#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ -TEST_F(CodegenTest, TEST_NAME) { \ - const int32_t input = INPUT; \ - const uint16_t input_lo = Low16Bits(input); \ - const uint16_t input_hi = High16Bits(input); \ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( \ - Instruction::CONST | 0 << 8, input_lo, input_hi, \ - Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ - Instruction::RETURN | 1 << 8); \ - \ - TestCode(data, true, EXPECTED_OUTPUT); \ +#define NOT_INT_TEST(TEST_NAME, INPUT, EXPECTED_OUTPUT) \ +TEST_F(CodegenTest, TEST_NAME) { \ + const int32_t input = INPUT; \ + const uint16_t input_lo = Low16Bits(input); \ + const uint16_t input_hi = High16Bits(input); \ + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( \ + Instruction::CONST | 0 << 8, input_lo, input_hi, \ + Instruction::NOT_INT | 1 << 8 | 0 << 12 , \ + Instruction::RETURN | 1 << 8); \ + \ + TestCode(data, true, EXPECTED_OUTPUT); \ } NOT_INT_TEST(ReturnNotIntMinus2, -2, 1) @@ -253,7 +257,7 @@ TEST_F(CodegenTest, TEST_NAME) { \ const uint16_t word1 = High16Bits(Low32Bits(input)); \ const uint16_t word2 = Low16Bits(High32Bits(input)); \ const uint16_t word3 = High16Bits(High32Bits(input)); /* MSW. */ \ - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( \ + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( \ Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, \ Instruction::NOT_LONG | 2 << 8 | 0 << 12, \ Instruction::RETURN_WIDE | 2 << 8); \ @@ -303,7 +307,7 @@ TEST_F(CodegenTest, IntToLongOfLongToInt) { const uint16_t word1 = High16Bits(Low32Bits(input)); const uint16_t word2 = Low16Bits(High32Bits(input)); const uint16_t word3 = High16Bits(High32Bits(input)); // MSW. 
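The NOT_INT / NOT_LONG test macros above split the test constant into 16-bit code units with Low16Bits/High16Bits (and Low32Bits/High32Bits) before embedding it in the CONST / CONST_WIDE payload. The helper below shows that split for a 64-bit value; the functions are local equivalents written for the sketch, not the real utilities from base/utils.h.

#include <cstdint>
#include <vector>

// Local equivalents of the bit-splitting helpers used by the test macros.
uint16_t Low16(uint32_t v) { return static_cast<uint16_t>(v); }
uint16_t High16(uint32_t v) { return static_cast<uint16_t>(v >> 16); }
uint32_t Low32(uint64_t v) { return static_cast<uint32_t>(v); }
uint32_t High32(uint64_t v) { return static_cast<uint32_t>(v >> 32); }

// CONST_WIDE takes its 64-bit literal as four little-endian 16-bit code
// units (word0 is the least significant), which is how the NOT_LONG test
// assembles its code item.
std::vector<uint16_t> ConstWidePayload(uint64_t value) {
  return {Low16(Low32(value)), High16(Low32(value)),
          Low16(High32(value)), High16(High32(value))};
}

int main() {
  std::vector<uint16_t> words = ConstWidePayload(0x0123456789ABCDEFull);
  // words == {0xCDEF, 0x89AB, 0x4567, 0x0123}
  return (words[0] == 0xCDEF && words[3] == 0x0123) ? 0 : 1;
}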
- const uint16_t data[] = FIVE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FIVE_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, Instruction::CONST_WIDE | 2 << 8, 1, 0, 0, 0, Instruction::ADD_LONG | 0, 0 << 8 | 2, // v0 <- 2^32 + 1 @@ -315,7 +319,7 @@ TEST_F(CodegenTest, IntToLongOfLongToInt) { } TEST_F(CodegenTest, ReturnAdd1) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT, 1 << 8 | 0, @@ -325,7 +329,7 @@ TEST_F(CodegenTest, ReturnAdd1) { } TEST_F(CodegenTest, ReturnAdd2) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, @@ -335,7 +339,7 @@ TEST_F(CodegenTest, ReturnAdd2) { } TEST_F(CodegenTest, ReturnAdd3) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); @@ -344,7 +348,7 @@ TEST_F(CodegenTest, ReturnAdd3) { } TEST_F(CodegenTest, ReturnAdd4) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::ADD_INT_LIT16, 3, Instruction::RETURN); @@ -353,7 +357,7 @@ TEST_F(CodegenTest, ReturnAdd4) { } TEST_F(CodegenTest, ReturnMulInt) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT, 1 << 8 | 0, @@ -363,7 +367,7 @@ TEST_F(CodegenTest, ReturnMulInt) { } TEST_F(CodegenTest, ReturnMulInt2addr) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::MUL_INT_2ADDR | 1 << 12, @@ -373,7 +377,7 @@ TEST_F(CodegenTest, ReturnMulInt2addr) { } TEST_F(CodegenTest, ReturnMulLong) { - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG, 2 << 8 | 0, @@ -383,7 +387,7 @@ TEST_F(CodegenTest, ReturnMulLong) { } TEST_F(CodegenTest, ReturnMulLong2addr) { - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, 3, 0, 0, 0, Instruction::CONST_WIDE | 2 << 8, 4, 0, 0, 0, Instruction::MUL_LONG_2ADDR | 2 << 12, @@ -393,7 +397,7 @@ TEST_F(CodegenTest, ReturnMulLong2addr) { } TEST_F(CodegenTest, ReturnMulIntLit8) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); @@ -402,7 +406,7 @@ TEST_F(CodegenTest, ReturnMulIntLit8) { } TEST_F(CodegenTest, ReturnMulIntLit16) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::MUL_INT_LIT16, 3, Instruction::RETURN); @@ -412,28 +416,25 @@ TEST_F(CodegenTest, ReturnMulIntLit16) { 
TEST_F(CodegenTest, NonMaterializedCondition) { for (CodegenTargetConfig target_config : GetTargetConfigs()) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); + HGraph* graph = CreateGraph(); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - entry->AddInstruction(new (&allocator) HGoto()); + entry->AddInstruction(new (GetAllocator()) HGoto()); - HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* first_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(first_block); entry->AddSuccessor(first_block); HIntConstant* constant0 = graph->GetIntConstant(0); HIntConstant* constant1 = graph->GetIntConstant(1); - HEqual* equal = new (&allocator) HEqual(constant0, constant0); + HEqual* equal = new (GetAllocator()) HEqual(constant0, constant0); first_block->AddInstruction(equal); - first_block->AddInstruction(new (&allocator) HIf(equal)); + first_block->AddInstruction(new (GetAllocator()) HIf(equal)); - HBasicBlock* then_block = new (&allocator) HBasicBlock(graph); - HBasicBlock* else_block = new (&allocator) HBasicBlock(graph); - HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* then_block = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* else_block = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* exit_block = new (GetAllocator()) HBasicBlock(graph); graph->SetExitBlock(exit_block); graph->AddBlock(then_block); @@ -444,9 +445,9 @@ TEST_F(CodegenTest, NonMaterializedCondition) { then_block->AddSuccessor(exit_block); else_block->AddSuccessor(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); - then_block->AddInstruction(new (&allocator) HReturn(constant0)); - else_block->AddInstruction(new (&allocator) HReturn(constant1)); + exit_block->AddInstruction(new (GetAllocator()) HExit()); + then_block->AddInstruction(new (GetAllocator()) HReturn(constant0)); + else_block->AddInstruction(new (GetAllocator()) HReturn(constant1)); ASSERT_FALSE(equal->IsEmittedAtUseSite()); graph->BuildDominatorTree(); @@ -455,7 +456,7 @@ TEST_F(CodegenTest, NonMaterializedCondition) { auto hook_before_codegen = [](HGraph* graph_in) { HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; - HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); + HParallelMove* move = new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; @@ -475,19 +476,17 @@ TEST_F(CodegenTest, MaterializedCondition1) { int rhs[] = {2, 1, 2, -1, 0xabc}; for (size_t i = 0; i < arraysize(lhs); i++) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); + HGraph* graph = CreateGraph(); - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* entry_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry_block); graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); - HBasicBlock* code_block = new (&allocator) HBasicBlock(graph); + entry_block->AddInstruction(new (GetAllocator()) HGoto()); + HBasicBlock* code_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(code_block); - HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* exit_block = new (GetAllocator()) HBasicBlock(graph); 
graph->AddBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); + exit_block->AddInstruction(new (GetAllocator()) HExit()); entry_block->AddSuccessor(code_block); code_block->AddSuccessor(exit_block); @@ -503,7 +502,8 @@ TEST_F(CodegenTest, MaterializedCondition1) { graph->BuildDominatorTree(); auto hook_before_codegen = [](HGraph* graph_in) { HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; - HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); + HParallelMove* move = + new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); @@ -523,24 +523,22 @@ TEST_F(CodegenTest, MaterializedCondition2) { for (size_t i = 0; i < arraysize(lhs); i++) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); + HGraph* graph = CreateGraph(); - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* entry_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry_block); graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); + entry_block->AddInstruction(new (GetAllocator()) HGoto()); - HBasicBlock* if_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(if_block); - HBasicBlock* if_true_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* if_true_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(if_true_block); - HBasicBlock* if_false_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* if_false_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(if_false_block); - HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* exit_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); + exit_block->AddInstruction(new (GetAllocator()) HExit()); graph->SetEntryBlock(entry_block); entry_block->AddSuccessor(if_block); @@ -571,7 +569,8 @@ TEST_F(CodegenTest, MaterializedCondition2) { graph->BuildDominatorTree(); auto hook_before_codegen = [](HGraph* graph_in) { HBasicBlock* block = graph_in->GetEntryBlock()->GetSuccessors()[0]; - HParallelMove* move = new (graph_in->GetArena()) HParallelMove(graph_in->GetArena()); + HParallelMove* move = + new (graph_in->GetAllocator()) HParallelMove(graph_in->GetAllocator()); block->InsertInstructionBefore(move, block->GetLastInstruction()); }; RunCode(target_config, graph, hook_before_codegen, true, lhs[i] < rhs[i]); @@ -580,7 +579,7 @@ TEST_F(CodegenTest, MaterializedCondition2) { } TEST_F(CodegenTest, ReturnDivIntLit8) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0 << 8, Instruction::DIV_INT_LIT8, 3 << 8 | 0, Instruction::RETURN); @@ -589,7 +588,7 @@ TEST_F(CodegenTest, ReturnDivIntLit8) { } TEST_F(CodegenTest, ReturnDivInt2Addr) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 4 << 12 | 0, Instruction::CONST_4 | 2 << 12 | 1 << 8, Instruction::DIV_INT_2ADDR | 1 << 12, @@ -599,38 +598,36 @@ TEST_F(CodegenTest, ReturnDivInt2Addr) { } // Helper method. 
-static void TestComparison(IfCondition condition, - int64_t i, - int64_t j, - Primitive::Type type, - const CodegenTargetConfig target_config) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); - - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); +void CodegenTest::TestComparison(IfCondition condition, + int64_t i, + int64_t j, + DataType::Type type, + const CodegenTargetConfig target_config) { + HGraph* graph = CreateGraph(); + + HBasicBlock* entry_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry_block); graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); + entry_block->AddInstruction(new (GetAllocator()) HGoto()); - HBasicBlock* block = new (&allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); - HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* exit_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(exit_block); graph->SetExitBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); + exit_block->AddInstruction(new (GetAllocator()) HExit()); entry_block->AddSuccessor(block); block->AddSuccessor(exit_block); HInstruction* op1; HInstruction* op2; - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { op1 = graph->GetIntConstant(i); op2 = graph->GetIntConstant(j); } else { - DCHECK_EQ(type, Primitive::kPrimLong); + DCHECK_EQ(type, DataType::Type::kInt64); op1 = graph->GetLongConstant(i); op2 = graph->GetLongConstant(j); } @@ -641,48 +638,48 @@ static void TestComparison(IfCondition condition, const uint64_t y = j; switch (condition) { case kCondEQ: - comparison = new (&allocator) HEqual(op1, op2); + comparison = new (GetAllocator()) HEqual(op1, op2); expected_result = (i == j); break; case kCondNE: - comparison = new (&allocator) HNotEqual(op1, op2); + comparison = new (GetAllocator()) HNotEqual(op1, op2); expected_result = (i != j); break; case kCondLT: - comparison = new (&allocator) HLessThan(op1, op2); + comparison = new (GetAllocator()) HLessThan(op1, op2); expected_result = (i < j); break; case kCondLE: - comparison = new (&allocator) HLessThanOrEqual(op1, op2); + comparison = new (GetAllocator()) HLessThanOrEqual(op1, op2); expected_result = (i <= j); break; case kCondGT: - comparison = new (&allocator) HGreaterThan(op1, op2); + comparison = new (GetAllocator()) HGreaterThan(op1, op2); expected_result = (i > j); break; case kCondGE: - comparison = new (&allocator) HGreaterThanOrEqual(op1, op2); + comparison = new (GetAllocator()) HGreaterThanOrEqual(op1, op2); expected_result = (i >= j); break; case kCondB: - comparison = new (&allocator) HBelow(op1, op2); + comparison = new (GetAllocator()) HBelow(op1, op2); expected_result = (x < y); break; case kCondBE: - comparison = new (&allocator) HBelowOrEqual(op1, op2); + comparison = new (GetAllocator()) HBelowOrEqual(op1, op2); expected_result = (x <= y); break; case kCondA: - comparison = new (&allocator) HAbove(op1, op2); + comparison = new (GetAllocator()) HAbove(op1, op2); expected_result = (x > y); break; case kCondAE: - comparison = new (&allocator) HAboveOrEqual(op1, op2); + comparison = new (GetAllocator()) HAboveOrEqual(op1, op2); expected_result = (x >= y); break; } block->AddInstruction(comparison); - block->AddInstruction(new (&allocator) HReturn(comparison)); + block->AddInstruction(new (GetAllocator()) HReturn(comparison)); graph->BuildDominatorTree(); 
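TestComparison above feeds the same operands to every condition but evaluates the expected result for HBelow/HBelowOrEqual/HAbove/HAboveOrEqual on unsigned copies (x, y), because those HIR conditions are unsigned comparisons. A small mirror of that expected-result computation, with a check that makes the signed/unsigned difference concrete; the enum values here are local to the sketch.

#include <cstdint>
#include <cassert>

// Mirror of the expected-result switch in TestComparison: the B/BE/A/AE
// conditions reinterpret the same bit patterns as uint64_t before comparing.
enum IfCondition { kCondEQ, kCondNE, kCondLT, kCondLE, kCondGT, kCondGE,
                   kCondB, kCondBE, kCondA, kCondAE };

bool Expected(IfCondition cond, int64_t i, int64_t j) {
  const uint64_t x = static_cast<uint64_t>(i);
  const uint64_t y = static_cast<uint64_t>(j);
  switch (cond) {
    case kCondEQ: return i == j;
    case kCondNE: return i != j;
    case kCondLT: return i < j;    // signed
    case kCondLE: return i <= j;
    case kCondGT: return i > j;
    case kCondGE: return i >= j;
    case kCondB:  return x < y;    // unsigned
    case kCondBE: return x <= y;
    case kCondA:  return x > y;
    case kCondAE: return x >= y;
  }
  return false;
}

int main() {
  // -1 reinterprets as the largest unsigned value, so "less than" holds
  // while "below" does not.
  assert(Expected(kCondLT, -1, 1));
  assert(!Expected(kCondB, -1, 1));
  return 0;
}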
RunCode(target_config, graph, [](HGraph*) {}, true, expected_result); @@ -693,7 +690,8 @@ TEST_F(CodegenTest, ComparisonsInt) { for (int64_t i = -1; i <= 1; i++) { for (int64_t j = -1; j <= 1; j++) { for (int cond = kCondFirst; cond <= kCondLast; cond++) { - TestComparison(static_cast<IfCondition>(cond), i, j, Primitive::kPrimInt, target_config); + TestComparison( + static_cast<IfCondition>(cond), i, j, DataType::Type::kInt32, target_config); } } } @@ -705,7 +703,8 @@ TEST_F(CodegenTest, ComparisonsLong) { for (int64_t i = -1; i <= 1; i++) { for (int64_t j = -1; j <= 1; j++) { for (int cond = kCondFirst; cond <= kCondLast; cond++) { - TestComparison(static_cast<IfCondition>(cond), i, j, Primitive::kPrimLong, target_config); + TestComparison( + static_cast<IfCondition>(cond), i, j, DataType::Type::kInt64, target_config); } } } @@ -716,9 +715,7 @@ TEST_F(CodegenTest, ComparisonsLong) { TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { std::unique_ptr<const ArmInstructionSetFeatures> features( ArmInstructionSetFeatures::FromCppDefines()); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); + HGraph* graph = CreateGraph(); arm::CodeGeneratorARMVIXL codegen(graph, *features.get(), CompilerOptions()); codegen.Initialize(); @@ -727,9 +724,9 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { // int mem2) which was faulty (before the fix). So previously GPR and FP scratch registers were // used as temps; however GPR scratch register is required for big stack offsets which don't fit // LDR encoding. So the following code is a regression test for that situation. - HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena()); - move->AddMove(Location::StackSlot(0), Location::StackSlot(8192), Primitive::kPrimInt, nullptr); - move->AddMove(Location::StackSlot(8192), Location::StackSlot(0), Primitive::kPrimInt, nullptr); + HParallelMove* move = new (graph->GetAllocator()) HParallelMove(graph->GetAllocator()); + move->AddMove(Location::StackSlot(0), Location::StackSlot(8192), DataType::Type::kInt32, nullptr); + move->AddMove(Location::StackSlot(8192), Location::StackSlot(0), DataType::Type::kInt32, nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); InternalCodeAllocator code_allocator; @@ -742,9 +739,7 @@ TEST_F(CodegenTest, ARMVIXLParallelMoveResolver) { TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { std::unique_ptr<const Arm64InstructionSetFeatures> features( Arm64InstructionSetFeatures::FromCppDefines()); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); + HGraph* graph = CreateGraph(); arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions()); codegen.Initialize(); @@ -775,14 +770,14 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { // The solution used so far is to use a floating-point temp register // (D31) in step #2, so that IP1 is available for step #3. 
- HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena()); + HParallelMove* move = new (graph->GetAllocator()) HParallelMove(graph->GetAllocator()); move->AddMove(Location::DoubleStackSlot(0), Location::DoubleStackSlot(257), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); move->AddMove(Location::DoubleStackSlot(257), Location::DoubleStackSlot(0), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); @@ -794,31 +789,29 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverB34760542) { TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { std::unique_ptr<const Arm64InstructionSetFeatures> features( Arm64InstructionSetFeatures::FromCppDefines()); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); + HGraph* graph = CreateGraph(); arm64::CodeGeneratorARM64 codegen(graph, *features.get(), CompilerOptions()); codegen.Initialize(); graph->SetHasSIMD(true); for (int i = 0; i < 2; i++) { - HParallelMove* move = new (graph->GetArena()) HParallelMove(graph->GetArena()); + HParallelMove* move = new (graph->GetAllocator()) HParallelMove(graph->GetAllocator()); move->AddMove(Location::SIMDStackSlot(0), Location::SIMDStackSlot(257), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); move->AddMove(Location::SIMDStackSlot(257), Location::SIMDStackSlot(0), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); move->AddMove(Location::FpuRegisterLocation(0), Location::FpuRegisterLocation(1), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); move->AddMove(Location::FpuRegisterLocation(1), Location::FpuRegisterLocation(0), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); codegen.GetMoveResolver()->EmitNativeCode(move); graph->SetHasSIMD(false); @@ -833,39 +826,37 @@ TEST_F(CodegenTest, ARM64ParallelMoveResolverSIMD) { TEST_F(CodegenTest, MipsClobberRA) { std::unique_ptr<const MipsInstructionSetFeatures> features_mips( MipsInstructionSetFeatures::FromCppDefines()); - if (!CanExecute(kMips) || features_mips->IsR6()) { + if (!CanExecute(InstructionSet::kMips) || features_mips->IsR6()) { // HMipsComputeBaseMethodAddress and the NAL instruction behind it // should only be generated on non-R6. return; } - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); + HGraph* graph = CreateGraph(); - HBasicBlock* entry_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* entry_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry_block); graph->SetEntryBlock(entry_block); - entry_block->AddInstruction(new (&allocator) HGoto()); + entry_block->AddInstruction(new (GetAllocator()) HGoto()); - HBasicBlock* block = new (&allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); - HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* exit_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(exit_block); graph->SetExitBlock(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); + exit_block->AddInstruction(new (GetAllocator()) HExit()); entry_block->AddSuccessor(block); block->AddSuccessor(exit_block); // To simplify matters, don't create PC-relative HLoadClass or HLoadString. // Instead, generate HMipsComputeBaseMethodAddress directly. 
-  HMipsComputeBaseMethodAddress* base = new (&allocator) HMipsComputeBaseMethodAddress();
+  HMipsComputeBaseMethodAddress* base = new (GetAllocator()) HMipsComputeBaseMethodAddress();
   block->AddInstruction(base);
   // HMipsComputeBaseMethodAddress is defined as int, so just make the
   // compiled method return it.
-  block->AddInstruction(new (&allocator) HReturn(base));
+  block->AddInstruction(new (GetAllocator()) HReturn(base));
   graph->BuildDominatorTree();
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index 1b38acd8b0..c41c290c8b 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -28,6 +28,7 @@
 #include "arch/x86/instruction_set_features_x86.h"
 #include "arch/x86/registers_x86.h"
 #include "arch/x86_64/instruction_set_features_x86_64.h"
+#include "code_simulator.h"
 #include "code_simulator_container.h"
 #include "common_compiler_test.h"
 #include "graph_checker.h"
@@ -78,6 +79,21 @@ class CodegenTargetConfig {
 };
 #ifdef ART_ENABLE_CODEGEN_arm
+// Special ARM code generator for codegen testing in a limited code
+// generation environment (i.e. with no runtime support).
+//
+// Note: If we want to exercise certain HIR constructions
+// (e.g. reference field load in Baker read barrier configuration) in
+// codegen tests in the future, we should also:
+// - save the Thread Register (R9) and possibly the Marking Register
+//   (R8) before entering the generated function (both registers are
+//   callee-save in AAPCS);
+// - set these registers to meaningful values before or upon entering
+//   the generated function (so that generated code using them is
+//   correct);
+// - restore their original values before leaving the generated
+//   function.
+
 // Provide our own codegen, that ensures the C calling conventions
 // are preserved. Currently, ART and C do not match as R4 is caller-save
 // in ART, and callee-save in C. Alternatively, we could use or write
@@ -99,6 +115,50 @@ class TestCodeGeneratorARMVIXL : public arm::CodeGeneratorARMVIXL {
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
   }
+
+  void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
+                                         Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+    // When turned on, the marking register checks in
+    // CodeGeneratorARMVIXL::MaybeGenerateMarkingRegisterCheck expect the
+    // Thread Register and the Marking Register to be set to
+    // meaningful values. This is not the case in codegen testing, so
+    // just disable them entirely here (by doing nothing in this
+    // method).
+  }
+};
+#endif
+
+#ifdef ART_ENABLE_CODEGEN_arm64
+// Special ARM64 code generator for codegen testing in a limited code
+// generation environment (i.e. with no runtime support).
+//
+// Note: If we want to exercise certain HIR constructions
+// (e.g. reference field load in Baker read barrier configuration) in
+// codegen tests in the future, we should also:
+// - save the Thread Register (X19) and possibly the Marking Register
+//   (X20) before entering the generated function (both registers are
+//   callee-save in AAPCS64);
+// - set these registers to meaningful values before or upon entering
+//   the generated function (so that generated code using them is
+//   correct);
+// - restore their original values before leaving the generated
+//   function.
+class TestCodeGeneratorARM64 : public arm64::CodeGeneratorARM64 {
+ public:
+  TestCodeGeneratorARM64(HGraph* graph,
+                         const Arm64InstructionSetFeatures& isa_features,
+                         const CompilerOptions& compiler_options)
+      : arm64::CodeGeneratorARM64(graph, isa_features, compiler_options) {}
+
+  void MaybeGenerateMarkingRegisterCheck(int code ATTRIBUTE_UNUSED,
+                                         Location temp_loc ATTRIBUTE_UNUSED) OVERRIDE {
+    // When turned on, the marking register checks in
+    // CodeGeneratorARM64::MaybeGenerateMarkingRegisterCheck expect the
+    // Thread Register and the Marking Register to be set to
+    // meaningful values. This is not the case in codegen testing, so
+    // just disable them entirely here (by doing nothing in this
+    // method).
+  }
 };
 #endif
@@ -147,7 +207,7 @@ class InternalCodeAllocator : public CodeAllocator {
 static bool CanExecuteOnHardware(InstructionSet target_isa) {
   return (target_isa == kRuntimeISA)
       // Handle the special case of ARM, with two instructions sets (ARM32 and Thumb-2).
-      || (kRuntimeISA == kArm && target_isa == kThumb2);
+      || (kRuntimeISA == InstructionSet::kArm && target_isa == InstructionSet::kThumb2);
 }
 static bool CanExecute(InstructionSet target_isa) {
@@ -211,7 +271,7 @@ static void Run(const InternalCodeAllocator& allocator,
   typedef Expected (*fptr)();
   CommonCompilerTest::MakeExecutable(allocator.GetMemory(), allocator.GetSize());
   fptr f = reinterpret_cast<fptr>(allocator.GetMemory());
-  if (target_isa == kThumb2) {
+  if (target_isa == InstructionSet::kThumb2) {
     // For thumb we need the bottom bit set.
     f = reinterpret_cast<fptr>(reinterpret_cast<uintptr_t>(f) + 1);
   }
@@ -235,10 +295,15 @@ static void RunCodeNoCheck(CodeGenerator* codegen,
                            const std::function<void(HGraph*)>& hook_before_codegen,
                            bool has_result,
                            Expected expected) {
-  SsaLivenessAnalysis liveness(graph, codegen);
-  PrepareForRegisterAllocation(graph).Run();
-  liveness.Analyze();
-  RegisterAllocator::Create(graph->GetArena(), codegen, liveness)->AllocateRegisters();
+  {
+    ScopedArenaAllocator local_allocator(graph->GetArenaStack());
+    SsaLivenessAnalysis liveness(graph, codegen, &local_allocator);
+    PrepareForRegisterAllocation(graph).Run();
+    liveness.Analyze();
+    std::unique_ptr<RegisterAllocator> register_allocator =
+        RegisterAllocator::Create(&local_allocator, codegen, liveness);
+    register_allocator->AllocateRegisters();
+  }
   hook_before_codegen(graph);
   InternalCodeAllocator allocator;
   codegen->Compile(&allocator);
@@ -262,7 +327,8 @@ static void RunCode(CodegenTargetConfig target_config,
                     bool has_result,
                     Expected expected) {
   CompilerOptions compiler_options;
-  std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph, compiler_options));
+  std::unique_ptr<CodeGenerator> codegen(target_config.CreateCodeGenerator(graph,
+                                                                           compiler_options));
   RunCode(codegen.get(), graph, hook_before_codegen, has_result, expected);
 }
@@ -270,7 +336,7 @@ static void RunCode(CodegenTargetConfig target_config,
 CodeGenerator* create_codegen_arm_vixl32(HGraph* graph, const CompilerOptions& compiler_options) {
   std::unique_ptr<const ArmInstructionSetFeatures> features_arm(
       ArmInstructionSetFeatures::FromCppDefines());
-  return new (graph->GetArena())
+  return new (graph->GetAllocator())
       TestCodeGeneratorARMVIXL(graph, *features_arm.get(), compiler_options);
 }
 #endif
@@ -279,9 +345,8 @@ CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compil
 CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compiler_options) {
   std::unique_ptr<const
Arm64InstructionSetFeatures> features_arm64( Arm64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetArena()) arm64::CodeGeneratorARM64(graph, - *features_arm64.get(), - compiler_options); + return new (graph->GetAllocator()) + TestCodeGeneratorARM64(graph, *features_arm64.get(), compiler_options); } #endif @@ -289,7 +354,8 @@ CodeGenerator* create_codegen_arm64(HGraph* graph, const CompilerOptions& compil CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler_options) { std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); - return new (graph->GetArena()) TestCodeGeneratorX86(graph, *features_x86.get(), compiler_options); + return new (graph->GetAllocator()) TestCodeGeneratorX86( + graph, *features_x86.get(), compiler_options); } #endif @@ -297,7 +363,7 @@ CodeGenerator* create_codegen_x86(HGraph* graph, const CompilerOptions& compiler CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compiler_options) { std::unique_ptr<const X86_64InstructionSetFeatures> features_x86_64( X86_64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetArena()) + return new (graph->GetAllocator()) x86_64::CodeGeneratorX86_64(graph, *features_x86_64.get(), compiler_options); } #endif @@ -306,7 +372,7 @@ CodeGenerator* create_codegen_x86_64(HGraph* graph, const CompilerOptions& compi CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compiler_options) { std::unique_ptr<const MipsInstructionSetFeatures> features_mips( MipsInstructionSetFeatures::FromCppDefines()); - return new (graph->GetArena()) + return new (graph->GetAllocator()) mips::CodeGeneratorMIPS(graph, *features_mips.get(), compiler_options); } #endif @@ -315,7 +381,7 @@ CodeGenerator* create_codegen_mips(HGraph* graph, const CompilerOptions& compile CodeGenerator* create_codegen_mips64(HGraph* graph, const CompilerOptions& compiler_options) { std::unique_ptr<const Mips64InstructionSetFeatures> features_mips64( Mips64InstructionSetFeatures::FromCppDefines()); - return new (graph->GetArena()) + return new (graph->GetAllocator()) mips64::CodeGeneratorMIPS64(graph, *features_mips64.get(), compiler_options); } #endif diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h index 01304ac35b..356ff9f41f 100644 --- a/compiler/optimizing/common_arm.h +++ b/compiler/optimizing/common_arm.h @@ -17,8 +17,8 @@ #ifndef ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ #define ART_COMPILER_OPTIMIZING_COMMON_ARM_H_ -#include "instruction_simplifier_shared.h" #include "debug/dwarf/register.h" +#include "instruction_simplifier_shared.h" #include "locations.h" #include "nodes.h" #include "utils/arm/constants_arm.h" @@ -76,8 +76,8 @@ inline vixl::aarch32::Register RegisterFrom(Location location) { return vixl::aarch32::Register(location.reg()); } -inline vixl::aarch32::Register RegisterFrom(Location location, Primitive::Type type) { - DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type; +inline vixl::aarch32::Register RegisterFrom(Location location, DataType::Type type) { + DCHECK(type != DataType::Type::kVoid && !DataType::IsFloatingPointType(type)) << type; return RegisterFrom(location); } @@ -94,20 +94,20 @@ inline vixl::aarch32::SRegister SRegisterFrom(Location location) { } inline vixl::aarch32::SRegister OutputSRegister(HInstruction* instr) { - Primitive::Type type = instr->GetType(); - DCHECK_EQ(type, Primitive::kPrimFloat) << type; + DataType::Type type = 
instr->GetType(); + DCHECK_EQ(type, DataType::Type::kFloat32) << type; return SRegisterFrom(instr->GetLocations()->Out()); } inline vixl::aarch32::DRegister OutputDRegister(HInstruction* instr) { - Primitive::Type type = instr->GetType(); - DCHECK_EQ(type, Primitive::kPrimDouble) << type; + DataType::Type type = instr->GetType(); + DCHECK_EQ(type, DataType::Type::kFloat64) << type; return DRegisterFrom(instr->GetLocations()->Out()); } inline vixl::aarch32::VRegister OutputVRegister(HInstruction* instr) { - Primitive::Type type = instr->GetType(); - if (type == Primitive::kPrimFloat) { + DataType::Type type = instr->GetType(); + if (type == DataType::Type::kFloat32) { return OutputSRegister(instr); } else { return OutputDRegister(instr); @@ -115,23 +115,23 @@ inline vixl::aarch32::VRegister OutputVRegister(HInstruction* instr) { } inline vixl::aarch32::SRegister InputSRegisterAt(HInstruction* instr, int input_index) { - Primitive::Type type = instr->InputAt(input_index)->GetType(); - DCHECK_EQ(type, Primitive::kPrimFloat) << type; + DataType::Type type = instr->InputAt(input_index)->GetType(); + DCHECK_EQ(type, DataType::Type::kFloat32) << type; return SRegisterFrom(instr->GetLocations()->InAt(input_index)); } inline vixl::aarch32::DRegister InputDRegisterAt(HInstruction* instr, int input_index) { - Primitive::Type type = instr->InputAt(input_index)->GetType(); - DCHECK_EQ(type, Primitive::kPrimDouble) << type; + DataType::Type type = instr->InputAt(input_index)->GetType(); + DCHECK_EQ(type, DataType::Type::kFloat64) << type; return DRegisterFrom(instr->GetLocations()->InAt(input_index)); } inline vixl::aarch32::VRegister InputVRegisterAt(HInstruction* instr, int input_index) { - Primitive::Type type = instr->InputAt(input_index)->GetType(); - if (type == Primitive::kPrimFloat) { + DataType::Type type = instr->InputAt(input_index)->GetType(); + if (type == DataType::Type::kFloat32) { return InputSRegisterAt(instr, input_index); } else { - DCHECK_EQ(type, Primitive::kPrimDouble); + DCHECK_EQ(type, DataType::Type::kFloat64); return InputDRegisterAt(instr, input_index); } } @@ -196,7 +196,7 @@ inline uint64_t Uint64ConstantFrom(HInstruction* instr) { return instr->AsConstant()->GetValueAsUint64(); } -inline vixl::aarch32::Operand OperandFrom(Location location, Primitive::Type type) { +inline vixl::aarch32::Operand OperandFrom(Location location, DataType::Type type) { if (location.IsRegister()) { return vixl::aarch32::Operand(RegisterFrom(location, type)); } else { @@ -227,14 +227,6 @@ inline Location LocationFrom(const vixl::aarch32::SRegister& low, return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode()); } -inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { - DCHECK(HasShifterOperand(instruction, kArm)); - // TODO: HAdd applied to the other integral types could make use of - // the SXTAB, SXTAH, UXTAB and UXTAH instructions. 
- return instruction->GetType() == Primitive::kPrimLong && - (instruction->IsAdd() || instruction->IsSub()); -} - } // namespace helpers } // namespace arm } // namespace art diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h index e73fd7ddc8..ed2f8e995d 100644 --- a/compiler/optimizing/common_arm64.h +++ b/compiler/optimizing/common_arm64.h @@ -73,9 +73,9 @@ inline vixl::aarch64::Register WRegisterFrom(Location location) { return vixl::aarch64::Register::GetWRegFromCode(VIXLRegCodeFromART(location.reg())); } -inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) { - DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type; - return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location); +inline vixl::aarch64::Register RegisterFrom(Location location, DataType::Type type) { + DCHECK(type != DataType::Type::kVoid && !DataType::IsFloatingPointType(type)) << type; + return type == DataType::Type::kInt64 ? XRegisterFrom(location) : WRegisterFrom(location); } inline vixl::aarch64::Register OutputRegister(HInstruction* instr) { @@ -107,9 +107,9 @@ inline vixl::aarch64::FPRegister SRegisterFrom(Location location) { return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg()); } -inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) { - DCHECK(Primitive::IsFloatingPointType(type)) << type; - return type == Primitive::kPrimDouble ? DRegisterFrom(location) : SRegisterFrom(location); +inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, DataType::Type type) { + DCHECK(DataType::IsFloatingPointType(type)) << type; + return type == DataType::Type::kFloat64 ? DRegisterFrom(location) : SRegisterFrom(location); } inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) { @@ -121,20 +121,20 @@ inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int inpu instr->InputAt(input_index)->GetType()); } -inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) { - return Primitive::IsFloatingPointType(type) +inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, DataType::Type type) { + return DataType::IsFloatingPointType(type) ? vixl::aarch64::CPURegister(FPRegisterFrom(location, type)) : vixl::aarch64::CPURegister(RegisterFrom(location, type)); } inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) { - return Primitive::IsFloatingPointType(instr->GetType()) + return DataType::IsFloatingPointType(instr->GetType()) ? static_cast<vixl::aarch64::CPURegister>(OutputFPRegister(instr)) : static_cast<vixl::aarch64::CPURegister>(OutputRegister(instr)); } inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) { - return Primitive::IsFloatingPointType(instr->InputAt(index)->GetType()) + return DataType::IsFloatingPointType(instr->InputAt(index)->GetType()) ? 
static_cast<vixl::aarch64::CPURegister>(InputFPRegisterAt(instr, index)) : static_cast<vixl::aarch64::CPURegister>(InputRegisterAt(instr, index)); } @@ -142,9 +142,9 @@ inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int in inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* instr, int index) { HInstruction* input = instr->InputAt(index); - Primitive::Type input_type = input->GetType(); + DataType::Type input_type = input->GetType(); if (input->IsConstant() && input->AsConstant()->IsZeroBitPattern()) { - return (Primitive::ComponentSize(input_type) >= vixl::aarch64::kXRegSizeInBytes) + return (DataType::Size(input_type) >= vixl::aarch64::kXRegSizeInBytes) ? vixl::aarch64::Register(vixl::aarch64::xzr) : vixl::aarch64::Register(vixl::aarch64::wzr); } @@ -163,7 +163,7 @@ inline int64_t Int64ConstantFrom(Location location) { } } -inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) { +inline vixl::aarch64::Operand OperandFrom(Location location, DataType::Type type) { if (location.IsRegister()) { return vixl::aarch64::Operand(RegisterFrom(location, type)); } else { @@ -202,7 +202,7 @@ inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base } inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) { - return HeapOperand(RegisterFrom(location, Primitive::kPrimNot), offset); + return HeapOperand(RegisterFrom(location, DataType::Type::kReference), offset); } inline Location LocationFrom(const vixl::aarch64::Register& reg) { @@ -342,7 +342,7 @@ inline vixl::aarch64::Extend ExtendFromOpKind(HDataProcWithShifterOp::OpKind op_ } inline bool ShifterOperandSupportsExtension(HInstruction* instruction) { - DCHECK(HasShifterOperand(instruction, kArm64)); + DCHECK(HasShifterOperand(instruction, InstructionSet::kArm64)); // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg` // does *not* support extension. This is because the `extended register` form // of the `sub` instruction interprets the left register with code 31 as the diff --git a/compiler/optimizing/constant_folding.cc b/compiler/optimizing/constant_folding.cc index 5f39a49d68..6f11e628ee 100644 --- a/compiler/optimizing/constant_folding.cc +++ b/compiler/optimizing/constant_folding.cc @@ -113,7 +113,7 @@ void HConstantFoldingVisitor::VisitBinaryOperation(HBinaryOperation* inst) { void HConstantFoldingVisitor::VisitTypeConversion(HTypeConversion* inst) { // Constant folding: replace `TypeConversion(a)' with a constant at // compile time if `a' is a constant. 
- HConstant* constant = inst->AsTypeConversion()->TryStaticEvaluation(); + HConstant* constant = inst->TryStaticEvaluation(); if (constant != nullptr) { inst->ReplaceWith(constant); inst->GetBlock()->RemoveInstruction(inst); @@ -150,7 +150,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitEqual(HEqual* instruction) { // EQUAL lhs, null // where lhs cannot be null with // CONSTANT false - instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 0)); + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); instruction->GetBlock()->RemoveInstruction(instruction); } } @@ -162,7 +162,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitNotEqual(HNotEqual* instructi // NOT_EQUAL lhs, null // where lhs cannot be null with // CONSTANT true - instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 1)); + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); instruction->GetBlock()->RemoveInstruction(instruction); } } @@ -174,7 +174,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitAbove(HAbove* instruction) { // ABOVE dst, 0, src // unsigned 0 > src is always false // with // CONSTANT false - instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 0)); + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); instruction->GetBlock()->RemoveInstruction(instruction); } } @@ -186,7 +186,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitAboveOrEqual(HAboveOrEqual* i // ABOVE_OR_EQUAL dst, src, 0 // unsigned src >= 0 is always true // with // CONSTANT true - instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 1)); + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); instruction->GetBlock()->RemoveInstruction(instruction); } } @@ -198,7 +198,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelow(HBelow* instruction) { // BELOW dst, src, 0 // unsigned src < 0 is always false // with // CONSTANT false - instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 0)); + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 0)); instruction->GetBlock()->RemoveInstruction(instruction); } } @@ -210,7 +210,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitBelowOrEqual(HBelowOrEqual* i // BELOW_OR_EQUAL dst, 0, src // unsigned 0 <= src is always true // with // CONSTANT true - instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimBoolean, 1)); + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kBool, 1)); instruction->GetBlock()->RemoveInstruction(instruction); } } @@ -231,7 +231,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitCompare(HCompare* instruction HConstant* input_cst = instruction->GetConstantRight(); if (input_cst != nullptr) { HInstruction* input_value = instruction->GetLeastConstantLeft(); - if (Primitive::IsFloatingPointType(input_value->GetType()) && + if (DataType::IsFloatingPointType(input_value->GetType()) && ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->IsNaN()) || (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->IsNaN()))) { // Replace code looking like @@ -240,7 +240,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitCompare(HCompare* instruction // CONSTANT +1 (gt bias) // or // CONSTANT -1 (lt bias) - instruction->ReplaceWith(GetGraph()->GetConstant(Primitive::kPrimInt, + instruction->ReplaceWith(GetGraph()->GetConstant(DataType::Type::kInt32, (instruction->IsGtBias() ? 
1 : -1))); instruction->GetBlock()->RemoveInstruction(instruction); } @@ -249,8 +249,8 @@ void InstructionWithAbsorbingInputSimplifier::VisitCompare(HCompare* instruction void InstructionWithAbsorbingInputSimplifier::VisitMul(HMul* instruction) { HConstant* input_cst = instruction->GetConstantRight(); - Primitive::Type type = instruction->GetType(); - if (Primitive::IsIntOrLongType(type) && + DataType::Type type = instruction->GetType(); + if (DataType::IsIntOrLongType(type) && (input_cst != nullptr) && input_cst->IsArithmeticZero()) { // Replace code looking like // MUL dst, src, 0 @@ -282,9 +282,9 @@ void InstructionWithAbsorbingInputSimplifier::VisitOr(HOr* instruction) { } void InstructionWithAbsorbingInputSimplifier::VisitRem(HRem* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); - if (!Primitive::IsIntegralType(type)) { + if (!DataType::IsIntegralType(type)) { return; } @@ -326,9 +326,9 @@ void InstructionWithAbsorbingInputSimplifier::VisitShr(HShr* instruction) { } void InstructionWithAbsorbingInputSimplifier::VisitSub(HSub* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); - if (!Primitive::IsIntegralType(type)) { + if (!DataType::IsIntegralType(type)) { return; } @@ -360,7 +360,7 @@ void InstructionWithAbsorbingInputSimplifier::VisitXor(HXor* instruction) { // XOR dst, src, src // with // CONSTANT 0 - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); HBasicBlock* block = instruction->GetBlock(); instruction->ReplaceWith(GetGraph()->GetConstant(type, 0)); block->RemoveInstruction(instruction); diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc index 7ef28ed910..d27104752b 100644 --- a/compiler/optimizing/constant_folding_test.cc +++ b/compiler/optimizing/constant_folding_test.cc @@ -32,19 +32,17 @@ namespace art { /** * Fixture class for the constant folding and dce tests. */ -class ConstantFoldingTest : public CommonCompilerTest { +class ConstantFoldingTest : public OptimizingUnitTest { public: - ConstantFoldingTest() : pool_(), allocator_(&pool_) { - graph_ = CreateGraph(&allocator_); - } + ConstantFoldingTest() : graph_(nullptr) { } - void TestCode(const uint16_t* data, + void TestCode(const std::vector<uint16_t>& data, const std::string& expected_before, const std::string& expected_after_cf, const std::string& expected_after_dce, const std::function<void(HGraph*)>& check_after_cf, - Primitive::Type return_type = Primitive::kPrimInt) { - graph_ = CreateCFG(&allocator_, data, return_type); + DataType::Type return_type = DataType::Type::kInt32) { + graph_ = CreateCFG(data, return_type); TestCodeOnReadyGraph(expected_before, expected_after_cf, expected_after_dce, @@ -88,8 +86,6 @@ class ConstantFoldingTest : public CommonCompilerTest { EXPECT_EQ(expected_after_dce, actual_after_dce); } - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; }; @@ -104,7 +100,7 @@ class ConstantFoldingTest : public CommonCompilerTest { * return v1 2. 
return v1 */ TEST_F(ConstantFoldingTest, IntConstantFoldingNegation) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::NEG_INT | 1 << 8 | 0 << 12, Instruction::RETURN | 1 << 8); @@ -165,7 +161,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) { const uint16_t word1 = High16Bits(Low32Bits(input)); const uint16_t word2 = Low16Bits(High32Bits(input)); const uint16_t word3 = High16Bits(High32Bits(input)); // MSW. - const uint16_t data[] = FOUR_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = FOUR_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE | 0 << 8, word0, word1, word2, word3, Instruction::NEG_LONG | 2 << 8 | 0 << 12, Instruction::RETURN_WIDE | 2 << 8); @@ -208,7 +204,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) { expected_after_cf, expected_after_dce, check_after_cf, - Primitive::kPrimLong); + DataType::Type::kInt64); } /** @@ -223,7 +219,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingNegation) { * return v2 4. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, @@ -288,7 +284,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition1) { * return v2 8. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT_2ADDR | 0 << 8 | 1 << 12, @@ -373,7 +369,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnAddition2) { * return v2 4. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 3 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::SUB_INT | 2 << 8, 0 | 1 << 8, @@ -436,7 +432,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingOnSubtraction) { * return (v4, v5) 6. return-wide v4 */ TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) { - const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE_16 | 0 << 8, 1, Instruction::CONST_WIDE_16 | 2 << 8, 2, Instruction::ADD_LONG | 4 << 8, 0 | 2 << 8, @@ -483,7 +479,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) { expected_after_cf, expected_after_dce, check_after_cf, - Primitive::kPrimLong); + DataType::Type::kInt64); } /** @@ -500,7 +496,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnAddition) { * return (v4, v5) 6. 
return-wide v4 */ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) { - const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_WIDE_16 | 0 << 8, 3, Instruction::CONST_WIDE_16 | 2 << 8, 2, Instruction::SUB_LONG | 4 << 8, 0 | 2 << 8, @@ -547,7 +543,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) { expected_after_cf, expected_after_dce, check_after_cf, - Primitive::kPrimLong); + DataType::Type::kInt64); } /** @@ -573,7 +569,7 @@ TEST_F(ConstantFoldingTest, LongConstantFoldingOnSubtraction) { * return v2 13. return v2 */ TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 1 << 12, Instruction::CONST_4 | 1 << 8 | 2 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, @@ -676,7 +672,7 @@ TEST_F(ConstantFoldingTest, IntConstantFoldingAndJumps) { * return-void 7. return */ TEST_F(ConstantFoldingTest, ConstantCondition) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::IF_GEZ | 1 << 8, 3, @@ -742,46 +738,46 @@ TEST_F(ConstantFoldingTest, ConstantCondition) { * in the bytecode, we need to set up the graph explicitly. */ TEST_F(ConstantFoldingTest, UnsignedComparisonsWithZero) { - graph_ = CreateGraph(&allocator_); - HBasicBlock* entry_block = new (&allocator_) HBasicBlock(graph_); + graph_ = CreateGraph(); + HBasicBlock* entry_block = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry_block); graph_->SetEntryBlock(entry_block); - HBasicBlock* block = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(block); - HBasicBlock* exit_block = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* exit_block = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(exit_block); graph_->SetExitBlock(exit_block); entry_block->AddSuccessor(block); block->AddSuccessor(exit_block); // Make various unsigned comparisons with zero against a parameter. 
- HInstruction* parameter = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt, true); + HInstruction* parameter = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32, true); entry_block->AddInstruction(parameter); - entry_block->AddInstruction(new (&allocator_) HGoto()); + entry_block->AddInstruction(new (GetAllocator()) HGoto()); HInstruction* zero = graph_->GetIntConstant(0); HInstruction* last; - block->AddInstruction(last = new (&allocator_) HAbove(zero, parameter)); - block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0)); - block->AddInstruction(last = new (&allocator_) HAbove(parameter, zero)); - block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0)); - block->AddInstruction(last = new (&allocator_) HAboveOrEqual(zero, parameter)); - block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0)); - block->AddInstruction(last = new (&allocator_) HAboveOrEqual(parameter, zero)); - block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0)); - block->AddInstruction(last = new (&allocator_) HBelow(zero, parameter)); - block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0)); - block->AddInstruction(last = new (&allocator_) HBelow(parameter, zero)); - block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0)); - block->AddInstruction(last = new (&allocator_) HBelowOrEqual(zero, parameter)); - block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0)); - block->AddInstruction(last = new (&allocator_) HBelowOrEqual(parameter, zero)); - block->AddInstruction(new (&allocator_) HSelect(last, parameter, parameter, 0)); - block->AddInstruction(new (&allocator_) HReturn(zero)); - - exit_block->AddInstruction(new (&allocator_) HExit()); + block->AddInstruction(last = new (GetAllocator()) HAbove(zero, parameter)); + block->AddInstruction(new (GetAllocator()) HSelect(last, parameter, parameter, 0)); + block->AddInstruction(last = new (GetAllocator()) HAbove(parameter, zero)); + block->AddInstruction(new (GetAllocator()) HSelect(last, parameter, parameter, 0)); + block->AddInstruction(last = new (GetAllocator()) HAboveOrEqual(zero, parameter)); + block->AddInstruction(new (GetAllocator()) HSelect(last, parameter, parameter, 0)); + block->AddInstruction(last = new (GetAllocator()) HAboveOrEqual(parameter, zero)); + block->AddInstruction(new (GetAllocator()) HSelect(last, parameter, parameter, 0)); + block->AddInstruction(last = new (GetAllocator()) HBelow(zero, parameter)); + block->AddInstruction(new (GetAllocator()) HSelect(last, parameter, parameter, 0)); + block->AddInstruction(last = new (GetAllocator()) HBelow(parameter, zero)); + block->AddInstruction(new (GetAllocator()) HSelect(last, parameter, parameter, 0)); + block->AddInstruction(last = new (GetAllocator()) HBelowOrEqual(zero, parameter)); + block->AddInstruction(new (GetAllocator()) HSelect(last, parameter, parameter, 0)); + block->AddInstruction(last = new (GetAllocator()) HBelowOrEqual(parameter, zero)); + block->AddInstruction(new (GetAllocator()) HSelect(last, parameter, parameter, 0)); + block->AddInstruction(new (GetAllocator()) HReturn(zero)); + + exit_block->AddInstruction(new (GetAllocator()) HExit()); graph_->BuildDominatorTree(); diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.cc b/compiler/optimizing/constructor_fence_redundancy_elimination.cc 
new file mode 100644 index 0000000000..4a66cd2265 --- /dev/null +++ b/compiler/optimizing/constructor_fence_redundancy_elimination.cc @@ -0,0 +1,262 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "constructor_fence_redundancy_elimination.h" + +#include "base/arena_allocator.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" + +namespace art { + +static constexpr bool kCfreLogFenceInputCount = false; + +// TODO: refactor this code by reusing escape analysis. +class CFREVisitor : public HGraphVisitor { + public: + CFREVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + scoped_allocator_(graph->GetArenaStack()), + candidate_fences_(scoped_allocator_.Adapter(kArenaAllocCFRE)), + candidate_fence_targets_(scoped_allocator_.Adapter(kArenaAllocCFRE)), + stats_(stats) {} + + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + // Visit all instructions in block. + HGraphVisitor::VisitBasicBlock(block); + + // If there were any unmerged fences left, merge them together, + // the objects are considered 'published' at the end of the block. + MergeCandidateFences(); + } + + void VisitConstructorFence(HConstructorFence* constructor_fence) OVERRIDE { + candidate_fences_.push_back(constructor_fence); + + for (size_t input_idx = 0; input_idx < constructor_fence->InputCount(); ++input_idx) { + candidate_fence_targets_.Insert(constructor_fence->InputAt(input_idx)); + } + } + + void VisitBoundType(HBoundType* bound_type) OVERRIDE { + VisitAlias(bound_type); + } + + void VisitNullCheck(HNullCheck* null_check) OVERRIDE { + VisitAlias(null_check); + } + + void VisitSelect(HSelect* select) OVERRIDE { + VisitAlias(select); + } + + void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { + HInstruction* value = instruction->InputAt(1); + VisitSetLocation(instruction, value); + } + + void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { + HInstruction* value = instruction->InputAt(1); + VisitSetLocation(instruction, value); + } + + void VisitArraySet(HArraySet* instruction) OVERRIDE { + HInstruction* value = instruction->InputAt(2); + VisitSetLocation(instruction, value); + } + + void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) { + // Pessimize: Merge all fences. 
+ MergeCandidateFences(); + } + + void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE { + HandleInvoke(invoke); + } + + void VisitClinitCheck(HClinitCheck* clinit) OVERRIDE { + HandleInvoke(clinit); + } + + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) OVERRIDE { + // Conservatively treat it as an invocation. + HandleInvoke(instruction); + } + + private: + void HandleInvoke(HInstruction* invoke) { + // An object is considered "published" if it escapes into an invoke as any of the parameters. + if (HasInterestingPublishTargetAsInput(invoke)) { + MergeCandidateFences(); + } + } + + // Called by any instruction visitor that may create an alias. + // These instructions may create an alias: + // - BoundType + // - NullCheck + // - Select + // + // These also create an alias, but are not handled by this function: + // - Phi: propagates values across blocks, but we always merge at the end of a block. + // - Invoke: this is handled by HandleInvoke. + void VisitAlias(HInstruction* aliasing_inst) { + // An object is considered "published" if it becomes aliased by other instructions. + if (HasInterestingPublishTargetAsInput(aliasing_inst)) { + // Note that constructing a "NullCheck" for new-instance, new-array, + // or a 'this' (receiver) reference is impossible. + // + // If by some reason we actually encounter such a NullCheck(FenceTarget), + // we LOG(WARNING). + if (UNLIKELY(aliasing_inst->IsNullCheck())) { + LOG(kIsDebugBuild ? FATAL : WARNING) + << "Unexpected instruction: NullCheck; should not be legal in graph"; + // We then do a best-effort to handle this case. + } + MergeCandidateFences(); + } + } + + void VisitSetLocation(HInstruction* inst ATTRIBUTE_UNUSED, HInstruction* store_input) { + // An object is considered "published" if it's stored onto the heap. + // Sidenote: A later "LSE" pass can still remove the fence if it proves the + // object doesn't actually escape. + if (IsInterestingPublishTarget(store_input)) { + // Merge all constructor fences that we've seen since + // the last interesting store (or since the beginning). + MergeCandidateFences(); + } + } + + bool HasInterestingPublishTargetAsInput(HInstruction* inst) { + for (size_t input_count = 0; input_count < inst->InputCount(); ++input_count) { + if (IsInterestingPublishTarget(inst->InputAt(input_count))) { + return true; + } + } + + return false; + } + + // Merges all the existing fences we've seen so far into the last-most fence. + // + // This resets the list of candidate fences and their targets back to {}. 
+  void MergeCandidateFences() {
+    if (candidate_fences_.empty()) {
+      // Nothing to do, need 1+ fences to merge.
+      return;
+    }
+
+    // The merge target is always the "last" candidate fence.
+    HConstructorFence* merge_target = candidate_fences_[candidate_fences_.size() - 1];
+
+    for (HConstructorFence* fence : candidate_fences_) {
+      MaybeMerge(merge_target, fence);
+    }
+
+    if (kCfreLogFenceInputCount) {
+      LOG(INFO) << "CFRE-MergeCandidateFences: Post-merge fence input count "
+                << merge_target->InputCount();
+    }
+
+    // Each merge acts as a cut-off point. The optimization is reset completely.
+    // In theory, we could push the fence as far as its publish, but in practice
+    // there is no benefit to this extra complexity unless we also reordered
+    // the stores to come later.
+    candidate_fences_.clear();
+    candidate_fence_targets_.Clear();
+  }
+
+  // A publishing 'store' is only interesting if the value being stored
+  // is one of the fence `targets` in `candidate_fences`.
+  bool IsInterestingPublishTarget(HInstruction* store_input) const {
+    return candidate_fence_targets_.Find(store_input) != candidate_fence_targets_.end();
+  }
+
+  void MaybeMerge(HConstructorFence* target, HConstructorFence* src) {
+    if (target == src) {
+      return;  // Don't merge a fence into itself.
+      // This is mostly for stats purposes; we don't want to count merge(x,x)
+      // as removing a fence because it's a no-op.
+    }
+
+    target->Merge(src);
+
+    MaybeRecordStat(stats_, MethodCompilationStat::kConstructorFenceRemovedCFRE);
+  }
+
+  // Phase-local heap memory allocator for CFRE optimizer.
+  ScopedArenaAllocator scoped_allocator_;
+
+  // Set of constructor fences that we've seen in the current block.
+  // Each constructor fence acts as a guard for one or more `targets`.
+  // There exist no stores to any `targets` between any of these fences.
+  //
+  // Fences are in succession order (e.g. fence[i] succeeds fence[i-1]
+  // within the same basic block).
+  ScopedArenaVector<HConstructorFence*> candidate_fences_;
+
+  // Stores a set of the fence targets, to allow faster lookup of whether
+  // a detected publish is a target of one of the candidate fences.
+  ScopedArenaHashSet<HInstruction*> candidate_fence_targets_;
+
+  // Used to record stats about the optimization.
+  OptimizingCompilerStats* const stats_;
+
+  DISALLOW_COPY_AND_ASSIGN(CFREVisitor);
+};
+
+void ConstructorFenceRedundancyElimination::Run() {
+  CFREVisitor cfre_visitor(graph_, stats_);
+
+  // Arbitrarily visit in reverse post-order.
+  // The exact block visit order does not matter, as the algorithm
+  // only operates on a single block at a time.
+  cfre_visitor.VisitReversePostOrder();
+}
+
+} // namespace art
diff --git a/compiler/optimizing/constructor_fence_redundancy_elimination.h b/compiler/optimizing/constructor_fence_redundancy_elimination.h
new file mode 100644
index 0000000000..f4b06d5544
--- /dev/null
+++ b/compiler/optimizing/constructor_fence_redundancy_elimination.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ +#define ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ + +#include "optimization.h" + +namespace art { + +/* + * Constructor Fence Redundancy Elimination (CFRE). + * + * A local optimization pass that merges redundant constructor fences + * together within the same basic block. + * + * Abbreviations: + * - CF: Constructor Fence + * - CFS: Constructor Fence Set + * - CFTargets: The unique set of the inputs of all the instructions in CFS. + * + * Given any CFS = { CF(x), CF(y), CF(z), ... }, define CFTargets = { x, y, z, ... }. + * - Publish(R) must not exist for any R in CFTargets if this Publish(R) is between any CF in CFS. + * - This type of Publish(R) is called an "interesting publish". + * + * A Publish(R) is considered any instruction at which the reference to "R" + * may escape (e.g. invoke, store, return, etc) to another thread. + * + * Starting at the beginning of the block: + * - Find the largest contiguous CFS. + * - If we see an interesting publish, merge all instructions in CFS into a single CF(CFTargets). + * - Repeat until the block is fully visited. + * - At the end of the block, merge all instructions in CFS into a single CF(CFTargets). + */ +class ConstructorFenceRedundancyElimination : public HOptimization { + public: + ConstructorFenceRedundancyElimination(HGraph* graph, + OptimizingCompilerStats* stats, + const char* name = kCFREPassName) + : HOptimization(graph, name, stats) {} + + void Run() OVERRIDE; + + static constexpr const char* kCFREPassName = "constructor_fence_redundancy_elimination"; + + private: + DISALLOW_COPY_AND_ASSIGN(ConstructorFenceRedundancyElimination); +}; + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_CONSTRUCTOR_FENCE_REDUNDANCY_ELIMINATION_H_ diff --git a/compiler/optimizing/data_type-inl.h b/compiler/optimizing/data_type-inl.h new file mode 100644 index 0000000000..94807e8fc9 --- /dev/null +++ b/compiler/optimizing/data_type-inl.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_DATA_TYPE_INL_H_ +#define ART_COMPILER_OPTIMIZING_DATA_TYPE_INL_H_ + +#include "data_type.h" +#include "dex/primitive.h" + +namespace art { + +// Note: Not declared in data_type.h to avoid pulling in "primitive.h". 
+constexpr DataType::Type DataTypeFromPrimitive(Primitive::Type type) { + switch (type) { + case Primitive::kPrimNot: return DataType::Type::kReference; + case Primitive::kPrimBoolean: return DataType::Type::kBool; + case Primitive::kPrimByte: return DataType::Type::kInt8; + case Primitive::kPrimChar: return DataType::Type::kUint16; + case Primitive::kPrimShort: return DataType::Type::kInt16; + case Primitive::kPrimInt: return DataType::Type::kInt32; + case Primitive::kPrimLong: return DataType::Type::kInt64; + case Primitive::kPrimFloat: return DataType::Type::kFloat32; + case Primitive::kPrimDouble: return DataType::Type::kFloat64; + case Primitive::kPrimVoid: return DataType::Type::kVoid; + } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +constexpr DataType::Type DataType::FromShorty(char type) { + return DataTypeFromPrimitive(Primitive::GetType(type)); +} + +constexpr char DataType::TypeId(DataType::Type type) { + // Type id for visualizer. + // Types corresponding to Java types are given a lower-case version of their shorty character. + switch (type) { + case DataType::Type::kBool: return 'z'; // Java boolean (Z). + case DataType::Type::kUint8: return 'a'; // The character before Java byte's 'b'. + case DataType::Type::kInt8: return 'b'; // Java byte (B). + case DataType::Type::kUint16: return 'c'; // Java char (C). + case DataType::Type::kInt16: return 's'; // Java short (S). + case DataType::Type::kUint32: return 'u'; // Picked 'u' for unsigned. + case DataType::Type::kInt32: return 'i'; // Java int (I). + case DataType::Type::kUint64: return 'w'; // Picked 'w' for long unsigned. + case DataType::Type::kInt64: return 'j'; // Java long (J). + case DataType::Type::kFloat32: return 'f'; // Java float (F). + case DataType::Type::kFloat64: return 'd'; // Java double (D). + case DataType::Type::kReference: return 'l'; // Java reference (L). + case DataType::Type::kVoid: return 'v'; // Java void (V). + } + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); +} + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_DATA_TYPE_INL_H_ diff --git a/compiler/optimizing/data_type.cc b/compiler/optimizing/data_type.cc new file mode 100644 index 0000000000..cb354f46cc --- /dev/null +++ b/compiler/optimizing/data_type.cc @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "data_type.h" + +namespace art { + +static const char* kTypeNames[] = { + "Reference", + "Bool", + "Uint8", + "Int8", + "Uint16", + "Int16", + "Uint32", + "Int32", + "Uint64", + "Int64", + "Float32", + "Float64", + "Void", +}; + +const char* DataType::PrettyDescriptor(Type type) { + static_assert(arraysize(kTypeNames) == static_cast<size_t>(Type::kLast) + 1, + "Missing element"); + uint32_t uint_type = static_cast<uint32_t>(type); + CHECK_LE(uint_type, static_cast<uint32_t>(Type::kLast)); + return kTypeNames[uint_type]; +} + +std::ostream& operator<<(std::ostream& os, DataType::Type type) { + uint32_t uint_type = static_cast<uint32_t>(type); + if (uint_type <= static_cast<uint32_t>(DataType::Type::kLast)) { + os << kTypeNames[uint_type]; + } else { + os << "Type[" << uint_type << "]"; + } + return os; +} + +} // namespace art diff --git a/compiler/optimizing/data_type.h b/compiler/optimizing/data_type.h new file mode 100644 index 0000000000..4a6c91459f --- /dev/null +++ b/compiler/optimizing/data_type.h @@ -0,0 +1,253 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_DATA_TYPE_H_ +#define ART_COMPILER_OPTIMIZING_DATA_TYPE_H_ + +#include <iosfwd> + +#include <android-base/logging.h> + +#include "base/bit_utils.h" + +namespace art { + +class DataType { + public: + enum class Type : uint8_t { + kReference = 0, + kBool, + kUint8, + kInt8, + kUint16, + kInt16, + kUint32, + kInt32, + kUint64, + kInt64, + kFloat32, + kFloat64, + kVoid, + kLast = kVoid + }; + + static constexpr Type FromShorty(char type); + static constexpr char TypeId(DataType::Type type); + + static constexpr size_t SizeShift(Type type) { + switch (type) { + case Type::kVoid: + case Type::kBool: + case Type::kUint8: + case Type::kInt8: + return 0; + case Type::kUint16: + case Type::kInt16: + return 1; + case Type::kUint32: + case Type::kInt32: + case Type::kFloat32: + return 2; + case Type::kUint64: + case Type::kInt64: + case Type::kFloat64: + return 3; + case Type::kReference: + return WhichPowerOf2(kObjectReferenceSize); + default: + LOG(FATAL) << "Invalid type " << static_cast<int>(type); + return 0; + } + } + + static constexpr size_t Size(Type type) { + switch (type) { + case Type::kVoid: + return 0; + case Type::kBool: + case Type::kUint8: + case Type::kInt8: + return 1; + case Type::kUint16: + case Type::kInt16: + return 2; + case Type::kUint32: + case Type::kInt32: + case Type::kFloat32: + return 4; + case Type::kUint64: + case Type::kInt64: + case Type::kFloat64: + return 8; + case Type::kReference: + return kObjectReferenceSize; + default: + LOG(FATAL) << "Invalid type " << static_cast<int>(type); + return 0; + } + } + + static bool IsFloatingPointType(Type type) { + return type == Type::kFloat32 || type == Type::kFloat64; + } + + static bool IsIntegralType(Type type) { + // The Java language does not allow treating boolean as an integral type but + // our bit representation makes it safe. 
+ switch (type) { + case Type::kBool: + case Type::kUint8: + case Type::kInt8: + case Type::kUint16: + case Type::kInt16: + case Type::kUint32: + case Type::kInt32: + case Type::kUint64: + case Type::kInt64: + return true; + default: + return false; + } + } + + static bool IsIntOrLongType(Type type) { + return type == Type::kInt32 || type == Type::kInt64; + } + + static bool Is64BitType(Type type) { + return type == Type::kUint64 || type == Type::kInt64 || type == Type::kFloat64; + } + + static bool IsUnsignedType(Type type) { + return type == Type::kBool || type == Type::kUint8 || type == Type::kUint16 || + type == Type::kUint32 || type == Type::kUint64; + } + + // Return the general kind of `type`, fusing integer-like types as Type::kInt. + static Type Kind(Type type) { + switch (type) { + case Type::kBool: + case Type::kUint8: + case Type::kInt8: + case Type::kUint16: + case Type::kInt16: + case Type::kUint32: + case Type::kInt32: + return Type::kInt32; + case Type::kUint64: + case Type::kInt64: + return Type::kInt64; + default: + return type; + } + } + + static int64_t MinValueOfIntegralType(Type type) { + switch (type) { + case Type::kBool: + return std::numeric_limits<bool>::min(); + case Type::kUint8: + return std::numeric_limits<uint8_t>::min(); + case Type::kInt8: + return std::numeric_limits<int8_t>::min(); + case Type::kUint16: + return std::numeric_limits<uint16_t>::min(); + case Type::kInt16: + return std::numeric_limits<int16_t>::min(); + case Type::kUint32: + return std::numeric_limits<uint32_t>::min(); + case Type::kInt32: + return std::numeric_limits<int32_t>::min(); + case Type::kUint64: + return std::numeric_limits<uint64_t>::min(); + case Type::kInt64: + return std::numeric_limits<int64_t>::min(); + default: + LOG(FATAL) << "non integral type"; + } + return 0; + } + + static int64_t MaxValueOfIntegralType(Type type) { + switch (type) { + case Type::kBool: + return std::numeric_limits<bool>::max(); + case Type::kUint8: + return std::numeric_limits<uint8_t>::max(); + case Type::kInt8: + return std::numeric_limits<int8_t>::max(); + case Type::kUint16: + return std::numeric_limits<uint16_t>::max(); + case Type::kInt16: + return std::numeric_limits<int16_t>::max(); + case Type::kUint32: + return std::numeric_limits<uint32_t>::max(); + case Type::kInt32: + return std::numeric_limits<int32_t>::max(); + case Type::kUint64: + return std::numeric_limits<uint64_t>::max(); + case Type::kInt64: + return std::numeric_limits<int64_t>::max(); + default: + LOG(FATAL) << "non integral type"; + } + return 0; + } + + static bool IsTypeConversionImplicit(Type input_type, Type result_type); + static bool IsTypeConversionImplicit(int64_t value, Type result_type); + + static const char* PrettyDescriptor(Type type); + + private: + static constexpr size_t kObjectReferenceSize = 4u; +}; +std::ostream& operator<<(std::ostream& os, DataType::Type data_type); + +// Defined outside DataType to have the operator<< available for DCHECK_NE(). +inline bool DataType::IsTypeConversionImplicit(Type input_type, Type result_type) { + DCHECK_NE(DataType::Type::kVoid, result_type); + DCHECK_NE(DataType::Type::kVoid, input_type); + + // Invariant: We should never generate a conversion to a Boolean value. + DCHECK_NE(DataType::Type::kBool, result_type); + + // Besides conversion to the same type, integral conversions to non-Int64 types + // are implicit if the result value range covers the input value range, i.e. + // widening conversions that do not need to trim the sign bits. 
+ return result_type == input_type || + (result_type != Type::kInt64 && + IsIntegralType(input_type) && + IsIntegralType(result_type) && + MinValueOfIntegralType(input_type) >= MinValueOfIntegralType(result_type) && + MaxValueOfIntegralType(input_type) <= MaxValueOfIntegralType(result_type)); +} + +inline bool DataType::IsTypeConversionImplicit(int64_t value, Type result_type) { + if (IsIntegralType(result_type) && result_type != Type::kInt64) { + // If the constant value falls in the range of the result_type, type + // conversion isn't needed. + return value >= MinValueOfIntegralType(result_type) && + value <= MaxValueOfIntegralType(result_type); + } + // Conversion isn't implicit if it's into non-integer types, or 64-bit int + // which may have different number of registers. + return false; +} + +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_DATA_TYPE_H_ diff --git a/compiler/optimizing/data_type_test.cc b/compiler/optimizing/data_type_test.cc new file mode 100644 index 0000000000..8fea22bce8 --- /dev/null +++ b/compiler/optimizing/data_type_test.cc @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <gtest/gtest.h> + +#include "data_type-inl.h" + +#include "base/array_ref.h" +#include "base/macros.h" +#include "dex/primitive.h" + +namespace art { + +template <DataType::Type data_type, Primitive::Type primitive_type> +static void CheckConversion() { + static_assert(data_type == DataTypeFromPrimitive(primitive_type), "Conversion check."); + static_assert(DataType::Size(data_type) == Primitive::ComponentSize(primitive_type), + "Size check."); +} + +TEST(DataType, SizeAgainstPrimitive) { + CheckConversion<DataType::Type::kVoid, Primitive::kPrimVoid>(); + CheckConversion<DataType::Type::kBool, Primitive::kPrimBoolean>(); + CheckConversion<DataType::Type::kInt8, Primitive::kPrimByte>(); + CheckConversion<DataType::Type::kUint16, Primitive::kPrimChar>(); + CheckConversion<DataType::Type::kInt16, Primitive::kPrimShort>(); + CheckConversion<DataType::Type::kInt32, Primitive::kPrimInt>(); + CheckConversion<DataType::Type::kInt64, Primitive::kPrimLong>(); + CheckConversion<DataType::Type::kFloat32, Primitive::kPrimFloat>(); + CheckConversion<DataType::Type::kFloat64, Primitive::kPrimDouble>(); + CheckConversion<DataType::Type::kReference, Primitive::kPrimNot>(); +} + +TEST(DataType, Names) { +#define CHECK_NAME(type) EXPECT_STREQ(#type, DataType::PrettyDescriptor(DataType::Type::k##type)) + CHECK_NAME(Void); + CHECK_NAME(Bool); + CHECK_NAME(Int8); + CHECK_NAME(Uint16); + CHECK_NAME(Int16); + CHECK_NAME(Int32); + CHECK_NAME(Int64); + CHECK_NAME(Float32); + CHECK_NAME(Float64); + CHECK_NAME(Reference); +#undef CHECK_NAME +} + +TEST(DataType, IsTypeConversionImplicit) { + static const DataType::Type kIntegralTypes[] = { + DataType::Type::kBool, + DataType::Type::kUint8, + DataType::Type::kInt8, + DataType::Type::kUint16, + DataType::Type::kInt16, + DataType::Type::kInt32, + 
DataType::Type::kInt64, + }; + const ArrayRef<const DataType::Type> kIntegralInputTypes(kIntegralTypes); + // Note: kBool cannot be used as a result type. + DCHECK_EQ(kIntegralTypes[0], DataType::Type::kBool); + const ArrayRef<const DataType::Type> kIntegralResultTypes = kIntegralInputTypes.SubArray(1u); + + static const bool kImplicitIntegralConversions[][arraysize(kIntegralTypes)] = { + // Bool Uint8 Int8 Uint16 Int16 Int32 Int64 + { /* Bool N/A */ true, true, true, true, true, false }, + { /* Uint8 N/A */ true, false, true, true, true, false }, + { /* Int8 N/A */ false, true, false, true, true, false }, + { /* Uint16 N/A */ false, false, true, false, true, false }, + { /* Int16 N/A */ false, false, false, true, true, false }, + { /* Int32 N/A */ false, false, false, false, true, false }, + { /* Int64 N/A */ false, false, false, false, false, true }, + }; + static_assert(arraysize(kIntegralTypes) == arraysize(kImplicitIntegralConversions), "size check"); + + for (size_t input_index = 0; input_index != kIntegralInputTypes.size(); ++input_index) { + DataType::Type input_type = kIntegralInputTypes[input_index]; + for (size_t result_index = 1u; result_index != kIntegralResultTypes.size(); ++result_index) { + DataType::Type result_type = kIntegralResultTypes[result_index]; + EXPECT_EQ(kImplicitIntegralConversions[input_index][result_index], + DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << " " << result_type; + } + } + for (DataType::Type input_type : kIntegralInputTypes) { + EXPECT_FALSE(DataType::IsTypeConversionImplicit(input_type, DataType::Type::kFloat32)); + EXPECT_FALSE(DataType::IsTypeConversionImplicit(input_type, DataType::Type::kFloat64)); + } + for (DataType::Type result_type : kIntegralResultTypes) { + EXPECT_FALSE(DataType::IsTypeConversionImplicit(DataType::Type::kFloat32, result_type)); + EXPECT_FALSE(DataType::IsTypeConversionImplicit(DataType::Type::kFloat64, result_type)); + } + EXPECT_TRUE( + DataType::IsTypeConversionImplicit(DataType::Type::kFloat32, DataType::Type::kFloat32)); + EXPECT_FALSE( + DataType::IsTypeConversionImplicit(DataType::Type::kFloat32, DataType::Type::kFloat64)); + EXPECT_FALSE( + DataType::IsTypeConversionImplicit(DataType::Type::kFloat64, DataType::Type::kFloat32)); + EXPECT_TRUE( + DataType::IsTypeConversionImplicit(DataType::Type::kFloat64, DataType::Type::kFloat64)); +} + +} // namespace art diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index c31c66a056..9fa0f72e80 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -18,13 +18,18 @@ #include "base/array_ref.h" #include "base/bit_vector-inl.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "base/stl_util.h" #include "ssa_phi_elimination.h" namespace art { static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) { - ArenaVector<HBasicBlock*> worklist(graph->GetArena()->Adapter(kArenaAllocDCE)); + // Use local allocator for allocating memory. 
+ ScopedArenaAllocator allocator(graph->GetArenaStack()); + + ScopedArenaVector<HBasicBlock*> worklist(allocator.Adapter(kArenaAllocDCE)); constexpr size_t kDefaultWorlistSize = 8; worklist.reserve(kDefaultWorlistSize); visited->SetBit(graph->GetEntryBlock()->GetBlockId()); @@ -118,7 +123,7 @@ static bool HasEquality(IfCondition condition) { } static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstruction* right) { - if (left == right && !Primitive::IsFloatingPointType(left->GetType())) { + if (left == right && !DataType::IsFloatingPointType(left->GetType())) { return condition->GetBlock()->GetGraph()->GetIntConstant( HasEquality(condition->GetCondition()) ? 1 : 0); } @@ -141,6 +146,141 @@ static HConstant* Evaluate(HCondition* condition, HInstruction* left, HInstructi } } +static bool RemoveNonNullControlDependences(HBasicBlock* block, HBasicBlock* throws) { + // Test for an if as last statement. + if (!block->EndsWithIf()) { + return false; + } + HIf* ifs = block->GetLastInstruction()->AsIf(); + // Find either: + // if obj == null + // throws + // else + // not_throws + // or: + // if obj != null + // not_throws + // else + // throws + HInstruction* cond = ifs->InputAt(0); + HBasicBlock* not_throws = nullptr; + if (throws == ifs->IfTrueSuccessor() && cond->IsEqual()) { + not_throws = ifs->IfFalseSuccessor(); + } else if (throws == ifs->IfFalseSuccessor() && cond->IsNotEqual()) { + not_throws = ifs->IfTrueSuccessor(); + } else { + return false; + } + DCHECK(cond->IsEqual() || cond->IsNotEqual()); + HInstruction* obj = cond->InputAt(1); + if (obj->IsNullConstant()) { + obj = cond->InputAt(0); + } else if (!cond->InputAt(0)->IsNullConstant()) { + return false; + } + // Scan all uses of obj and find null check under control dependence. + HBoundType* bound = nullptr; + const HUseList<HInstruction*>& uses = obj->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end;) { + HInstruction* user = it->GetUser(); + ++it; // increment before possibly replacing + if (user->IsNullCheck()) { + HBasicBlock* user_block = user->GetBlock(); + if (user_block != block && + user_block != throws && + block->Dominates(user_block)) { + if (bound == nullptr) { + ReferenceTypeInfo ti = obj->GetReferenceTypeInfo(); + bound = new (obj->GetBlock()->GetGraph()->GetAllocator()) HBoundType(obj); + bound->SetUpperBound(ti, /*can_be_null*/ false); + bound->SetReferenceTypeInfo(ti); + bound->SetCanBeNull(false); + not_throws->InsertInstructionBefore(bound, not_throws->GetFirstInstruction()); + } + user->ReplaceWith(bound); + user_block->RemoveInstruction(user); + } + } + } + return bound != nullptr; +} + +// Simplify the pattern: +// +// B1 +// / \ +// | foo() // always throws +// \ goto B2 +// \ / +// B2 +// +// Into: +// +// B1 +// / \ +// | foo() +// | goto Exit +// | | +// B2 Exit +// +// Rationale: +// Removal of the never taken edge to B2 may expose +// other optimization opportunities, such as code sinking. +bool HDeadCodeElimination::SimplifyAlwaysThrows() { + // Make sure exceptions go to exit. + if (graph_->HasTryCatch()) { + return false; + } + HBasicBlock* exit = graph_->GetExitBlock(); + if (exit == nullptr) { + return false; + } + + bool rerun_dominance_and_loop_analysis = false; + + // Order does not matter, just pick one. + for (HBasicBlock* block : graph_->GetReversePostOrder()) { + HInstruction* first = block->GetFirstInstruction(); + HInstruction* last = block->GetLastInstruction(); + // Ensure only one throwing instruction appears before goto. 
+ if (first->AlwaysThrows() && + first->GetNext() == last && + last->IsGoto() && + block->GetPhis().IsEmpty() && + block->GetPredecessors().size() == 1u) { + DCHECK_EQ(block->GetSuccessors().size(), 1u); + HBasicBlock* pred = block->GetSinglePredecessor(); + HBasicBlock* succ = block->GetSingleSuccessor(); + // Ensure no computations are merged through throwing block. + // This does not prevent the optimization per se, but would + // require an elaborate clean up of the SSA graph. + if (succ != exit && + !block->Dominates(pred) && + pred->Dominates(succ) && + succ->GetPredecessors().size() > 1u && + succ->GetPhis().IsEmpty()) { + block->ReplaceSuccessor(succ, exit); + rerun_dominance_and_loop_analysis = true; + MaybeRecordStat(stats_, MethodCompilationStat::kSimplifyThrowingInvoke); + // Perform a quick follow up optimization on object != null control dependences + // that is much cheaper to perform now than in a later phase. + if (RemoveNonNullControlDependences(pred, block)) { + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedNullCheck); + } + } + } + } + + // We need to re-analyze the graph in order to run DCE afterwards. + if (rerun_dominance_and_loop_analysis) { + graph_->ClearLoopInformation(); + graph_->ClearDominanceInformation(); + graph_->BuildDominatorTree(); + return true; + } + return false; +} + // Simplify the pattern: // // B1 B2 ... @@ -305,9 +445,12 @@ void HDeadCodeElimination::ConnectSuccessiveBlocks() { } bool HDeadCodeElimination::RemoveDeadBlocks() { + // Use local allocator for allocating memory. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + // Classify blocks as reachable/unreachable. - ArenaAllocator* allocator = graph_->GetArena(); - ArenaBitVector live_blocks(allocator, graph_->GetBlocks().size(), false, kArenaAllocDCE); + ArenaBitVector live_blocks(&allocator, graph_->GetBlocks().size(), false, kArenaAllocDCE); + live_blocks.ClearAllBits(); MarkReachableBlocks(graph_, &live_blocks); bool removed_one_or_more_blocks = false; @@ -359,7 +502,7 @@ void HDeadCodeElimination::RemoveDeadInstructions() { DCHECK(!inst->IsControlFlow()); if (inst->IsDeadAndRemovable()) { block->RemoveInstruction(inst); - MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedDeadInstruction); } } } @@ -373,6 +516,7 @@ void HDeadCodeElimination::Run() { // Simplify graph to generate more dead block patterns. ConnectSuccessiveBlocks(); bool did_any_simplification = false; + did_any_simplification |= SimplifyAlwaysThrows(); did_any_simplification |= SimplifyIfs(); did_any_simplification |= RemoveDeadBlocks(); if (did_any_simplification) { diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h index 84fd890eee..92a7f562e1 100644 --- a/compiler/optimizing/dead_code_elimination.h +++ b/compiler/optimizing/dead_code_elimination.h @@ -40,6 +40,7 @@ class HDeadCodeElimination : public HOptimization { void MaybeRecordSimplifyIf(); bool RemoveDeadBlocks(); void RemoveDeadInstructions(); + bool SimplifyAlwaysThrows(); bool SimplifyIfs(); void ConnectSuccessiveBlocks(); diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc index fdd77e7261..adb6ce1187 100644 --- a/compiler/optimizing/dead_code_elimination_test.cc +++ b/compiler/optimizing/dead_code_elimination_test.cc @@ -14,9 +14,10 @@ * limitations under the License. 
*/ +#include "dead_code_elimination.h" + #include "arch/x86/instruction_set_features_x86.h" #include "code_generator_x86.h" -#include "dead_code_elimination.h" #include "driver/compiler_options.h" #include "graph_checker.h" #include "optimizing_unit_test.h" @@ -26,14 +27,17 @@ namespace art { -class DeadCodeEliminationTest : public CommonCompilerTest {}; - -static void TestCode(const uint16_t* data, - const std::string& expected_before, - const std::string& expected_after) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); +class DeadCodeEliminationTest : public OptimizingUnitTest { + protected: + void TestCode(const std::vector<uint16_t>& data, + const std::string& expected_before, + const std::string& expected_after); +}; + +void DeadCodeEliminationTest::TestCode(const std::vector<uint16_t>& data, + const std::string& expected_before, + const std::string& expected_after) { + HGraph* graph = CreateCFG(data); ASSERT_NE(graph, nullptr); StringPrettyPrinter printer_before(graph); @@ -69,7 +73,7 @@ static void TestCode(const uint16_t* data, * return-void 7. return */ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::IF_GEZ | 1 << 8, 3, @@ -131,7 +135,7 @@ TEST_F(DeadCodeEliminationTest, AdditionAndConditionalJump) { * return 13. return-void */ TEST_F(DeadCodeEliminationTest, AdditionsAndInconditionalJumps) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 8 | 0 << 12, Instruction::CONST_4 | 1 << 8 | 1 << 12, Instruction::ADD_INT | 2 << 8, 0 | 1 << 8, diff --git a/compiler/optimizing/dominator_test.cc b/compiler/optimizing/dominator_test.cc index 50c677adf5..1d72ba116e 100644 --- a/compiler/optimizing/dominator_test.cc +++ b/compiler/optimizing/dominator_test.cc @@ -16,7 +16,7 @@ #include "base/arena_allocator.h" #include "builder.h" -#include "dex_instruction.h" +#include "dex/dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -24,12 +24,15 @@ namespace art { -class OptimizerTest : public CommonCompilerTest {}; +class OptimizerTest : public OptimizingUnitTest { + protected: + void TestCode(const std::vector<uint16_t>& data, const uint32_t* blocks, size_t blocks_length); +}; -static void TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks_length) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); +void OptimizerTest::TestCode(const std::vector<uint16_t>& data, + const uint32_t* blocks, + size_t blocks_length) { + HGraph* graph = CreateCFG(data); ASSERT_EQ(graph->GetBlocks().size(), blocks_length); for (size_t i = 0, e = blocks_length; i < e; ++i) { if (blocks[i] == kInvalidBlockId) { @@ -48,7 +51,7 @@ static void TestCode(const uint16_t* data, const uint32_t* blocks, size_t blocks } TEST_F(OptimizerTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID); // Block number 1 const uint32_t dominators[] = { @@ -61,7 +64,7 @@ TEST_F(OptimizerTest, ReturnVoid) { } TEST_F(OptimizerTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, // Block 
number 1 Instruction::RETURN_VOID); // Block number 2 @@ -76,7 +79,7 @@ TEST_F(OptimizerTest, CFG1) { } TEST_F(OptimizerTest, CFG2) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, // Block number 1 Instruction::GOTO | 0x100, // Block number 2 Instruction::RETURN_VOID); // Block number 3 @@ -93,7 +96,7 @@ TEST_F(OptimizerTest, CFG2) { } TEST_F(OptimizerTest, CFG3) { - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, // Block number 1 Instruction::RETURN_VOID, // Block number 2 Instruction::GOTO | 0xFF00); // Block number 3 @@ -108,14 +111,14 @@ TEST_F(OptimizerTest, CFG3) { TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); TestCode(data2, dominators, sizeof(dominators) / sizeof(int)); - const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); @@ -124,7 +127,7 @@ TEST_F(OptimizerTest, CFG3) { } TEST_F(OptimizerTest, CFG4) { - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::NOP, Instruction::GOTO | 0xFF00); @@ -137,14 +140,14 @@ TEST_F(OptimizerTest, CFG4) { TestCode(data1, dominators, sizeof(dominators) / sizeof(int)); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 0, 0); TestCode(data2, dominators, sizeof(dominators) / sizeof(int)); } TEST_F(OptimizerTest, CFG5) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, // Block number 1 Instruction::GOTO | 0x100, // Dead block Instruction::GOTO | 0xFE00); // Block number 2 @@ -161,7 +164,7 @@ TEST_F(OptimizerTest, CFG5) { } TEST_F(OptimizerTest, CFG6) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -180,7 +183,7 @@ TEST_F(OptimizerTest, CFG6) { } TEST_F(OptimizerTest, CFG7) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, // Block number 1 Instruction::GOTO | 0x100, // Block number 2 @@ -200,7 +203,7 @@ TEST_F(OptimizerTest, CFG7) { } TEST_F(OptimizerTest, CFG8) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, // Block number 1 Instruction::GOTO | 0x200, // Block number 2 @@ -221,7 +224,7 @@ TEST_F(OptimizerTest, CFG8) { } TEST_F(OptimizerTest, CFG9) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, // Block number 1 Instruction::GOTO | 0x200, // Block number 2 @@ -242,7 +245,7 @@ TEST_F(OptimizerTest, CFG9) { } TEST_F(OptimizerTest, CFG10) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 
| 0 | 0, Instruction::IF_EQ, 6, // Block number 1 Instruction::IF_EQ, 3, // Block number 2 diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc index 0d4e1c5c97..b63914faf7 100644 --- a/compiler/optimizing/emit_swap_mips_test.cc +++ b/compiler/optimizing/emit_swap_mips_test.cc @@ -25,16 +25,15 @@ namespace art { -class EmitSwapMipsTest : public ::testing::Test { +class EmitSwapMipsTest : public OptimizingUnitTest { public: void SetUp() OVERRIDE { - allocator_.reset(new ArenaAllocator(&pool_)); - graph_ = CreateGraph(allocator_.get()); + graph_ = CreateGraph(); isa_features_ = MipsInstructionSetFeatures::FromCppDefines(); - codegen_ = new (graph_->GetArena()) mips::CodeGeneratorMIPS(graph_, - *isa_features_.get(), - CompilerOptions()); - moves_ = new (allocator_.get()) HParallelMove(allocator_.get()); + codegen_ = new (graph_->GetAllocator()) mips::CodeGeneratorMIPS(graph_, + *isa_features_.get(), + CompilerOptions()); + moves_ = new (GetAllocator()) HParallelMove(GetAllocator()); test_helper_.reset( new AssemblerTestInfrastructure(GetArchitectureString(), GetAssemblerCmdName(), @@ -47,8 +46,9 @@ class EmitSwapMipsTest : public ::testing::Test { } void TearDown() OVERRIDE { - allocator_.reset(); test_helper_.reset(); + isa_features_.reset(); + ResetPoolAndAllocator(); } // Get the typically used name for this architecture. @@ -91,7 +91,9 @@ class EmitSwapMipsTest : public ::testing::Test { return nullptr; } - void DriverWrapper(HParallelMove* move, std::string assembly_text, std::string test_name) { + void DriverWrapper(HParallelMove* move, + const std::string& assembly_text, + const std::string& test_name) { codegen_->GetMoveResolver()->EmitNativeCode(move); assembler_ = codegen_->GetAssembler(); assembler_->FinalizeCode(); @@ -102,12 +104,10 @@ class EmitSwapMipsTest : public ::testing::Test { } protected: - ArenaPool pool_; HGraph* graph_; HParallelMove* moves_; mips::CodeGeneratorMIPS* codegen_; mips::MipsAssembler* assembler_; - std::unique_ptr<ArenaAllocator> allocator_; std::unique_ptr<AssemblerTestInfrastructure> test_helper_; std::unique_ptr<const MipsInstructionSetFeatures> isa_features_; }; @@ -116,12 +116,12 @@ TEST_F(EmitSwapMipsTest, TwoRegisters) { moves_->AddMove( Location::RegisterLocation(4), Location::RegisterLocation(5), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves_->AddMove( Location::RegisterLocation(5), Location::RegisterLocation(4), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); const char* expected = "or $t8, $a1, $zero\n" @@ -134,12 +134,12 @@ TEST_F(EmitSwapMipsTest, TwoRegisterPairs) { moves_->AddMove( Location::RegisterPairLocation(4, 5), Location::RegisterPairLocation(6, 7), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves_->AddMove( Location::RegisterPairLocation(6, 7), Location::RegisterPairLocation(4, 5), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); const char* expected = "or $t8, $a2, $zero\n" @@ -155,12 +155,12 @@ TEST_F(EmitSwapMipsTest, TwoFpuRegistersFloat) { moves_->AddMove( Location::FpuRegisterLocation(4), Location::FpuRegisterLocation(2), - Primitive::kPrimFloat, + DataType::Type::kFloat32, nullptr); moves_->AddMove( Location::FpuRegisterLocation(2), Location::FpuRegisterLocation(4), - Primitive::kPrimFloat, + DataType::Type::kFloat32, nullptr); const char* expected = "mov.s $f6, $f2\n" @@ -173,12 +173,12 @@ TEST_F(EmitSwapMipsTest, TwoFpuRegistersDouble) { moves_->AddMove( Location::FpuRegisterLocation(4), 
Location::FpuRegisterLocation(2), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); moves_->AddMove( Location::FpuRegisterLocation(2), Location::FpuRegisterLocation(4), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); const char* expected = "mov.d $f6, $f2\n" @@ -191,12 +191,12 @@ TEST_F(EmitSwapMipsTest, RegisterAndFpuRegister) { moves_->AddMove( Location::RegisterLocation(4), Location::FpuRegisterLocation(2), - Primitive::kPrimFloat, + DataType::Type::kFloat32, nullptr); moves_->AddMove( Location::FpuRegisterLocation(2), Location::RegisterLocation(4), - Primitive::kPrimFloat, + DataType::Type::kFloat32, nullptr); const char* expected = "or $t8, $a0, $zero\n" @@ -209,12 +209,12 @@ TEST_F(EmitSwapMipsTest, RegisterPairAndFpuRegister) { moves_->AddMove( Location::RegisterPairLocation(4, 5), Location::FpuRegisterLocation(4), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); moves_->AddMove( Location::FpuRegisterLocation(4), Location::RegisterPairLocation(4, 5), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); const char* expected = "mfc1 $t8, $f4\n" @@ -230,22 +230,22 @@ TEST_F(EmitSwapMipsTest, TwoStackSlots) { moves_->AddMove( Location::StackSlot(52), Location::StackSlot(48), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves_->AddMove( Location::StackSlot(48), Location::StackSlot(52), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); const char* expected = - "addiu $sp, $sp, -4\n" + "addiu $sp, $sp, -16\n" "sw $v0, 0($sp)\n" - "lw $v0, 56($sp)\n" - "lw $t8, 52($sp)\n" - "sw $v0, 52($sp)\n" - "sw $t8, 56($sp)\n" + "lw $v0, 68($sp)\n" + "lw $t8, 64($sp)\n" + "sw $v0, 64($sp)\n" + "sw $t8, 68($sp)\n" "lw $v0, 0($sp)\n" - "addiu $sp, $sp, 4\n"; + "addiu $sp, $sp, 16\n"; DriverWrapper(moves_, expected, "TwoStackSlots"); } @@ -253,26 +253,26 @@ TEST_F(EmitSwapMipsTest, TwoDoubleStackSlots) { moves_->AddMove( Location::DoubleStackSlot(56), Location::DoubleStackSlot(48), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves_->AddMove( Location::DoubleStackSlot(48), Location::DoubleStackSlot(56), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); const char* expected = - "addiu $sp, $sp, -4\n" + "addiu $sp, $sp, -16\n" "sw $v0, 0($sp)\n" - "lw $v0, 60($sp)\n" - "lw $t8, 52($sp)\n" - "sw $v0, 52($sp)\n" - "sw $t8, 60($sp)\n" - "lw $v0, 64($sp)\n" - "lw $t8, 56($sp)\n" - "sw $v0, 56($sp)\n" - "sw $t8, 64($sp)\n" + "lw $v0, 72($sp)\n" + "lw $t8, 64($sp)\n" + "sw $v0, 64($sp)\n" + "sw $t8, 72($sp)\n" + "lw $v0, 76($sp)\n" + "lw $t8, 68($sp)\n" + "sw $v0, 68($sp)\n" + "sw $t8, 76($sp)\n" "lw $v0, 0($sp)\n" - "addiu $sp, $sp, 4\n"; + "addiu $sp, $sp, 16\n"; DriverWrapper(moves_, expected, "TwoDoubleStackSlots"); } @@ -280,12 +280,12 @@ TEST_F(EmitSwapMipsTest, RegisterAndStackSlot) { moves_->AddMove( Location::RegisterLocation(4), Location::StackSlot(48), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves_->AddMove( Location::StackSlot(48), Location::RegisterLocation(4), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); const char* expected = "or $t8, $a0, $zero\n" @@ -298,12 +298,12 @@ TEST_F(EmitSwapMipsTest, RegisterPairAndDoubleStackSlot) { moves_->AddMove( Location::RegisterPairLocation(4, 5), Location::DoubleStackSlot(32), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves_->AddMove( Location::DoubleStackSlot(32), Location::RegisterPairLocation(4, 5), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); const char* expected = "or $t8, $a0, $zero\n" @@ 
-319,12 +319,12 @@ TEST_F(EmitSwapMipsTest, FpuRegisterAndStackSlot) { moves_->AddMove( Location::FpuRegisterLocation(4), Location::StackSlot(48), - Primitive::kPrimFloat, + DataType::Type::kFloat32, nullptr); moves_->AddMove( Location::StackSlot(48), Location::FpuRegisterLocation(4), - Primitive::kPrimFloat, + DataType::Type::kFloat32, nullptr); const char* expected = "mov.s $f6, $f4\n" @@ -337,12 +337,12 @@ TEST_F(EmitSwapMipsTest, FpuRegisterAndDoubleStackSlot) { moves_->AddMove( Location::FpuRegisterLocation(4), Location::DoubleStackSlot(48), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); moves_->AddMove( Location::DoubleStackSlot(48), Location::FpuRegisterLocation(4), - Primitive::kPrimDouble, + DataType::Type::kFloat64, nullptr); const char* expected = "mov.d $f6, $f4\n" diff --git a/compiler/optimizing/escape.cc b/compiler/optimizing/escape.cc index 9df5bf1017..2b578c1cc8 100644 --- a/compiler/optimizing/escape.cc +++ b/compiler/optimizing/escape.cc @@ -36,6 +36,12 @@ void CalculateEscape(HInstruction* reference, *is_singleton = true; *is_singleton_and_not_returned = true; *is_singleton_and_not_deopt_visible = true; + + if (reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable()) { + // Finalizable reference is treated as being returned in the end. + *is_singleton_and_not_returned = false; + } + // Visit all uses to determine if this reference can escape into the heap, // a method call, an alias, etc. for (const HUseListNode<HInstruction*>& use : reference->GetUses()) { @@ -51,7 +57,9 @@ void CalculateEscape(HInstruction* reference, *is_singleton_and_not_returned = false; *is_singleton_and_not_deopt_visible = false; return; - } else if (user->IsPhi() || user->IsSelect() || user->IsInvoke() || + } else if (user->IsPhi() || + user->IsSelect() || + (user->IsInvoke() && user->GetSideEffects().DoesAnyWrite()) || (user->IsInstanceFieldSet() && (reference == user->InputAt(1))) || (user->IsUnresolvedInstanceFieldSet() && (reference == user->InputAt(1))) || (user->IsStaticFieldSet() && (reference == user->InputAt(1))) || diff --git a/compiler/optimizing/find_loops_test.cc b/compiler/optimizing/find_loops_test.cc index 04789d9a2d..75b8e9609e 100644 --- a/compiler/optimizing/find_loops_test.cc +++ b/compiler/optimizing/find_loops_test.cc @@ -16,64 +16,58 @@ #include "base/arena_allocator.h" #include "builder.h" -#include "dex_file.h" -#include "dex_instruction.h" +#include "dex/dex_file.h" +#include "dex/dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" -#include "ssa_liveness_analysis.h" #include "pretty_printer.h" +#include "ssa_liveness_analysis.h" #include "gtest/gtest.h" namespace art { -class FindLoopsTest : public CommonCompilerTest {}; +class FindLoopsTest : public OptimizingUnitTest {}; TEST_F(FindLoopsTest, CFG1) { // Constant is not used. 
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); for (HBasicBlock* block : graph->GetBlocks()) { ASSERT_EQ(block->GetLoopInformation(), nullptr); } } TEST_F(FindLoopsTest, CFG2) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); for (HBasicBlock* block : graph->GetBlocks()) { ASSERT_EQ(block->GetLoopInformation(), nullptr); } } TEST_F(FindLoopsTest, CFG3) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, Instruction::GOTO | 0x100, Instruction::RETURN); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); for (HBasicBlock* block : graph->GetBlocks()) { ASSERT_EQ(block->GetLoopInformation(), nullptr); } } TEST_F(FindLoopsTest, CFG4) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -81,24 +75,20 @@ TEST_F(FindLoopsTest, CFG4) { Instruction::CONST_4 | 5 << 12 | 0, Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); for (HBasicBlock* block : graph->GetBlocks()) { ASSERT_EQ(block->GetLoopInformation(), nullptr); } } TEST_F(FindLoopsTest, CFG5) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); for (HBasicBlock* block : graph->GetBlocks()) { ASSERT_EQ(block->GetLoopInformation(), nullptr); } @@ -136,15 +126,13 @@ TEST_F(FindLoopsTest, Loop1) { // while (a == a) { // } // return; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0xFE00, Instruction::RETURN_VOID); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); TestBlock(graph, 0, false, kInvalidBlockId); // entry block TestBlock(graph, 1, false, kInvalidBlockId); // pre header @@ -162,7 +150,7 @@ TEST_F(FindLoopsTest, Loop2) { // while (a == a) { // } // return a; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x400, Instruction::IF_EQ, 4, @@ -170,9 +158,7 @@ TEST_F(FindLoopsTest, Loop2) { Instruction::GOTO | 0xFD00, Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); 
TestBlock(graph, 0, false, kInvalidBlockId); // entry block TestBlock(graph, 1, false, kInvalidBlockId); // goto block @@ -187,7 +173,7 @@ TEST_F(FindLoopsTest, Loop2) { TEST_F(FindLoopsTest, Loop3) { // Make sure we create a preheader of a loop when a header originally has two // incoming blocks and one back edge. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -195,9 +181,7 @@ TEST_F(FindLoopsTest, Loop3) { Instruction::GOTO | 0xFE00, Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); TestBlock(graph, 0, false, kInvalidBlockId); // entry block TestBlock(graph, 1, false, kInvalidBlockId); // goto block @@ -213,7 +197,7 @@ TEST_F(FindLoopsTest, Loop3) { TEST_F(FindLoopsTest, Loop4) { // Test loop with originally two back edges. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::IF_EQ, 3, @@ -221,9 +205,7 @@ TEST_F(FindLoopsTest, Loop4) { Instruction::GOTO | 0xFB00, Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); TestBlock(graph, 0, false, kInvalidBlockId); // entry block TestBlock(graph, 1, false, kInvalidBlockId); // pre header @@ -239,7 +221,7 @@ TEST_F(FindLoopsTest, Loop4) { TEST_F(FindLoopsTest, Loop5) { // Test loop with two exit edges. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::IF_EQ, 3, @@ -247,9 +229,7 @@ TEST_F(FindLoopsTest, Loop5) { Instruction::GOTO | 0xFB00, Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); TestBlock(graph, 0, false, kInvalidBlockId); // entry block TestBlock(graph, 1, false, kInvalidBlockId); // pre header @@ -264,7 +244,7 @@ TEST_F(FindLoopsTest, Loop5) { } TEST_F(FindLoopsTest, InnerLoop) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::IF_EQ, 3, @@ -272,9 +252,7 @@ TEST_F(FindLoopsTest, InnerLoop) { Instruction::GOTO | 0xFB00, Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); TestBlock(graph, 0, false, kInvalidBlockId); // entry block TestBlock(graph, 1, false, kInvalidBlockId); // pre header of outer loop @@ -295,7 +273,7 @@ TEST_F(FindLoopsTest, InnerLoop) { } TEST_F(FindLoopsTest, TwoLoops) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0xFE00, // first loop @@ -303,9 +281,7 @@ TEST_F(FindLoopsTest, TwoLoops) { Instruction::GOTO | 0xFE00, // second loop Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); TestBlock(graph, 0, false, kInvalidBlockId); // entry block TestBlock(graph, 1, false, kInvalidBlockId); // 
pre header of first loop @@ -325,7 +301,7 @@ TEST_F(FindLoopsTest, TwoLoops) { } TEST_F(FindLoopsTest, NonNaturalLoop) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x0100, @@ -333,9 +309,7 @@ TEST_F(FindLoopsTest, NonNaturalLoop) { Instruction::GOTO | 0xFD00, Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); ASSERT_TRUE(graph->GetBlocks()[3]->IsLoopHeader()); HLoopInformation* info = graph->GetBlocks()[3]->GetLoopInformation(); ASSERT_EQ(1u, info->NumberOfBackEdges()); @@ -343,15 +317,13 @@ TEST_F(FindLoopsTest, NonNaturalLoop) { } TEST_F(FindLoopsTest, DoWhileLoop) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x0100, Instruction::IF_EQ, 0xFFFF, Instruction::RETURN | 0 << 8); - ArenaPool arena; - ArenaAllocator allocator(&arena); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); TestBlock(graph, 0, false, kInvalidBlockId); // entry block TestBlock(graph, 1, false, kInvalidBlockId); // pre header of first loop diff --git a/compiler/optimizing/graph_checker.cc b/compiler/optimizing/graph_checker.cc index aea901dec7..c88baa8610 100644 --- a/compiler/optimizing/graph_checker.cc +++ b/compiler/optimizing/graph_checker.cc @@ -17,20 +17,29 @@ #include "graph_checker.h" #include <algorithm> -#include <string> #include <sstream> +#include <string> #include "android-base/stringprintf.h" -#include "base/arena_containers.h" #include "base/bit_vector-inl.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" namespace art { using android::base::StringPrintf; static bool IsAllowedToJumpToExitBlock(HInstruction* instruction) { - return instruction->IsThrow() || instruction->IsReturn() || instruction->IsReturnVoid(); + // Anything that returns is allowed to jump into the exit block. + if (instruction->IsReturn() || instruction->IsReturnVoid()) { + return true; + } + // Anything that always throws is allowed to jump into the exit block. + if (instruction->IsGoto() && instruction->GetPrevious() != nullptr) { + instruction = instruction->GetPrevious(); + } + return instruction->AlwaysThrows(); } static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) { @@ -47,10 +56,13 @@ static bool IsExitTryBoundaryIntoExitBlock(HBasicBlock* block) { void GraphChecker::VisitBasicBlock(HBasicBlock* block) { current_block_ = block; + // Use local allocator for allocating memory. + ScopedArenaAllocator allocator(GetGraph()->GetArenaStack()); + // Check consistency with respect to predecessors of `block`. // Note: Counting duplicates with a sorted vector uses up to 6x less memory // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. 
- ArenaVector<HBasicBlock*>& sorted_predecessors = blocks_storage_; + ScopedArenaVector<HBasicBlock*> sorted_predecessors(allocator.Adapter(kArenaAllocGraphChecker)); sorted_predecessors.assign(block->GetPredecessors().begin(), block->GetPredecessors().end()); std::sort(sorted_predecessors.begin(), sorted_predecessors.end()); for (auto it = sorted_predecessors.begin(), end = sorted_predecessors.end(); it != end; ) { @@ -73,7 +85,7 @@ void GraphChecker::VisitBasicBlock(HBasicBlock* block) { // Check consistency with respect to successors of `block`. // Note: Counting duplicates with a sorted vector uses up to 6x less memory // than ArenaSafeMap<HBasicBlock*, size_t> and also allows storage reuse. - ArenaVector<HBasicBlock*>& sorted_successors = blocks_storage_; + ScopedArenaVector<HBasicBlock*> sorted_successors(allocator.Adapter(kArenaAllocGraphChecker)); sorted_successors.assign(block->GetSuccessors().begin(), block->GetSuccessors().end()); std::sort(sorted_successors.begin(), sorted_successors.end()); for (auto it = sorted_successors.begin(), end = sorted_successors.end(); it != end; ) { @@ -456,7 +468,7 @@ void GraphChecker::VisitInstruction(HInstruction* instruction) { } // Ensure that reference type instructions have reference type info. - if (instruction->GetType() == Primitive::kPrimNot) { + if (instruction->GetType() == DataType::Type::kReference) { if (!instruction->GetReferenceTypeInfo().IsValid()) { AddError(StringPrintf("Reference type instruction %s:%d does not have " "valid reference type information.", @@ -674,7 +686,7 @@ void GraphChecker::HandleLoop(HBasicBlock* loop_header) { static bool IsSameSizeConstant(const HInstruction* insn1, const HInstruction* insn2) { return insn1->IsConstant() && insn2->IsConstant() - && Primitive::Is64BitType(insn1->GetType()) == Primitive::Is64BitType(insn2->GetType()); + && DataType::Is64BitType(insn1->GetType()) == DataType::Is64BitType(insn2->GetType()); } static bool IsConstantEquivalent(const HInstruction* insn1, @@ -721,20 +733,20 @@ void GraphChecker::VisitPhi(HPhi* phi) { // Ensure that the inputs have the same primitive kind as the phi. 
for (size_t i = 0; i < input_records.size(); ++i) { HInstruction* input = input_records[i].GetInstruction(); - if (Primitive::PrimitiveKind(input->GetType()) != Primitive::PrimitiveKind(phi->GetType())) { + if (DataType::Kind(input->GetType()) != DataType::Kind(phi->GetType())) { AddError(StringPrintf( "Input %d at index %zu of phi %d from block %d does not have the " "same kind as the phi: %s versus %s", input->GetId(), i, phi->GetId(), phi->GetBlock()->GetBlockId(), - Primitive::PrettyDescriptor(input->GetType()), - Primitive::PrettyDescriptor(phi->GetType()))); + DataType::PrettyDescriptor(input->GetType()), + DataType::PrettyDescriptor(phi->GetType()))); } } if (phi->GetType() != HPhi::ToPhiType(phi->GetType())) { AddError(StringPrintf("Phi %d in block %d does not have an expected phi type: %s", phi->GetId(), phi->GetBlock()->GetBlockId(), - Primitive::PrettyDescriptor(phi->GetType()))); + DataType::PrettyDescriptor(phi->GetType()))); } if (phi->IsCatchPhi()) { @@ -820,7 +832,7 @@ void GraphChecker::VisitPhi(HPhi* phi) { phi->GetId(), phi->GetRegNumber(), type_str.str().c_str())); - } else if (phi->GetType() == Primitive::kPrimNot) { + } else if (phi->GetType() == DataType::Type::kReference) { std::stringstream type_str; type_str << other_phi->GetType(); AddError(StringPrintf( @@ -829,10 +841,14 @@ void GraphChecker::VisitPhi(HPhi* phi) { phi->GetRegNumber(), type_str.str().c_str())); } else { + // Use local allocator for allocating memory. + ScopedArenaAllocator allocator(GetGraph()->GetArenaStack()); // If we get here, make sure we allocate all the necessary storage at once // because the BitVector reallocation strategy has very bad worst-case behavior. - ArenaBitVector& visited = visited_storage_; - visited.SetBit(GetGraph()->GetCurrentInstructionId()); + ArenaBitVector visited(&allocator, + GetGraph()->GetCurrentInstructionId(), + /* expandable */ false, + kArenaAllocGraphChecker); visited.ClearAllBits(); if (!IsConstantEquivalent(phi, other_phi, &visited)) { AddError(StringPrintf("Two phis (%d and %d) found for VReg %d but they " @@ -859,7 +875,7 @@ void GraphChecker::HandleBooleanInput(HInstruction* instruction, size_t input_in static_cast<int>(input_index), value)); } - } else if (Primitive::PrimitiveKind(input->GetType()) != Primitive::kPrimInt) { + } else if (DataType::Kind(input->GetType()) != DataType::Type::kInt32) { // TODO: We need a data-flow analysis to determine if an input like Phi, // Select or a binary operation is actually Boolean. Allow for now. 
AddError(StringPrintf( @@ -867,7 +883,7 @@ void GraphChecker::HandleBooleanInput(HInstruction* instruction, size_t input_in instruction->DebugName(), instruction->GetId(), static_cast<int>(input_index), - Primitive::PrettyDescriptor(input->GetType()))); + DataType::PrettyDescriptor(input->GetType()))); } } @@ -904,27 +920,27 @@ void GraphChecker::VisitBooleanNot(HBooleanNot* instruction) { void GraphChecker::VisitCondition(HCondition* op) { VisitInstruction(op); - if (op->GetType() != Primitive::kPrimBoolean) { + if (op->GetType() != DataType::Type::kBool) { AddError(StringPrintf( "Condition %s %d has a non-Boolean result type: %s.", op->DebugName(), op->GetId(), - Primitive::PrettyDescriptor(op->GetType()))); + DataType::PrettyDescriptor(op->GetType()))); } HInstruction* lhs = op->InputAt(0); HInstruction* rhs = op->InputAt(1); - if (Primitive::PrimitiveKind(lhs->GetType()) != Primitive::PrimitiveKind(rhs->GetType())) { + if (DataType::Kind(lhs->GetType()) != DataType::Kind(rhs->GetType())) { AddError(StringPrintf( "Condition %s %d has inputs of different kinds: %s, and %s.", op->DebugName(), op->GetId(), - Primitive::PrettyDescriptor(lhs->GetType()), - Primitive::PrettyDescriptor(rhs->GetType()))); + DataType::PrettyDescriptor(lhs->GetType()), + DataType::PrettyDescriptor(rhs->GetType()))); } if (!op->IsEqual() && !op->IsNotEqual()) { - if ((lhs->GetType() == Primitive::kPrimNot)) { + if ((lhs->GetType() == DataType::Type::kReference)) { AddError(StringPrintf( "Condition %s %d uses an object as left-hand side input.", op->DebugName(), op->GetId())); - } else if (rhs->GetType() == Primitive::kPrimNot) { + } else if (rhs->GetType() == DataType::Type::kReference) { AddError(StringPrintf( "Condition %s %d uses an object as right-hand side input.", op->DebugName(), op->GetId())); @@ -934,72 +950,72 @@ void GraphChecker::VisitCondition(HCondition* op) { void GraphChecker::VisitNeg(HNeg* instruction) { VisitInstruction(instruction); - Primitive::Type input_type = instruction->InputAt(0)->GetType(); - Primitive::Type result_type = instruction->GetType(); - if (result_type != Primitive::PrimitiveKind(input_type)) { + DataType::Type input_type = instruction->InputAt(0)->GetType(); + DataType::Type result_type = instruction->GetType(); + if (result_type != DataType::Kind(input_type)) { AddError(StringPrintf("Binary operation %s %d has a result type different " "from its input kind: %s vs %s.", instruction->DebugName(), instruction->GetId(), - Primitive::PrettyDescriptor(result_type), - Primitive::PrettyDescriptor(input_type))); + DataType::PrettyDescriptor(result_type), + DataType::PrettyDescriptor(input_type))); } } void GraphChecker::VisitBinaryOperation(HBinaryOperation* op) { VisitInstruction(op); - Primitive::Type lhs_type = op->InputAt(0)->GetType(); - Primitive::Type rhs_type = op->InputAt(1)->GetType(); - Primitive::Type result_type = op->GetType(); + DataType::Type lhs_type = op->InputAt(0)->GetType(); + DataType::Type rhs_type = op->InputAt(1)->GetType(); + DataType::Type result_type = op->GetType(); // Type consistency between inputs. 
if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) { - if (Primitive::PrimitiveKind(rhs_type) != Primitive::kPrimInt) { + if (DataType::Kind(rhs_type) != DataType::Type::kInt32) { AddError(StringPrintf("Shift/rotate operation %s %d has a non-int kind second input: " "%s of type %s.", op->DebugName(), op->GetId(), op->InputAt(1)->DebugName(), - Primitive::PrettyDescriptor(rhs_type))); + DataType::PrettyDescriptor(rhs_type))); } } else { - if (Primitive::PrimitiveKind(lhs_type) != Primitive::PrimitiveKind(rhs_type)) { + if (DataType::Kind(lhs_type) != DataType::Kind(rhs_type)) { AddError(StringPrintf("Binary operation %s %d has inputs of different kinds: %s, and %s.", op->DebugName(), op->GetId(), - Primitive::PrettyDescriptor(lhs_type), - Primitive::PrettyDescriptor(rhs_type))); + DataType::PrettyDescriptor(lhs_type), + DataType::PrettyDescriptor(rhs_type))); } } // Type consistency between result and input(s). if (op->IsCompare()) { - if (result_type != Primitive::kPrimInt) { + if (result_type != DataType::Type::kInt32) { AddError(StringPrintf("Compare operation %d has a non-int result type: %s.", op->GetId(), - Primitive::PrettyDescriptor(result_type))); + DataType::PrettyDescriptor(result_type))); } } else if (op->IsUShr() || op->IsShr() || op->IsShl() || op->IsRor()) { // Only check the first input (value), as the second one (distance) // must invariably be of kind `int`. - if (result_type != Primitive::PrimitiveKind(lhs_type)) { + if (result_type != DataType::Kind(lhs_type)) { AddError(StringPrintf("Shift/rotate operation %s %d has a result type different " "from its left-hand side (value) input kind: %s vs %s.", op->DebugName(), op->GetId(), - Primitive::PrettyDescriptor(result_type), - Primitive::PrettyDescriptor(lhs_type))); + DataType::PrettyDescriptor(result_type), + DataType::PrettyDescriptor(lhs_type))); } } else { - if (Primitive::PrimitiveKind(result_type) != Primitive::PrimitiveKind(lhs_type)) { + if (DataType::Kind(result_type) != DataType::Kind(lhs_type)) { AddError(StringPrintf("Binary operation %s %d has a result kind different " "from its left-hand side input kind: %s vs %s.", op->DebugName(), op->GetId(), - Primitive::PrettyDescriptor(result_type), - Primitive::PrettyDescriptor(lhs_type))); + DataType::PrettyDescriptor(result_type), + DataType::PrettyDescriptor(lhs_type))); } - if (Primitive::PrimitiveKind(result_type) != Primitive::PrimitiveKind(rhs_type)) { + if (DataType::Kind(result_type) != DataType::Kind(rhs_type)) { AddError(StringPrintf("Binary operation %s %d has a result kind different " "from its right-hand side input kind: %s vs %s.", op->DebugName(), op->GetId(), - Primitive::PrettyDescriptor(result_type), - Primitive::PrettyDescriptor(rhs_type))); + DataType::PrettyDescriptor(result_type), + DataType::PrettyDescriptor(rhs_type))); } } } @@ -1028,16 +1044,16 @@ void GraphChecker::VisitBoundType(HBoundType* instruction) { void GraphChecker::VisitTypeConversion(HTypeConversion* instruction) { VisitInstruction(instruction); - Primitive::Type result_type = instruction->GetResultType(); - Primitive::Type input_type = instruction->GetInputType(); + DataType::Type result_type = instruction->GetResultType(); + DataType::Type input_type = instruction->GetInputType(); // Invariant: We should never generate a conversion to a Boolean value. 
- if (result_type == Primitive::kPrimBoolean) { + if (result_type == DataType::Type::kBool) { AddError(StringPrintf( "%s %d converts to a %s (from a %s).", instruction->DebugName(), instruction->GetId(), - Primitive::PrettyDescriptor(result_type), - Primitive::PrettyDescriptor(input_type))); + DataType::PrettyDescriptor(result_type), + DataType::PrettyDescriptor(input_type))); } } diff --git a/compiler/optimizing/graph_checker.h b/compiler/optimizing/graph_checker.h index 3060c80073..0f0b49d240 100644 --- a/compiler/optimizing/graph_checker.h +++ b/compiler/optimizing/graph_checker.h @@ -17,10 +17,13 @@ #ifndef ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_ #define ART_COMPILER_OPTIMIZING_GRAPH_CHECKER_H_ -#include "nodes.h" - #include <ostream> +#include "base/arena_bit_vector.h" +#include "base/bit_vector-inl.h" +#include "base/scoped_arena_allocator.h" +#include "nodes.h" + namespace art { // A control-flow graph visitor performing various checks. @@ -28,14 +31,12 @@ class GraphChecker : public HGraphDelegateVisitor { public: explicit GraphChecker(HGraph* graph, const char* dump_prefix = "art::GraphChecker: ") : HGraphDelegateVisitor(graph), - errors_(graph->GetArena()->Adapter(kArenaAllocGraphChecker)), + errors_(graph->GetAllocator()->Adapter(kArenaAllocGraphChecker)), dump_prefix_(dump_prefix), - seen_ids_(graph->GetArena(), - graph->GetCurrentInstructionId(), - false, - kArenaAllocGraphChecker), - blocks_storage_(graph->GetArena()->Adapter(kArenaAllocGraphChecker)), - visited_storage_(graph->GetArena(), 0u, true, kArenaAllocGraphChecker) {} + allocator_(graph->GetArenaStack()), + seen_ids_(&allocator_, graph->GetCurrentInstructionId(), false, kArenaAllocGraphChecker) { + seen_ids_.ClearAllBits(); + } // Check the whole graph (in reverse post-order). void Run() { @@ -104,12 +105,9 @@ class GraphChecker : public HGraphDelegateVisitor { private: // String displayed before dumped errors. const char* const dump_prefix_; + ScopedArenaAllocator allocator_; ArenaBitVector seen_ids_; - // To reduce the total arena memory allocation, we reuse the same storage. 
- ArenaVector<HBasicBlock*> blocks_storage_; - ArenaBitVector visited_storage_; - DISALLOW_COPY_AND_ASSIGN(GraphChecker); }; diff --git a/compiler/optimizing/graph_checker_test.cc b/compiler/optimizing/graph_checker_test.cc index 2b8231942b..08bfa5d80f 100644 --- a/compiler/optimizing/graph_checker_test.cc +++ b/compiler/optimizing/graph_checker_test.cc @@ -19,6 +19,12 @@ namespace art { +class GraphCheckerTest : public OptimizingUnitTest { + protected: + HGraph* CreateSimpleCFG(); + void TestCode(const std::vector<uint16_t>& data); +}; + /** * Create a simple control-flow graph composed of two blocks: * @@ -27,14 +33,14 @@ namespace art { * BasicBlock 1, pred: 0 * 1: Exit */ -HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { - HGraph* graph = CreateGraph(allocator); - HBasicBlock* entry_block = new (allocator) HBasicBlock(graph); - entry_block->AddInstruction(new (allocator) HReturnVoid()); +HGraph* GraphCheckerTest::CreateSimpleCFG() { + HGraph* graph = CreateGraph(); + HBasicBlock* entry_block = new (GetAllocator()) HBasicBlock(graph); + entry_block->AddInstruction(new (GetAllocator()) HReturnVoid()); graph->AddBlock(entry_block); graph->SetEntryBlock(entry_block); - HBasicBlock* exit_block = new (allocator) HBasicBlock(graph); - exit_block->AddInstruction(new (allocator) HExit()); + HBasicBlock* exit_block = new (GetAllocator()) HBasicBlock(graph); + exit_block->AddInstruction(new (GetAllocator()) HExit()); graph->AddBlock(exit_block); graph->SetExitBlock(exit_block); entry_block->AddSuccessor(exit_block); @@ -42,10 +48,8 @@ HGraph* CreateSimpleCFG(ArenaAllocator* allocator) { return graph; } -static void TestCode(const uint16_t* data) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); +void GraphCheckerTest::TestCode(const std::vector<uint16_t>& data) { + HGraph* graph = CreateCFG(data); ASSERT_NE(graph, nullptr); GraphChecker graph_checker(graph); @@ -53,17 +57,15 @@ static void TestCode(const uint16_t* data) { ASSERT_TRUE(graph_checker.IsValid()); } -class GraphCheckerTest : public CommonCompilerTest {}; - TEST_F(GraphCheckerTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID); TestCode(data); } TEST_F(GraphCheckerTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -71,7 +73,7 @@ TEST_F(GraphCheckerTest, CFG1) { } TEST_F(GraphCheckerTest, CFG2) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -81,7 +83,7 @@ TEST_F(GraphCheckerTest, CFG2) { } TEST_F(GraphCheckerTest, CFG3) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -93,10 +95,7 @@ TEST_F(GraphCheckerTest, CFG3) { // Test case with an invalid graph containing inconsistent // predecessor/successor arcs in CFG. 
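// The header change above replaces the reused block/visited storage with a
// private ScopedArenaAllocator and a seen_ids_ bit vector cleared once in the
// constructor. A minimal sketch of the duplicate-id detection such a bit
// vector presumably backs (hypothetical, not quoted from graph_checker.cc):
#include <vector>

struct SeenIds {
  std::vector<bool> bits;
  explicit SeenIds(size_t max_id) : bits(max_id, false) {}

  // Returns false if the id was already seen, i.e. two instructions share an id.
  bool MarkSeen(size_t id) {
    if (bits[id]) return false;
    bits[id] = true;
    return true;
  }
};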
TEST_F(GraphCheckerTest, InconsistentPredecessorsAndSuccessors) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateSimpleCFG(&allocator); + HGraph* graph = CreateSimpleCFG(); GraphChecker graph_checker(graph); graph_checker.Run(); ASSERT_TRUE(graph_checker.IsValid()); @@ -111,10 +110,7 @@ TEST_F(GraphCheckerTest, InconsistentPredecessorsAndSuccessors) { // Test case with an invalid graph containing a non-branch last // instruction in a block. TEST_F(GraphCheckerTest, BlockEndingWithNonBranchInstruction) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateSimpleCFG(&allocator); + HGraph* graph = CreateSimpleCFG(); GraphChecker graph_checker(graph); graph_checker.Run(); ASSERT_TRUE(graph_checker.IsValid()); @@ -132,7 +128,7 @@ TEST_F(GraphCheckerTest, BlockEndingWithNonBranchInstruction) { TEST_F(GraphCheckerTest, SSAPhi) { // This code creates one Phi function during the conversion to SSA form. - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, diff --git a/compiler/optimizing/graph_test.cc b/compiler/optimizing/graph_test.cc index 28ee3a5e8b..29af808731 100644 --- a/compiler/optimizing/graph_test.cc +++ b/compiler/optimizing/graph_test.cc @@ -24,43 +24,52 @@ namespace art { -static HBasicBlock* createIfBlock(HGraph* graph, ArenaAllocator* allocator) { - HBasicBlock* if_block = new (allocator) HBasicBlock(graph); +class GraphTest : public OptimizingUnitTest { + protected: + HBasicBlock* CreateIfBlock(HGraph* graph); + HBasicBlock* CreateGotoBlock(HGraph* graph); + HBasicBlock* CreateEntryBlock(HGraph* graph); + HBasicBlock* CreateReturnBlock(HGraph* graph); + HBasicBlock* CreateExitBlock(HGraph* graph); +}; + +HBasicBlock* GraphTest::CreateIfBlock(HGraph* graph) { + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(if_block); HInstruction* instr = graph->GetIntConstant(4); - HInstruction* equal = new (allocator) HEqual(instr, instr); + HInstruction* equal = new (GetAllocator()) HEqual(instr, instr); if_block->AddInstruction(equal); - instr = new (allocator) HIf(equal); + instr = new (GetAllocator()) HIf(equal); if_block->AddInstruction(instr); return if_block; } -static HBasicBlock* createGotoBlock(HGraph* graph, ArenaAllocator* allocator) { - HBasicBlock* block = new (allocator) HBasicBlock(graph); +HBasicBlock* GraphTest::CreateGotoBlock(HGraph* graph) { + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); - HInstruction* got = new (allocator) HGoto(); + HInstruction* got = new (GetAllocator()) HGoto(); block->AddInstruction(got); return block; } -static HBasicBlock* createEntryBlock(HGraph* graph, ArenaAllocator* allocator) { - HBasicBlock* block = createGotoBlock(graph, allocator); +HBasicBlock* GraphTest::CreateEntryBlock(HGraph* graph) { + HBasicBlock* block = CreateGotoBlock(graph); graph->SetEntryBlock(block); return block; } -static HBasicBlock* createReturnBlock(HGraph* graph, ArenaAllocator* allocator) { - HBasicBlock* block = new (allocator) HBasicBlock(graph); +HBasicBlock* GraphTest::CreateReturnBlock(HGraph* graph) { + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); - HInstruction* return_instr = new (allocator) HReturnVoid(); + HInstruction* return_instr = new (GetAllocator()) HReturnVoid(); block->AddInstruction(return_instr); return block; } -static HBasicBlock* 
createExitBlock(HGraph* graph, ArenaAllocator* allocator) { - HBasicBlock* block = new (allocator) HBasicBlock(graph); +HBasicBlock* GraphTest::CreateExitBlock(HGraph* graph) { + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); - HInstruction* exit_instr = new (allocator) HExit(); + HInstruction* exit_instr = new (GetAllocator()) HExit(); block->AddInstruction(exit_instr); return block; } @@ -68,16 +77,13 @@ static HBasicBlock* createExitBlock(HGraph* graph, ArenaAllocator* allocator) { // Test that the successors of an if block stay consistent after a SimplifyCFG. // This test sets the false block to be the return block. -TEST(GraphTest, IfSuccessorSimpleJoinBlock1) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry_block = createEntryBlock(graph, &allocator); - HBasicBlock* if_block = createIfBlock(graph, &allocator); - HBasicBlock* if_true = createGotoBlock(graph, &allocator); - HBasicBlock* return_block = createReturnBlock(graph, &allocator); - HBasicBlock* exit_block = createExitBlock(graph, &allocator); +TEST_F(GraphTest, IfSuccessorSimpleJoinBlock1) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry_block = CreateEntryBlock(graph); + HBasicBlock* if_block = CreateIfBlock(graph); + HBasicBlock* if_true = CreateGotoBlock(graph); + HBasicBlock* return_block = CreateReturnBlock(graph); + HBasicBlock* exit_block = CreateExitBlock(graph); entry_block->AddSuccessor(if_block); if_block->AddSuccessor(if_true); @@ -103,16 +109,13 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock1) { // Test that the successors of an if block stay consistent after a SimplifyCFG. // This test sets the true block to be the return block. -TEST(GraphTest, IfSuccessorSimpleJoinBlock2) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry_block = createEntryBlock(graph, &allocator); - HBasicBlock* if_block = createIfBlock(graph, &allocator); - HBasicBlock* if_false = createGotoBlock(graph, &allocator); - HBasicBlock* return_block = createReturnBlock(graph, &allocator); - HBasicBlock* exit_block = createExitBlock(graph, &allocator); +TEST_F(GraphTest, IfSuccessorSimpleJoinBlock2) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry_block = CreateEntryBlock(graph); + HBasicBlock* if_block = CreateIfBlock(graph); + HBasicBlock* if_false = CreateGotoBlock(graph); + HBasicBlock* return_block = CreateReturnBlock(graph); + HBasicBlock* exit_block = CreateExitBlock(graph); entry_block->AddSuccessor(if_block); if_block->AddSuccessor(return_block); @@ -138,15 +141,12 @@ TEST(GraphTest, IfSuccessorSimpleJoinBlock2) { // Test that the successors of an if block stay consistent after a SimplifyCFG. // This test sets the true block to be the loop header. 
-TEST(GraphTest, IfSuccessorMultipleBackEdges1) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry_block = createEntryBlock(graph, &allocator); - HBasicBlock* if_block = createIfBlock(graph, &allocator); - HBasicBlock* return_block = createReturnBlock(graph, &allocator); - HBasicBlock* exit_block = createExitBlock(graph, &allocator); +TEST_F(GraphTest, IfSuccessorMultipleBackEdges1) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry_block = CreateEntryBlock(graph); + HBasicBlock* if_block = CreateIfBlock(graph); + HBasicBlock* return_block = CreateReturnBlock(graph); + HBasicBlock* exit_block = CreateExitBlock(graph); entry_block->AddSuccessor(if_block); if_block->AddSuccessor(if_block); @@ -173,15 +173,12 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges1) { // Test that the successors of an if block stay consistent after a SimplifyCFG. // This test sets the false block to be the loop header. -TEST(GraphTest, IfSuccessorMultipleBackEdges2) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry_block = createEntryBlock(graph, &allocator); - HBasicBlock* if_block = createIfBlock(graph, &allocator); - HBasicBlock* return_block = createReturnBlock(graph, &allocator); - HBasicBlock* exit_block = createExitBlock(graph, &allocator); +TEST_F(GraphTest, IfSuccessorMultipleBackEdges2) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry_block = CreateEntryBlock(graph); + HBasicBlock* if_block = CreateIfBlock(graph); + HBasicBlock* return_block = CreateReturnBlock(graph); + HBasicBlock* exit_block = CreateExitBlock(graph); entry_block->AddSuccessor(if_block); if_block->AddSuccessor(return_block); @@ -208,16 +205,13 @@ TEST(GraphTest, IfSuccessorMultipleBackEdges2) { // Test that the successors of an if block stay consistent after a SimplifyCFG. // This test sets the true block to be a loop header with multiple pre headers. -TEST(GraphTest, IfSuccessorMultiplePreHeaders1) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry_block = createEntryBlock(graph, &allocator); - HBasicBlock* first_if_block = createIfBlock(graph, &allocator); - HBasicBlock* if_block = createIfBlock(graph, &allocator); - HBasicBlock* loop_block = createGotoBlock(graph, &allocator); - HBasicBlock* return_block = createReturnBlock(graph, &allocator); +TEST_F(GraphTest, IfSuccessorMultiplePreHeaders1) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry_block = CreateEntryBlock(graph); + HBasicBlock* first_if_block = CreateIfBlock(graph); + HBasicBlock* if_block = CreateIfBlock(graph); + HBasicBlock* loop_block = CreateGotoBlock(graph); + HBasicBlock* return_block = CreateReturnBlock(graph); entry_block->AddSuccessor(first_if_block); first_if_block->AddSuccessor(if_block); @@ -247,16 +241,13 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders1) { // Test that the successors of an if block stay consistent after a SimplifyCFG. // This test sets the false block to be a loop header with multiple pre headers. 
-TEST(GraphTest, IfSuccessorMultiplePreHeaders2) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry_block = createEntryBlock(graph, &allocator); - HBasicBlock* first_if_block = createIfBlock(graph, &allocator); - HBasicBlock* if_block = createIfBlock(graph, &allocator); - HBasicBlock* loop_block = createGotoBlock(graph, &allocator); - HBasicBlock* return_block = createReturnBlock(graph, &allocator); +TEST_F(GraphTest, IfSuccessorMultiplePreHeaders2) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry_block = CreateEntryBlock(graph); + HBasicBlock* first_if_block = CreateIfBlock(graph); + HBasicBlock* if_block = CreateIfBlock(graph); + HBasicBlock* loop_block = CreateGotoBlock(graph); + HBasicBlock* return_block = CreateReturnBlock(graph); entry_block->AddSuccessor(first_if_block); first_if_block->AddSuccessor(if_block); @@ -283,17 +274,14 @@ TEST(GraphTest, IfSuccessorMultiplePreHeaders2) { loop_block->GetLoopInformation()->GetPreHeader()); } -TEST(GraphTest, InsertInstructionBefore) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* block = createGotoBlock(graph, &allocator); +TEST_F(GraphTest, InsertInstructionBefore) { + HGraph* graph = CreateGraph(); + HBasicBlock* block = CreateGotoBlock(graph); HInstruction* got = block->GetLastInstruction(); ASSERT_TRUE(got->IsControlFlow()); // Test at the beginning of the block. - HInstruction* first_instruction = new (&allocator) HIntConstant(4); + HInstruction* first_instruction = new (GetAllocator()) HIntConstant(4); block->InsertInstructionBefore(first_instruction, got); ASSERT_NE(first_instruction->GetId(), -1); @@ -306,7 +294,7 @@ TEST(GraphTest, InsertInstructionBefore) { ASSERT_EQ(got->GetPrevious(), first_instruction); // Test in the middle of the block. - HInstruction* second_instruction = new (&allocator) HIntConstant(4); + HInstruction* second_instruction = new (GetAllocator()) HIntConstant(4); block->InsertInstructionBefore(second_instruction, got); ASSERT_NE(second_instruction->GetId(), -1); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index a20ec3c0db..5ff31cead5 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -21,10 +21,13 @@ #include <cctype> #include <sstream> +#include "art_method.h" #include "bounds_check_elimination.h" #include "builder.h" #include "code_generator.h" +#include "data_type-inl.h" #include "dead_code_elimination.h" +#include "dex/descriptors_names.h" #include "disassembler.h" #include "inliner.h" #include "licm.h" @@ -32,6 +35,7 @@ #include "optimization.h" #include "reference_type_propagation.h" #include "register_allocator_linear_scan.h" +#include "scoped_thread_state_change-inl.h" #include "ssa_liveness_analysis.h" #include "utils/assembler.h" #include "utils/intrusive_forward_list.h" @@ -150,7 +154,7 @@ class HGraphVisualizerDisassembler { } const uint8_t* base = disassembler_->GetDisassemblerOptions()->base_address_; - if (instruction_set_ == kThumb2) { + if (instruction_set_ == InstructionSet::kThumb2) { // ARM and Thumb-2 use the same disassembler. The bottom bit of the // address is used to distinguish between the two. 
base += 1; @@ -243,25 +247,6 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { } } - char GetTypeId(Primitive::Type type) { - // Note that Primitive::Descriptor would not work for us - // because it does not handle reference types (that is kPrimNot). - switch (type) { - case Primitive::kPrimBoolean: return 'z'; - case Primitive::kPrimByte: return 'b'; - case Primitive::kPrimChar: return 'c'; - case Primitive::kPrimShort: return 's'; - case Primitive::kPrimInt: return 'i'; - case Primitive::kPrimLong: return 'j'; - case Primitive::kPrimFloat: return 'f'; - case Primitive::kPrimDouble: return 'd'; - case Primitive::kPrimNot: return 'l'; - case Primitive::kPrimVoid: return 'v'; - } - LOG(FATAL) << "Unreachable"; - return 'v'; - } - void PrintPredecessors(HBasicBlock* block) { AddIndent(); output_ << "predecessors"; @@ -461,6 +446,9 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { ? GetGraph()->GetDexFile().PrettyMethod(invoke->GetDexMethodIndex(), kWithSignature) : method->PrettyMethod(kWithSignature); StartAttributeStream("method_name") << method_name; + StartAttributeStream("always_throws") << std::boolalpha + << invoke->AlwaysThrows() + << std::noboolalpha; } void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { @@ -501,6 +489,20 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("field_type") << iset->GetFieldType(); } + void VisitStaticFieldGet(HStaticFieldGet* sget) OVERRIDE { + StartAttributeStream("field_name") << + sget->GetFieldInfo().GetDexFile().PrettyField(sget->GetFieldInfo().GetFieldIndex(), + /* with type */ false); + StartAttributeStream("field_type") << sget->GetFieldType(); + } + + void VisitStaticFieldSet(HStaticFieldSet* sset) OVERRIDE { + StartAttributeStream("field_name") << + sset->GetFieldInfo().GetDexFile().PrettyField(sset->GetFieldInfo().GetFieldIndex(), + /* with type */ false); + StartAttributeStream("field_type") << sset->GetFieldType(); + } + void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* field_access) OVERRIDE { StartAttributeStream("field_type") << field_access->GetFieldType(); } @@ -525,20 +527,21 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { StartAttributeStream("kind") << deoptimize->GetKind(); } - void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE { - StartAttributeStream("unsigned") << std::boolalpha << hadd->IsUnsigned() << std::noboolalpha; - StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; + void VisitVecOperation(HVecOperation* vec_operation) OVERRIDE { + StartAttributeStream("packed_type") << vec_operation->GetPackedType(); } - void VisitVecMin(HVecMin* min) OVERRIDE { - StartAttributeStream("unsigned") << std::boolalpha << min->IsUnsigned() << std::noboolalpha; + void VisitVecMemoryOperation(HVecMemoryOperation* vec_mem_operation) OVERRIDE { + StartAttributeStream("alignment") << vec_mem_operation->GetAlignment().ToString(); } - void VisitVecMax(HVecMax* max) OVERRIDE { - StartAttributeStream("unsigned") << std::boolalpha << max->IsUnsigned() << std::noboolalpha; + void VisitVecHalvingAdd(HVecHalvingAdd* hadd) OVERRIDE { + VisitVecBinaryOperation(hadd); + StartAttributeStream("rounded") << std::boolalpha << hadd->IsRounded() << std::noboolalpha; } void VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) OVERRIDE { + VisitVecOperation(instruction); StartAttributeStream("kind") << instruction->GetOpKind(); } @@ -569,7 +572,7 @@ class HGraphVisualizerPrinter : public 
HGraphDelegateVisitor { if (!inputs.empty()) { StringList input_list; for (const HInstruction* input : inputs) { - input_list.NewEntryStream() << GetTypeId(input->GetType()) << input->GetId(); + input_list.NewEntryStream() << DataType::TypeId(input->GetType()) << input->GetId(); } StartAttributeStream() << input_list; } @@ -583,7 +586,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { for (size_t i = 0, e = environment->Size(); i < e; ++i) { HInstruction* insn = environment->GetInstructionAt(i); if (insn != nullptr) { - vregs.NewEntryStream() << GetTypeId(insn->GetType()) << insn->GetId(); + vregs.NewEntryStream() << DataType::TypeId(insn->GetType()) << insn->GetId(); } else { vregs.NewEntryStream() << "_"; } @@ -640,7 +643,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { if ((IsPass(HGraphBuilder::kBuilderPassName) || IsPass(HInliner::kInlinerPassName)) - && (instruction->GetType() == Primitive::kPrimNot)) { + && (instruction->GetType() == DataType::Type::kReference)) { ReferenceTypeInfo info = instruction->IsLoadClass() ? instruction->AsLoadClass()->GetLoadedClassRTI() : instruction->GetReferenceTypeInfo(); @@ -684,7 +687,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { size_t num_uses = instruction->GetUses().SizeSlow(); AddIndent(); output_ << bci << " " << num_uses << " " - << GetTypeId(instruction->GetType()) << instruction->GetId() << " "; + << DataType::TypeId(instruction->GetType()) << instruction->GetId() << " "; PrintInstruction(instruction); output_ << " " << kEndInstructionMarker << "\n"; } @@ -807,7 +810,7 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { AddIndent(); HInstruction* instruction = it.Current(); - output_ << instruction->GetId() << " " << GetTypeId(instruction->GetType()) + output_ << instruction->GetId() << " " << DataType::TypeId(instruction->GetType()) << instruction->GetId() << "[ "; for (const HInstruction* input : instruction->GetInputs()) { output_ << input->GetId() << " "; diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc index 8ea312d0ea..f05159b735 100644 --- a/compiler/optimizing/gvn.cc +++ b/compiler/optimizing/gvn.cc @@ -17,10 +17,11 @@ #include "gvn.h" #include "base/arena_bit_vector.h" -#include "base/arena_containers.h" #include "base/bit_vector-inl.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "base/utils.h" #include "side_effects_analysis.h" -#include "utils.h" namespace art { @@ -36,7 +37,7 @@ namespace art { class ValueSet : public ArenaObject<kArenaAllocGvn> { public: // Constructs an empty ValueSet which owns all its buckets. - explicit ValueSet(ArenaAllocator* allocator) + explicit ValueSet(ScopedArenaAllocator* allocator) : allocator_(allocator), num_buckets_(kMinimumNumberOfBuckets), buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), @@ -44,12 +45,13 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { num_entries_(0u) { // ArenaAllocator returns zeroed memory, so no need to set buckets to null. DCHECK(IsPowerOfTwo(num_buckets_)); + std::fill_n(buckets_, num_buckets_, nullptr); buckets_owned_.SetInitialBits(num_buckets_); } // Copy constructor. Depending on the load factor, it will either make a deep // copy (all buckets owned) or a shallow one (buckets pointing to the parent). 
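// The GetTypeId() helper removed above mapped each type to the single
// character used in the visualizer output (e.g. "i3" for an int32-typed
// instruction with id 3), including 'l' for references, which
// Primitive::Descriptor could not handle. Standalone sketch of that mapping;
// its replacement DataType::TypeId() is assumed to provide the equivalent
// characters for the new DataType::Type enum.
enum class VisualizerType { kBool, kInt8, kUint16, kInt16, kInt32, kInt64,
                            kFloat32, kFloat64, kReference, kVoid };

static char TypeIdChar(VisualizerType type) {
  switch (type) {
    case VisualizerType::kBool:      return 'z';
    case VisualizerType::kInt8:      return 'b';
    case VisualizerType::kUint16:    return 'c';  // was kPrimChar
    case VisualizerType::kInt16:     return 's';
    case VisualizerType::kInt32:     return 'i';
    case VisualizerType::kInt64:     return 'j';
    case VisualizerType::kFloat32:   return 'f';
    case VisualizerType::kFloat64:   return 'd';
    case VisualizerType::kReference: return 'l';
    case VisualizerType::kVoid:      return 'v';
  }
  return 'v';
}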
- ValueSet(ArenaAllocator* allocator, const ValueSet& other) + ValueSet(ScopedArenaAllocator* allocator, const ValueSet& other) : allocator_(allocator), num_buckets_(other.IdealBucketCount()), buckets_(allocator->AllocArray<Node*>(num_buckets_, kArenaAllocGvn)), @@ -58,7 +60,7 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { // ArenaAllocator returns zeroed memory, so entries of buckets_ and // buckets_owned_ are initialized to null and false, respectively. DCHECK(IsPowerOfTwo(num_buckets_)); - PopulateFromInternal(other, /* is_dirty */ false); + PopulateFromInternal(other); } // Erases all values in this set and populates it with values from `other`. @@ -66,7 +68,7 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { if (this == &other) { return; } - PopulateFromInternal(other, /* is_dirty */ true); + PopulateFromInternal(other); } // Returns true if `this` has enough buckets so that if `other` is copied into @@ -159,33 +161,19 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { private: // Copies all entries from `other` to `this`. - // If `is_dirty` is set to true, existing data will be wiped first. It is - // assumed that `buckets_` and `buckets_owned_` are zero-allocated otherwise. - void PopulateFromInternal(const ValueSet& other, bool is_dirty) { + void PopulateFromInternal(const ValueSet& other) { DCHECK_NE(this, &other); DCHECK_GE(num_buckets_, other.IdealBucketCount()); if (num_buckets_ == other.num_buckets_) { // Hash table remains the same size. We copy the bucket pointers and leave // all buckets_owned_ bits false. - if (is_dirty) { - buckets_owned_.ClearAllBits(); - } else { - DCHECK_EQ(buckets_owned_.NumSetBits(), 0u); - } + buckets_owned_.ClearAllBits(); memcpy(buckets_, other.buckets_, num_buckets_ * sizeof(Node*)); } else { // Hash table size changes. We copy and rehash all entries, and set all // buckets_owned_ bits to true. - if (is_dirty) { - memset(buckets_, 0, num_buckets_ * sizeof(Node*)); - } else { - if (kIsDebugBuild) { - for (size_t i = 0; i < num_buckets_; ++i) { - DCHECK(buckets_[i] == nullptr) << i; - } - } - } + std::fill_n(buckets_, num_buckets_, nullptr); for (size_t i = 0; i < other.num_buckets_; ++i) { for (Node* node = other.buckets_[i]; node != nullptr; node = node->GetNext()) { size_t new_index = BucketIndex(node->GetHashCode()); @@ -208,7 +196,7 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { Node* GetNext() const { return next_; } void SetNext(Node* node) { next_ = node; } - Node* Dup(ArenaAllocator* allocator, Node* new_next = nullptr) { + Node* Dup(ScopedArenaAllocator* allocator, Node* new_next = nullptr) { return new (allocator) Node(instruction_, hash_code_, new_next); } @@ -313,8 +301,11 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { // Pure instructions are put into odd buckets to speed up deletion. Note that in the // case of irreducible loops, we don't put pure instructions in odd buckets, as we // need to delete them when entering the loop. - if (instruction->GetSideEffects().HasDependencies() || - instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) { + // ClinitCheck is treated as a pure instruction since it's only executed + // once. 
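// Standalone sketch of the shallow-vs-deep copy idea behind the ValueSet copy
// constructor above: a copy either rehashes everything into buckets it owns,
// or simply aliases the parent's bucket heads and records per-bucket
// ownership, presumably cloning a bucket lazily the first time it has to
// modify it. The types below are hypothetical, not the ART ValueSet API.
#include <algorithm>
#include <cstddef>
#include <vector>

struct Node { int value; Node* next; };

struct TinyValueSet {
  std::vector<Node*> buckets;
  std::vector<bool> owned;  // false: the bucket chain still belongs to the parent.

  explicit TinyValueSet(size_t n) : buckets(n, nullptr), owned(n, true) {}

  // Shallow copy: same bucket count, alias the parent's chains, own nothing.
  static TinyValueSet ShallowCopy(const TinyValueSet& parent) {
    TinyValueSet copy(parent.buckets.size());
    copy.buckets = parent.buckets;
    std::fill(copy.owned.begin(), copy.owned.end(), false);
    return copy;
  }

  void Insert(int value) {
    size_t i = static_cast<size_t>(value) % buckets.size();
    if (!owned[i]) {
      // Copy-on-write: duplicate the aliased chain before prepending to it
      // (order within a bucket does not matter for a set).
      Node* clone = nullptr;
      for (Node* n = buckets[i]; n != nullptr; n = n->next) {
        clone = new Node{n->value, clone};
      }
      buckets[i] = clone;
      owned[i] = true;
    }
    buckets[i] = new Node{value, buckets[i]};
  }
};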
+ bool pure = !instruction->GetSideEffects().HasDependencies() || + instruction->IsClinitCheck(); + if (!pure || instruction->GetBlock()->GetGraph()->HasIrreducibleLoops()) { return (hash_code << 1) | 0; } else { return (hash_code << 1) | 1; @@ -326,7 +317,7 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { return hash_code & (num_buckets_ - 1); } - ArenaAllocator* const allocator_; + ScopedArenaAllocator* const allocator_; // The internal bucket implementation of the set. size_t const num_buckets_; @@ -350,15 +341,16 @@ class ValueSet : public ArenaObject<kArenaAllocGvn> { */ class GlobalValueNumberer : public ValueObject { public: - GlobalValueNumberer(ArenaAllocator* allocator, - HGraph* graph, + GlobalValueNumberer(HGraph* graph, const SideEffectsAnalysis& side_effects) : graph_(graph), - allocator_(allocator), + allocator_(graph->GetArenaStack()), side_effects_(side_effects), - sets_(graph->GetBlocks().size(), nullptr, allocator->Adapter(kArenaAllocGvn)), + sets_(graph->GetBlocks().size(), nullptr, allocator_.Adapter(kArenaAllocGvn)), visited_blocks_( - allocator, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) {} + &allocator_, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGvn) { + visited_blocks_.ClearAllBits(); + } void Run(); @@ -368,7 +360,7 @@ class GlobalValueNumberer : public ValueObject { void VisitBasicBlock(HBasicBlock* block); HGraph* graph_; - ArenaAllocator* const allocator_; + ScopedArenaAllocator allocator_; const SideEffectsAnalysis& side_effects_; ValueSet* FindSetFor(HBasicBlock* block) const { @@ -396,7 +388,7 @@ class GlobalValueNumberer : public ValueObject { // ValueSet for blocks. Initially null, but for an individual block they // are allocated and populated by the dominator, and updated by all blocks // in the path from the dominator to the block. - ArenaVector<ValueSet*> sets_; + ScopedArenaVector<ValueSet*> sets_; // BitVector which serves as a fast-access map from block id to // visited/unvisited Boolean. @@ -407,7 +399,7 @@ class GlobalValueNumberer : public ValueObject { void GlobalValueNumberer::Run() { DCHECK(side_effects_.HasRun()); - sets_[graph_->GetEntryBlock()->GetBlockId()] = new (allocator_) ValueSet(allocator_); + sets_[graph_->GetEntryBlock()->GetBlockId()] = new (&allocator_) ValueSet(&allocator_); // Use the reverse post order to ensure the non back-edge predecessors of a block are // visited before the block itself. @@ -424,7 +416,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { // The entry block should only accumulate constant instructions, and // the builder puts constants only in the entry block. // Therefore, there is no need to propagate the value set to the next block. - set = new (allocator_) ValueSet(allocator_); + set = new (&allocator_) ValueSet(&allocator_); } else { HBasicBlock* dominator = block->GetDominator(); ValueSet* dominator_set = FindSetFor(dominator); @@ -443,7 +435,7 @@ void GlobalValueNumberer::VisitBasicBlock(HBasicBlock* block) { if (recyclable == nullptr) { // No block with a suitable ValueSet found. Allocate a new one and // copy `dominator_set` into it. - set = new (allocator_) ValueSet(allocator_, *dominator_set); + set = new (&allocator_) ValueSet(&allocator_, *dominator_set); } else { // Block with a recyclable ValueSet found. Clone `dominator_set` into it. 
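// Sketch of the bucket-parity trick in HashCode() above: the low bit of the
// stored hash records whether an entry can be invalidated by side effects, so
// a kill pass presumably only needs to walk the even buckets. ClinitCheck is
// now tagged as pure because it only ever executes once, and irreducible loops
// force everything into the killable (even) buckets.
#include <cstddef>
#include <cstdint>

static size_t TaggedHash(uint32_t raw_hash,
                         bool depends_on_side_effects,
                         bool is_clinit_check,
                         bool graph_has_irreducible_loops) {
  bool pure = !depends_on_side_effects || is_clinit_check;
  if (!pure || graph_has_irreducible_loops) {
    return (static_cast<size_t>(raw_hash) << 1) | 0;  // even: may be killed
  }
  return (static_cast<size_t>(raw_hash) << 1) | 1;    // odd: survives kills
}

// With a power-of-two bucket count, the bucket index is just a mask:
static size_t BucketIndex(size_t tagged_hash, size_t num_buckets) {
  return tagged_hash & (num_buckets - 1);
}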
set = FindSetFor(recyclable); @@ -566,7 +558,7 @@ HBasicBlock* GlobalValueNumberer::FindVisitedBlockWithRecyclableSet( } void GVNOptimization::Run() { - GlobalValueNumberer gvn(graph_->GetArena(), graph_, side_effects_); + GlobalValueNumberer gvn(graph_, side_effects_); gvn.Run(); } diff --git a/compiler/optimizing/gvn_test.cc b/compiler/optimizing/gvn_test.cc index f8d37bd714..3bf4cc35ba 100644 --- a/compiler/optimizing/gvn_test.cc +++ b/compiler/optimizing/gvn_test.cc @@ -14,86 +14,84 @@ * limitations under the License. */ +#include "gvn.h" + #include "base/arena_allocator.h" #include "builder.h" -#include "gvn.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" namespace art { -class GVNTest : public CommonCompilerTest {}; +class GVNTest : public OptimizingUnitTest {}; TEST_F(GVNTest, LocalFieldElimination) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimNot); + HInstruction* parameter = new (GetAllocator()) HParameterValue(graph->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kReference); entry->AddInstruction(parameter); - HBasicBlock* block = new (&allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimNot, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); - block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimNot, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); + block->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kReference, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); + block->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kReference, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); HInstruction* to_remove = block->GetLastInstruction(); - block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimNot, - MemberOffset(43), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); + block->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kReference, + MemberOffset(43), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); HInstruction* different_offset = block->GetLastInstruction(); // Kill the value. 
- block->AddInstruction(new (&allocator) HInstanceFieldSet(parameter, - parameter, - nullptr, - Primitive::kPrimNot, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); - block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimNot, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); + block->AddInstruction(new (GetAllocator()) HInstanceFieldSet(parameter, + parameter, + nullptr, + DataType::Type::kReference, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); + block->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kReference, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); HInstruction* use_after_kill = block->GetLastInstruction(); - block->AddInstruction(new (&allocator) HExit()); + block->AddInstruction(new (GetAllocator()) HExit()); ASSERT_EQ(to_remove->GetBlock(), block); ASSERT_EQ(different_offset->GetBlock(), block); @@ -110,36 +108,33 @@ TEST_F(GVNTest, LocalFieldElimination) { } TEST_F(GVNTest, GlobalFieldElimination) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimNot); + HInstruction* parameter = new (GetAllocator()) HParameterValue(graph->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kReference); entry->AddInstruction(parameter); - HBasicBlock* block = new (&allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimBoolean, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); - - block->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); - HBasicBlock* then = new (&allocator) HBasicBlock(graph); - HBasicBlock* else_ = new (&allocator) HBasicBlock(graph); - HBasicBlock* join = new (&allocator) HBasicBlock(graph); + block->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); + + block->AddInstruction(new (GetAllocator()) HIf(block->GetLastInstruction())); + HBasicBlock* then = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* else_ = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* join = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(then); graph->AddBlock(else_); graph->AddBlock(join); @@ -149,36 +144,36 @@ TEST_F(GVNTest, GlobalFieldElimination) { then->AddSuccessor(join); else_->AddSuccessor(join); - then->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimBoolean, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); - then->AddInstruction(new (&allocator) HGoto()); - else_->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - 
Primitive::kPrimBoolean, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); - else_->AddInstruction(new (&allocator) HGoto()); - join->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimBoolean, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); - join->AddInstruction(new (&allocator) HExit()); + then->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); + then->AddInstruction(new (GetAllocator()) HGoto()); + else_->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); + else_->AddInstruction(new (GetAllocator()) HGoto()); + join->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); + join->AddInstruction(new (GetAllocator()) HExit()); graph->BuildDominatorTree(); SideEffectsAnalysis side_effects(graph); @@ -192,37 +187,34 @@ TEST_F(GVNTest, GlobalFieldElimination) { } TEST_F(GVNTest, LoopFieldElimination) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimNot); + HInstruction* parameter = new (GetAllocator()) HParameterValue(graph->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kReference); entry->AddInstruction(parameter); - HBasicBlock* block = new (&allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - block->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimBoolean, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); - block->AddInstruction(new (&allocator) HGoto()); - - HBasicBlock* loop_header = new (&allocator) HBasicBlock(graph); - HBasicBlock* loop_body = new (&allocator) HBasicBlock(graph); - HBasicBlock* exit = new (&allocator) HBasicBlock(graph); + block->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); + block->AddInstruction(new (GetAllocator()) HGoto()); + + HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* exit = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(loop_header); graph->AddBlock(loop_body); @@ -232,54 +224,54 @@ TEST_F(GVNTest, LoopFieldElimination) { loop_header->AddSuccessor(exit); loop_body->AddSuccessor(loop_header); - loop_header->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimBoolean, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - 
graph->GetDexFile(), - 0)); + loop_header->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); HInstruction* field_get_in_loop_header = loop_header->GetLastInstruction(); - loop_header->AddInstruction(new (&allocator) HIf(block->GetLastInstruction())); + loop_header->AddInstruction(new (GetAllocator()) HIf(block->GetLastInstruction())); // Kill inside the loop body to prevent field gets inside the loop header // and the body to be GVN'ed. - loop_body->AddInstruction(new (&allocator) HInstanceFieldSet(parameter, - parameter, - nullptr, - Primitive::kPrimBoolean, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); + loop_body->AddInstruction(new (GetAllocator()) HInstanceFieldSet(parameter, + parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); HInstruction* field_set = loop_body->GetLastInstruction(); - loop_body->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimBoolean, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); + loop_body->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); HInstruction* field_get_in_loop_body = loop_body->GetLastInstruction(); - loop_body->AddInstruction(new (&allocator) HGoto()); - - exit->AddInstruction(new (&allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimBoolean, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); + loop_body->AddInstruction(new (GetAllocator()) HGoto()); + + exit->AddInstruction(new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); HInstruction* field_get_in_exit = exit->GetLastInstruction(); - exit->AddInstruction(new (&allocator) HExit()); + exit->AddInstruction(new (GetAllocator()) HExit()); ASSERT_EQ(field_get_in_loop_header->GetBlock(), loop_header); ASSERT_EQ(field_get_in_loop_body->GetBlock(), loop_body); @@ -314,22 +306,19 @@ TEST_F(GVNTest, LoopFieldElimination) { // Test that inner loops affect the side effects of the outer loop. 
TEST_F(GVNTest, LoopSideEffects) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - static const SideEffects kCanTriggerGC = SideEffects::CanTriggerGC(); - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HBasicBlock* outer_loop_header = new (&allocator) HBasicBlock(graph); - HBasicBlock* outer_loop_body = new (&allocator) HBasicBlock(graph); - HBasicBlock* outer_loop_exit = new (&allocator) HBasicBlock(graph); - HBasicBlock* inner_loop_header = new (&allocator) HBasicBlock(graph); - HBasicBlock* inner_loop_body = new (&allocator) HBasicBlock(graph); - HBasicBlock* inner_loop_exit = new (&allocator) HBasicBlock(graph); + HBasicBlock* outer_loop_header = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* outer_loop_body = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* outer_loop_exit = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* inner_loop_header = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* inner_loop_body = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* inner_loop_exit = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(outer_loop_header); graph->AddBlock(outer_loop_body); @@ -347,20 +336,20 @@ TEST_F(GVNTest, LoopSideEffects) { inner_loop_body->AddSuccessor(inner_loop_header); inner_loop_exit->AddSuccessor(outer_loop_header); - HInstruction* parameter = new (&allocator) HParameterValue(graph->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimBoolean); + HInstruction* parameter = new (GetAllocator()) HParameterValue(graph->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kBool); entry->AddInstruction(parameter); - entry->AddInstruction(new (&allocator) HGoto()); - outer_loop_header->AddInstruction(new (&allocator) HSuspendCheck()); - outer_loop_header->AddInstruction(new (&allocator) HIf(parameter)); - outer_loop_body->AddInstruction(new (&allocator) HGoto()); - inner_loop_header->AddInstruction(new (&allocator) HSuspendCheck()); - inner_loop_header->AddInstruction(new (&allocator) HIf(parameter)); - inner_loop_body->AddInstruction(new (&allocator) HGoto()); - inner_loop_exit->AddInstruction(new (&allocator) HGoto()); - outer_loop_exit->AddInstruction(new (&allocator) HExit()); + entry->AddInstruction(new (GetAllocator()) HGoto()); + outer_loop_header->AddInstruction(new (GetAllocator()) HSuspendCheck()); + outer_loop_header->AddInstruction(new (GetAllocator()) HIf(parameter)); + outer_loop_body->AddInstruction(new (GetAllocator()) HGoto()); + inner_loop_header->AddInstruction(new (GetAllocator()) HSuspendCheck()); + inner_loop_header->AddInstruction(new (GetAllocator()) HIf(parameter)); + inner_loop_body->AddInstruction(new (GetAllocator()) HGoto()); + inner_loop_exit->AddInstruction(new (GetAllocator()) HGoto()); + outer_loop_exit->AddInstruction(new (GetAllocator()) HExit()); graph->BuildDominatorTree(); @@ -370,16 +359,16 @@ TEST_F(GVNTest, LoopSideEffects) { // Check that the only side effect of loops is to potentially trigger GC. { // Make one block with a side effect. 
- entry->AddInstruction(new (&allocator) HInstanceFieldSet(parameter, - parameter, - nullptr, - Primitive::kPrimNot, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0)); + entry->AddInstruction(new (GetAllocator()) HInstanceFieldSet(parameter, + parameter, + nullptr, + DataType::Type::kReference, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0)); SideEffectsAnalysis side_effects(graph); side_effects.Run(); @@ -395,16 +384,16 @@ TEST_F(GVNTest, LoopSideEffects) { // Check that the side effects of the outer loop does not affect the inner loop. { outer_loop_body->InsertInstructionBefore( - new (&allocator) HInstanceFieldSet(parameter, - parameter, - nullptr, - Primitive::kPrimNot, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0), + new (GetAllocator()) HInstanceFieldSet(parameter, + parameter, + nullptr, + DataType::Type::kReference, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0), outer_loop_body->GetLastInstruction()); SideEffectsAnalysis side_effects(graph); @@ -421,16 +410,16 @@ TEST_F(GVNTest, LoopSideEffects) { { outer_loop_body->RemoveInstruction(outer_loop_body->GetFirstInstruction()); inner_loop_body->InsertInstructionBefore( - new (&allocator) HInstanceFieldSet(parameter, - parameter, - nullptr, - Primitive::kPrimNot, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0), + new (GetAllocator()) HInstanceFieldSet(parameter, + parameter, + nullptr, + DataType::Type::kReference, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0), inner_loop_body->GetLastInstruction()); SideEffectsAnalysis side_effects(graph); diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc index 84b20f65e3..d270c6a28e 100644 --- a/compiler/optimizing/induction_var_analysis.cc +++ b/compiler/optimizing/induction_var_analysis.cc @@ -56,17 +56,22 @@ static void RotateEntryPhiFirst(HLoopInformation* loop, /** * Returns true if the from/to types denote a narrowing, integral conversion (precision loss). 
*/ -static bool IsNarrowingIntegralConversion(Primitive::Type from, Primitive::Type to) { +static bool IsNarrowingIntegralConversion(DataType::Type from, DataType::Type to) { switch (from) { - case Primitive::kPrimLong: - return to == Primitive::kPrimByte || to == Primitive::kPrimShort - || to == Primitive::kPrimChar || to == Primitive::kPrimInt; - case Primitive::kPrimInt: - return to == Primitive::kPrimByte || to == Primitive::kPrimShort - || to == Primitive::kPrimChar; - case Primitive::kPrimChar: - case Primitive::kPrimShort: - return to == Primitive::kPrimByte; + case DataType::Type::kInt64: + return to == DataType::Type::kUint8 || + to == DataType::Type::kInt8 || + to == DataType::Type::kUint16 || + to == DataType::Type::kInt16 || + to == DataType::Type::kInt32; + case DataType::Type::kInt32: + return to == DataType::Type::kUint8 || + to == DataType::Type::kInt8 || + to == DataType::Type::kUint16 || + to == DataType::Type::kInt16; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + return to == DataType::Type::kUint8 || to == DataType::Type::kInt8; default: return false; } @@ -75,36 +80,167 @@ static bool IsNarrowingIntegralConversion(Primitive::Type from, Primitive::Type /** * Returns result of implicit widening type conversion done in HIR. */ -static Primitive::Type ImplicitConversion(Primitive::Type type) { +static DataType::Type ImplicitConversion(DataType::Type type) { switch (type) { - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - return Primitive::kPrimInt; + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + return DataType::Type::kInt32; default: return type; } } +/** + * Returns true if loop is guarded by "a cmp b" on entry. + */ +static bool IsGuardedBy(HLoopInformation* loop, + IfCondition cmp, + HInstruction* a, + HInstruction* b) { + // Chase back through straightline code to the first potential + // block that has a control dependence. + // guard: if (x) bypass + // | + // entry: straightline code + // | + // preheader + // | + // header + HBasicBlock* guard = loop->GetPreHeader(); + HBasicBlock* entry = loop->GetHeader(); + while (guard->GetPredecessors().size() == 1 && + guard->GetSuccessors().size() == 1) { + entry = guard; + guard = guard->GetSinglePredecessor(); + } + // Find guard. + HInstruction* control = guard->GetLastInstruction(); + if (!control->IsIf()) { + return false; + } + HIf* ifs = control->AsIf(); + HInstruction* if_expr = ifs->InputAt(0); + if (if_expr->IsCondition()) { + IfCondition other_cmp = ifs->IfTrueSuccessor() == entry + ? if_expr->AsCondition()->GetCondition() + : if_expr->AsCondition()->GetOppositeCondition(); + if (if_expr->InputAt(0) == a && if_expr->InputAt(1) == b) { + return cmp == other_cmp; + } else if (if_expr->InputAt(1) == a && if_expr->InputAt(0) == b) { + switch (cmp) { + case kCondLT: return other_cmp == kCondGT; + case kCondLE: return other_cmp == kCondGE; + case kCondGT: return other_cmp == kCondLT; + case kCondGE: return other_cmp == kCondLE; + default: LOG(FATAL) << "unexpected cmp: " << cmp; + } + } + } + return false; +} + +/* Finds first loop header phi use. 
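// Illustration of the operand-swap handling in IsGuardedBy() above: the walk
// back from the preheader stops at the first block ending in an HIf, and the
// guard matches either as written or with its operands mirrored. Standalone
// sketch with a hypothetical IfCondition enum:
enum IfCondition { kCondLT, kCondLE, kCondGT, kCondGE, kCondEQ, kCondNE };

// Condition that holds for (b cmp' a) whenever (a cmp b) holds.
static IfCondition SwapOperands(IfCondition cmp) {
  switch (cmp) {
    case kCondLT: return kCondGT;
    case kCondLE: return kCondGE;
    case kCondGT: return kCondLT;
    case kCondGE: return kCondLE;
    default:      return cmp;  // == and != are symmetric
  }
}

// Example: asking whether a loop is guarded by (i kCondLT n) also succeeds when
// the guard was written as (n kCondGT i), since SwapOperands(kCondLT) == kCondGT.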
*/ +HInstruction* FindFirstLoopHeaderPhiUse(HLoopInformation* loop, HInstruction* instruction) { + for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { + if (use.GetUser()->GetBlock() == loop->GetHeader() && + use.GetUser()->IsPhi() && + use.GetUser()->InputAt(1) == instruction) { + return use.GetUser(); + } + } + return nullptr; +} + +/** + * Relinks the Phi structure after break-loop rewriting. + */ +bool FixOutsideUse(HLoopInformation* loop, + HInstruction* instruction, + HInstruction* replacement, + bool rewrite) { + // Deal with regular uses. + const HUseList<HInstruction*>& uses = instruction->GetUses(); + for (auto it = uses.begin(), end = uses.end(); it != end; ) { + HInstruction* user = it->GetUser(); + size_t index = it->GetIndex(); + ++it; // increment prior to potential removal + if (user->GetBlock()->GetLoopInformation() != loop) { + if (replacement == nullptr) { + return false; + } else if (rewrite) { + user->ReplaceInput(replacement, index); + } + } + } + // Deal with environment uses. + const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses(); + for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) { + HEnvironment* user = it->GetUser(); + size_t index = it->GetIndex(); + ++it; // increment prior to potential removal + if (user->GetHolder()->GetBlock()->GetLoopInformation() != loop) { + if (replacement == nullptr) { + return false; + } else if (rewrite) { + user->RemoveAsUserOfInput(index); + user->SetRawEnvAt(index, replacement); + replacement->AddEnvUseAt(user, index); + } + } + } + return true; +} + +/** + * Test and rewrite the loop body of a break-loop. Returns true on success. + */ +bool RewriteBreakLoopBody(HLoopInformation* loop, + HBasicBlock* body, + HInstruction* cond, + HInstruction* index, + HInstruction* upper, + bool rewrite) { + // Deal with Phis. Outside use prohibited, except for index (which gets exit value). + for (HInstructionIterator it(loop->GetHeader()->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* exit_value = it.Current() == index ? upper : nullptr; + if (!FixOutsideUse(loop, it.Current(), exit_value, rewrite)) { + return false; + } + } + // Deal with other statements in header. + for (HInstruction* m = cond->GetPrevious(), *p = nullptr; m && !m->IsSuspendCheck(); m = p) { + p = m->GetPrevious(); + if (rewrite) { + m->MoveBefore(body->GetFirstInstruction(), false); + } + if (!FixOutsideUse(loop, m, FindFirstLoopHeaderPhiUse(loop, m), rewrite)) { + return false; + } + } + return true; +} + // // Class methods. 
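// Shape of the relinking done by FindFirstLoopHeaderPhiUse() and
// FixOutsideUse() above, sketched at the HIR level (comments only; this
// mirrors the code shown, it is not additional API). A header statement m that
// is moved into the body keeps its outside uses valid by redirecting them to
// the header phi that consumes m over the back edge:
//
//   header:  p = Phi(init, m)          // loop-header phi with InputAt(1) == m
//            m = f(p, ...)             // moved before the body's first instruction
//            if (cond) goto body else goto exit
//
// A use of m outside the loop is rewritten to use p, which after the rewrite
// carries m's final value out of the loop; the index phi itself gets the loop
// bound `upper` as its exit value. If no replacement exists for an outside
// use, FixOutsideUse() returns false and the rewrite is abandoned (the first
// pass runs with rewrite == false precisely to test this before mutating
// anything).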
// -HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph) - : HOptimization(graph, kInductionPassName), +HInductionVarAnalysis::HInductionVarAnalysis(HGraph* graph, const char* name) + : HOptimization(graph, name), global_depth_(0), - stack_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + stack_(graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)), map_(std::less<HInstruction*>(), - graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), - scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)), + scc_(graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)), cycle_(std::less<HInstruction*>(), - graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), - type_(Primitive::kPrimVoid), + graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)), + type_(DataType::Type::kVoid), induction_(std::less<HLoopInformation*>(), - graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)), + graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)), cycles_(std::less<HPhi*>(), - graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) { + graph->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)) { } void HInductionVarAnalysis::Run() { @@ -259,7 +395,8 @@ void HInductionVarAnalysis::ClassifyNonTrivial(HLoopInformation* loop) { // Rotate proper loop-phi to front. if (size > 1) { - ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)); + ArenaVector<HInstruction*> other( + graph_->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)); RotateEntryPhiFirst(loop, &scc_, &other); } @@ -520,8 +657,8 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferMul(Inducti HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::TransferConversion( InductionInfo* a, - Primitive::Type from, - Primitive::Type to) { + DataType::Type from, + DataType::Type to) { if (a != nullptr) { // Allow narrowing conversion on linear induction in certain cases: // induction is already at narrow type, or can be made narrower. @@ -676,7 +813,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveOp(HLoopInform CreateConstant(0, type_), c->fetch, type_); - }; + } break; case kRem: // Idiomatic MOD wrap-around induction. @@ -723,15 +860,15 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveConversion( HLoopInformation* loop, HInstruction* entry_phi, HTypeConversion* conversion) { - Primitive::Type from = conversion->GetInputType(); - Primitive::Type to = conversion->GetResultType(); + DataType::Type from = conversion->GetInputType(); + DataType::Type to = conversion->GetResultType(); // A narrowing conversion is allowed as *last* operation of the cycle of a linear induction // with an initial value that fits the type, provided that the narrowest encountered type is // recorded with the induction to account for the precision loss. 
The narrower induction does // *not* transfer to any wider operations, however, since these may yield out-of-type values if (entry_phi->InputCount() == 2 && conversion == entry_phi->InputAt(1)) { - int64_t min = Primitive::MinValueOfIntegralType(to); - int64_t max = Primitive::MaxValueOfIntegralType(to); + int64_t min = DataType::MinValueOfIntegralType(to); + int64_t max = DataType::MaxValueOfIntegralType(to); int64_t value = 0; InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0)); if (IsNarrowingIntegralConversion(from, to) && @@ -747,6 +884,10 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveConversion( return nullptr; } +// +// Loop trip count analysis methods. +// + void HInductionVarAnalysis::VisitControl(HLoopInformation* loop) { HInstruction* control = loop->GetHeader()->GetLastInstruction(); if (control->IsIf()) { @@ -761,33 +902,34 @@ void HInductionVarAnalysis::VisitControl(HLoopInformation* loop) { HCondition* condition = if_expr->AsCondition(); InductionInfo* a = LookupInfo(loop, condition->InputAt(0)); InductionInfo* b = LookupInfo(loop, condition->InputAt(1)); - Primitive::Type type = ImplicitConversion(condition->InputAt(0)->GetType()); + DataType::Type type = ImplicitConversion(condition->InputAt(0)->GetType()); // Determine if the loop control uses a known sequence on an if-exit (X outside) or on // an if-iterate (X inside), expressed as if-iterate when passed into VisitCondition(). if (a == nullptr || b == nullptr) { return; // Loop control is not a sequence. } else if (if_true->GetLoopInformation() != loop && if_false->GetLoopInformation() == loop) { - VisitCondition(loop, a, b, type, condition->GetOppositeCondition()); + VisitCondition(loop, if_false, a, b, type, condition->GetOppositeCondition()); } else if (if_true->GetLoopInformation() == loop && if_false->GetLoopInformation() != loop) { - VisitCondition(loop, a, b, type, condition->GetCondition()); + VisitCondition(loop, if_true, a, b, type, condition->GetCondition()); } } } } void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, + HBasicBlock* body, InductionInfo* a, InductionInfo* b, - Primitive::Type type, + DataType::Type type, IfCondition cmp) { if (a->induction_class == kInvariant && b->induction_class == kLinear) { // Swap condition if induction is at right-hand-side (e.g. U > i is same as i < U). switch (cmp) { - case kCondLT: VisitCondition(loop, b, a, type, kCondGT); break; - case kCondLE: VisitCondition(loop, b, a, type, kCondGE); break; - case kCondGT: VisitCondition(loop, b, a, type, kCondLT); break; - case kCondGE: VisitCondition(loop, b, a, type, kCondLE); break; - case kCondNE: VisitCondition(loop, b, a, type, kCondNE); break; + case kCondLT: VisitCondition(loop, body, b, a, type, kCondGT); break; + case kCondLE: VisitCondition(loop, body, b, a, type, kCondGE); break; + case kCondGT: VisitCondition(loop, body, b, a, type, kCondLT); break; + case kCondGE: VisitCondition(loop, body, b, a, type, kCondLE); break; + case kCondNE: VisitCondition(loop, body, b, a, type, kCondNE); break; default: break; } } else if (a->induction_class == kLinear && b->induction_class == kInvariant) { @@ -795,24 +937,30 @@ void HInductionVarAnalysis::VisitCondition(HLoopInformation* loop, InductionInfo* lower_expr = a->op_b; InductionInfo* upper_expr = b; InductionInfo* stride_expr = a->op_a; - // Constant stride? + // Test for constant stride and integral condition. 
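// Source-level example of the narrowing-conversion rule in SolveConversion()
// above (illustration only; the real analysis runs on HIR):
//
//   for (int16_t i = 0; i < n; ) {
//     use(i);
//     i = static_cast<int16_t>(i + 1);   // int32 add, then narrowing to int16
//   }
//
// The conversion int32 -> int16 is the last operation of the induction cycle
// and the initial value 0 fits int16, so i is accepted as a linear induction
// recorded at the narrower type. The narrowed value does not transfer into
// wider operations, since those could observe out-of-range values.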
int64_t stride_value = 0; if (!IsExact(stride_expr, &stride_value)) { - return; + return; // unknown stride + } else if (type != DataType::Type::kInt32 && type != DataType::Type::kInt64) { + return; // not integral } - // Rewrite condition i != U into strict end condition i < U or i > U if this end condition - // is reached exactly (tested by verifying if the loop has a unit stride and the non-strict - // condition would be always taken). + // Since loops with a i != U condition will not be normalized by the method below, first + // try to rewrite a break-loop with terminating condition i != U into an equivalent loop + // with non-strict end condition i <= U or i >= U if such a rewriting is possible and safe. + if (cmp == kCondNE && RewriteBreakLoop(loop, body, stride_value, type)) { + cmp = stride_value > 0 ? kCondLE : kCondGE; + } + // If this rewriting failed, try to rewrite condition i != U into strict end condition i < U + // or i > U if this end condition is reached exactly (tested by verifying if the loop has a + // unit stride and the non-strict condition would be always taken). if (cmp == kCondNE && ((stride_value == +1 && IsTaken(lower_expr, upper_expr, kCondLE)) || (stride_value == -1 && IsTaken(lower_expr, upper_expr, kCondGE)))) { cmp = stride_value > 0 ? kCondLT : kCondGT; } - // Only accept integral condition. A mismatch between the type of condition and the induction - // is only allowed if the, necessarily narrower, induction range fits the narrower control. - if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) { - return; // not integral - } else if (type != a->type && - !FitsNarrowerControl(lower_expr, upper_expr, stride_value, a->type, cmp)) { + // A mismatch between the type of condition and the induction is only allowed if the, + // necessarily narrower, induction range fits the narrower control. + if (type != a->type && + !FitsNarrowerControl(lower_expr, upper_expr, stride_value, a->type, cmp)) { return; // mismatched type } // Normalize a linear loop control with a nonzero stride: @@ -830,7 +978,7 @@ void HInductionVarAnalysis::VisitTripCount(HLoopInformation* loop, InductionInfo* upper_expr, InductionInfo* stride_expr, int64_t stride_value, - Primitive::Type type, + DataType::Type type, IfCondition cmp) { // Any loop of the general form: // @@ -931,10 +1079,10 @@ bool HInductionVarAnalysis::IsTaken(InductionInfo* lower_expr, bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, int64_t stride_value, - Primitive::Type type, + DataType::Type type, IfCondition cmp) { - int64_t min = Primitive::MinValueOfIntegralType(type); - int64_t max = Primitive::MaxValueOfIntegralType(type); + int64_t min = DataType::MinValueOfIntegralType(type); + int64_t max = DataType::MaxValueOfIntegralType(type); // Some rules under which it is certain at compile-time that the loop is finite. int64_t value; switch (cmp) { @@ -957,10 +1105,10 @@ bool HInductionVarAnalysis::IsFinite(InductionInfo* upper_expr, bool HInductionVarAnalysis::FitsNarrowerControl(InductionInfo* lower_expr, InductionInfo* upper_expr, int64_t stride_value, - Primitive::Type type, + DataType::Type type, IfCondition cmp) { - int64_t min = Primitive::MinValueOfIntegralType(type); - int64_t max = Primitive::MaxValueOfIntegralType(type); + int64_t min = DataType::MinValueOfIntegralType(type); + int64_t max = DataType::MaxValueOfIntegralType(type); // Inclusive test need one extra. 
if (stride_value != 1 && stride_value != -1) { return false; // non-unit stride @@ -977,6 +1125,69 @@ bool HInductionVarAnalysis::FitsNarrowerControl(InductionInfo* lower_expr, IsAtMost(upper_expr, &value) && value <= max; } +bool HInductionVarAnalysis::RewriteBreakLoop(HLoopInformation* loop, + HBasicBlock* body, + int64_t stride_value, + DataType::Type type) { + // Only accept unit stride. + if (std::abs(stride_value) != 1) { + return false; + } + // Simple terminating i != U condition, used nowhere else. + HIf* ifs = loop->GetHeader()->GetLastInstruction()->AsIf(); + HInstruction* cond = ifs->InputAt(0); + if (ifs->GetPrevious() != cond || !cond->HasOnlyOneNonEnvironmentUse()) { + return false; + } + int c = LookupInfo(loop, cond->InputAt(0))->induction_class == kLinear ? 0 : 1; + HInstruction* index = cond->InputAt(c); + HInstruction* upper = cond->InputAt(1 - c); + // Safe to rewrite into i <= U? + IfCondition cmp = stride_value > 0 ? kCondLE : kCondGE; + if (!index->IsPhi() || !IsFinite(LookupInfo(loop, upper), stride_value, type, cmp)) { + return false; + } + // Body consists of update to index i only, used nowhere else. + if (body->GetSuccessors().size() != 1 || + body->GetSingleSuccessor() != loop->GetHeader() || + !body->GetPhis().IsEmpty() || + body->GetInstructions().IsEmpty() || + body->GetFirstInstruction() != index->InputAt(1) || + !body->GetFirstInstruction()->HasOnlyOneNonEnvironmentUse() || + !body->GetFirstInstruction()->GetNext()->IsGoto()) { + return false; + } + // Always taken or guarded by enclosing condition. + if (!IsTaken(LookupInfo(loop, index)->op_b, LookupInfo(loop, upper), cmp) && + !IsGuardedBy(loop, cmp, index->InputAt(0), upper)) { + return false; + } + // Test if break-loop body can be written, and do so on success. + if (RewriteBreakLoopBody(loop, body, cond, index, upper, /*rewrite*/ false)) { + RewriteBreakLoopBody(loop, body, cond, index, upper, /*rewrite*/ true); + } else { + return false; + } + // Rewrite condition in HIR. + if (ifs->IfTrueSuccessor() != body) { + cmp = (cmp == kCondLE) ? kCondGT : kCondLT; + } + HInstruction* rep = nullptr; + switch (cmp) { + case kCondLT: rep = new (graph_->GetAllocator()) HLessThan(index, upper); break; + case kCondGT: rep = new (graph_->GetAllocator()) HGreaterThan(index, upper); break; + case kCondLE: rep = new (graph_->GetAllocator()) HLessThanOrEqual(index, upper); break; + case kCondGE: rep = new (graph_->GetAllocator()) HGreaterThanOrEqual(index, upper); break; + default: LOG(FATAL) << cmp; UNREACHABLE(); + } + loop->GetHeader()->ReplaceAndRemoveInstructionWith(cond, rep); + return true; +} + +// +// Helper methods. 
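For orientation, the loop shape that RewriteBreakLoop accepts is deliberately narrow: a unit-stride induction whose only exit test is i != U and whose body does nothing but advance that induction variable. A Java-style sketch of that shape (illustrative only; the class and names are mine, not taken from the change):

class BreakLoopSketch {
  // Break-loop: the header tests i != upper and the body only advances i.
  static int run(int lower, int upper) {
    int i = lower;
    while (i != upper) {   // terminating condition, used nowhere else
      i++;                 // sole body instruction feeding the loop phi
    }
    return i;
  }
}

If lower could start beyond upper this loop would never terminate, which is why the checks above insist on a finite upper bound under the non-strict compare and on the condition being taken initially or guarded by an enclosing condition. Once a dry run of RewriteBreakLoopBody succeeds, the != test is replaced by the equivalent non-strict comparison i <= upper or i >= upper, so the trip-count normalization in VisitCondition can treat the loop like an ordinary counted loop.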
+// + void HInductionVarAnalysis::AssignInfo(HLoopInformation* loop, HInstruction* instruction, InductionInfo* info) { @@ -985,7 +1196,7 @@ void HInductionVarAnalysis::AssignInfo(HLoopInformation* loop, it = induction_.Put(loop, ArenaSafeMap<HInstruction*, InductionInfo*>( std::less<HInstruction*>(), - graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis))); + graph_->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis))); } it->second.Put(instruction, info); } @@ -1008,13 +1219,13 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::LookupInfo(HLoopInf } HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateConstant(int64_t value, - Primitive::Type type) { + DataType::Type type) { HInstruction* constant; switch (type) { - case Primitive::kPrimDouble: constant = graph_->GetDoubleConstant(value); break; - case Primitive::kPrimFloat: constant = graph_->GetFloatConstant(value); break; - case Primitive::kPrimLong: constant = graph_->GetLongConstant(value); break; - default: constant = graph_->GetIntConstant(value); break; + case DataType::Type::kFloat64: constant = graph_->GetDoubleConstant(value); break; + case DataType::Type::kFloat32: constant = graph_->GetFloatConstant(value); break; + case DataType::Type::kInt64: constant = graph_->GetLongConstant(value); break; + default: constant = graph_->GetIntConstant(value); break; } return CreateInvariantFetch(constant); } @@ -1076,7 +1287,7 @@ HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::CreateSimplifiedInv return CreateSimplifiedInvariant(kSub, b->op_b, b->op_a); } } - return new (graph_->GetArena()) InductionInfo( + return new (graph_->GetAllocator()) InductionInfo( kInvariant, op, a, b, nullptr, ImplicitConversion(b->type)); } @@ -1100,11 +1311,11 @@ HInstruction* HInductionVarAnalysis::GetShiftConstant(HLoopInformation* loop, InductionInfo* b = LookupInfo(loop, instruction->InputAt(1)); int64_t value = -1; if (IsExact(b, &value)) { - Primitive::Type type = instruction->InputAt(0)->GetType(); - if (type == Primitive::kPrimInt && 0 <= value && value < 31) { + DataType::Type type = instruction->InputAt(0)->GetType(); + if (type == DataType::Type::kInt32 && 0 <= value && value < 31) { return graph_->GetIntConstant(1 << value); } - if (type == Primitive::kPrimLong && 0 <= value && value < 63) { + if (type == DataType::Type::kInt64 && 0 <= value && value < 63) { return graph_->GetLongConstant(1L << value); } } @@ -1113,7 +1324,7 @@ HInstruction* HInductionVarAnalysis::GetShiftConstant(HLoopInformation* loop, void HInductionVarAnalysis::AssignCycle(HPhi* phi) { ArenaSet<HInstruction*>* set = &cycles_.Put(phi, ArenaSet<HInstruction*>( - graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)))->second; + graph_->GetAllocator()->Adapter(kArenaAllocInductionVarAnalysis)))->second; for (HInstruction* i : scc_) { set->insert(i); } @@ -1142,11 +1353,12 @@ bool HInductionVarAnalysis::IsAtLeast(InductionInfo* info, int64_t* value) { bool HInductionVarAnalysis::IsNarrowingLinear(InductionInfo* info) { return info != nullptr && info->induction_class == kLinear && - (info->type == Primitive::kPrimByte || - info->type == Primitive::kPrimShort || - info->type == Primitive::kPrimChar || - (info->type == Primitive::kPrimInt && (info->op_a->type == Primitive::kPrimLong || - info->op_b->type == Primitive::kPrimLong))); + (info->type == DataType::Type::kUint8 || + info->type == DataType::Type::kInt8 || + info->type == DataType::Type::kUint16 || + info->type == DataType::Type::kInt16 || + (info->type == 
DataType::Type::kInt32 && (info->op_a->type == DataType::Type::kInt64 || + info->op_b->type == DataType::Type::kInt64))); } bool HInductionVarAnalysis::InductionEqual(InductionInfo* info1, @@ -1207,12 +1419,12 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) { DCHECK(info->operation == kNop); return "(" + InductionToString(info->op_a) + " * i + " + InductionToString(info->op_b) + "):" + - Primitive::PrettyDescriptor(info->type); + DataType::PrettyDescriptor(info->type); } else if (info->induction_class == kPolynomial) { DCHECK(info->operation == kNop); return "poly(sum_lt(" + InductionToString(info->op_a) + ") + " + InductionToString(info->op_b) + "):" + - Primitive::PrettyDescriptor(info->type); + DataType::PrettyDescriptor(info->type); } else if (info->induction_class == kGeometric) { DCHECK(info->operation == kMul || info->operation == kDiv); DCHECK(info->fetch != nullptr); @@ -1220,17 +1432,17 @@ std::string HInductionVarAnalysis::InductionToString(InductionInfo* info) { FetchToString(info->fetch) + (info->operation == kMul ? " ^ i + " : " ^ -i + ") + InductionToString(info->op_b) + "):" + - Primitive::PrettyDescriptor(info->type); + DataType::PrettyDescriptor(info->type); } else if (info->induction_class == kWrapAround) { DCHECK(info->operation == kNop); return "wrap(" + InductionToString(info->op_a) + ", " + InductionToString(info->op_b) + "):" + - Primitive::PrettyDescriptor(info->type); + DataType::PrettyDescriptor(info->type); } else if (info->induction_class == kPeriodic) { DCHECK(info->operation == kNop); return "periodic(" + InductionToString(info->op_a) + ", " + InductionToString(info->op_b) + "):" + - Primitive::PrettyDescriptor(info->type); + DataType::PrettyDescriptor(info->type); } } } diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h index 39b39cdf55..acad77d35f 100644 --- a/compiler/optimizing/induction_var_analysis.h +++ b/compiler/optimizing/induction_var_analysis.h @@ -35,7 +35,7 @@ namespace art { */ class HInductionVarAnalysis : public HOptimization { public: - explicit HInductionVarAnalysis(HGraph* graph); + explicit HInductionVarAnalysis(HGraph* graph, const char* name = kInductionPassName); void Run() OVERRIDE; @@ -103,7 +103,7 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* a, InductionInfo* b, HInstruction* f, - Primitive::Type t) + DataType::Type t) : induction_class(ic), operation(op), op_a(a), @@ -115,7 +115,7 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* op_a; InductionInfo* op_b; HInstruction* fetch; - Primitive::Type type; // precision of operation + DataType::Type type; // precision of operation }; bool IsVisitedNode(HInstruction* instruction) const { @@ -129,16 +129,16 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* CreateInvariantFetch(HInstruction* f) { DCHECK(f != nullptr); - return new (graph_->GetArena()) + return new (graph_->GetAllocator()) InductionInfo(kInvariant, kFetch, nullptr, nullptr, f, f->GetType()); } InductionInfo* CreateTripCount(InductionOp op, InductionInfo* a, InductionInfo* b, - Primitive::Type type) { + DataType::Type type) { DCHECK(a != nullptr && b != nullptr); - return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, type); + return new (graph_->GetAllocator()) InductionInfo(kInvariant, op, a, b, nullptr, type); } InductionInfo* CreateInduction(InductionClass ic, @@ -146,9 +146,9 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* a, 
InductionInfo* b, HInstruction* f, - Primitive::Type type) { + DataType::Type type) { DCHECK(a != nullptr && b != nullptr); - return new (graph_->GetArena()) InductionInfo(ic, op, a, b, f, type); + return new (graph_->GetAllocator()) InductionInfo(ic, op, a, b, f, type); } // Methods for analysis. @@ -167,7 +167,7 @@ class HInductionVarAnalysis : public HOptimization { InductionInfo* TransferAddSub(InductionInfo* a, InductionInfo* b, InductionOp op); InductionInfo* TransferNeg(InductionInfo* a); InductionInfo* TransferMul(InductionInfo* a, InductionInfo* b); - InductionInfo* TransferConversion(InductionInfo* a, Primitive::Type from, Primitive::Type to); + InductionInfo* TransferConversion(InductionInfo* a, DataType::Type from, DataType::Type to); // Solvers. InductionInfo* SolvePhi(HInstruction* phi, size_t input_index, size_t adjust_input_size); @@ -195,35 +195,48 @@ class HInductionVarAnalysis : public HOptimization { HInstruction* entry_phi, HTypeConversion* conversion); + // + // Loop trip count analysis methods. + // + // Trip count information. void VisitControl(HLoopInformation* loop); void VisitCondition(HLoopInformation* loop, + HBasicBlock* body, InductionInfo* a, InductionInfo* b, - Primitive::Type type, + DataType::Type type, IfCondition cmp); void VisitTripCount(HLoopInformation* loop, InductionInfo* lower_expr, InductionInfo* upper_expr, InductionInfo* stride, int64_t stride_value, - Primitive::Type type, + DataType::Type type, IfCondition cmp); bool IsTaken(InductionInfo* lower_expr, InductionInfo* upper_expr, IfCondition cmp); bool IsFinite(InductionInfo* upper_expr, int64_t stride_value, - Primitive::Type type, + DataType::Type type, IfCondition cmp); bool FitsNarrowerControl(InductionInfo* lower_expr, InductionInfo* upper_expr, int64_t stride_value, - Primitive::Type type, + DataType::Type type, IfCondition cmp); + bool RewriteBreakLoop(HLoopInformation* loop, + HBasicBlock* body, + int64_t stride_value, + DataType::Type type); + + // + // Helper methods. + // // Assign and lookup. void AssignInfo(HLoopInformation* loop, HInstruction* instruction, InductionInfo* info); InductionInfo* LookupInfo(HLoopInformation* loop, HInstruction* instruction); - InductionInfo* CreateConstant(int64_t value, Primitive::Type type); + InductionInfo* CreateConstant(int64_t value, DataType::Type type); InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b); HInstruction* GetShiftConstant(HLoopInformation* loop, HInstruction* instruction, @@ -250,7 +263,7 @@ class HInductionVarAnalysis : public HOptimization { ArenaSafeMap<HInstruction*, NodeInfo> map_; ArenaVector<HInstruction*> scc_; ArenaSafeMap<HInstruction*, InductionInfo*> cycle_; - Primitive::Type type_; + DataType::Type type_; /** * Maintains the results of the analysis as a mapping from loops to a mapping from instructions diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc index 9516ccb385..4c11ad4643 100644 --- a/compiler/optimizing/induction_var_analysis_test.cc +++ b/compiler/optimizing/induction_var_analysis_test.cc @@ -27,12 +27,10 @@ namespace art { /** * Fixture class for the InductionVarAnalysis tests. 
*/ -class InductionVarAnalysisTest : public CommonCompilerTest { +class InductionVarAnalysisTest : public OptimizingUnitTest { public: InductionVarAnalysisTest() - : pool_(), - allocator_(&pool_), - iva_(nullptr), + : iva_(nullptr), entry_(nullptr), return_(nullptr), exit_(nullptr), @@ -44,7 +42,7 @@ class InductionVarAnalysisTest : public CommonCompilerTest { constant100_(nullptr), constantm1_(nullptr), float_constant0_(nullptr) { - graph_ = CreateGraph(&allocator_); + graph_ = CreateGraph(); } ~InductionVarAnalysisTest() { } @@ -52,15 +50,15 @@ class InductionVarAnalysisTest : public CommonCompilerTest { // Builds single for-loop at depth d. void BuildForLoop(int d, int n) { ASSERT_LT(d, n); - loop_preheader_[d] = new (&allocator_) HBasicBlock(graph_); + loop_preheader_[d] = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_preheader_[d]); - loop_header_[d] = new (&allocator_) HBasicBlock(graph_); + loop_header_[d] = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_header_[d]); loop_preheader_[d]->AddSuccessor(loop_header_[d]); if (d < (n - 1)) { BuildForLoop(d + 1, n); } - loop_body_[d] = new (&allocator_) HBasicBlock(graph_); + loop_body_[d] = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_body_[d]); loop_body_[d]->AddSuccessor(loop_header_[d]); if (d < (n - 1)) { @@ -79,12 +77,12 @@ class InductionVarAnalysisTest : public CommonCompilerTest { graph_->SetNumberOfVRegs(n + 3); // Build basic blocks with entry, nested loop, exit. - entry_ = new (&allocator_) HBasicBlock(graph_); + entry_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry_); BuildForLoop(0, n); - return_ = new (&allocator_) HBasicBlock(graph_); + return_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(return_); - exit_ = new (&allocator_) HBasicBlock(graph_); + exit_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(exit_); entry_->AddSuccessor(loop_preheader_[0]); loop_header_[0]->AddSuccessor(return_); @@ -93,8 +91,8 @@ class InductionVarAnalysisTest : public CommonCompilerTest { graph_->SetExitBlock(exit_); // Provide entry and exit instructions. - parameter_ = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot, true); + parameter_ = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference, true); entry_->AddInstruction(parameter_); constant0_ = graph_->GetIntConstant(0); constant1_ = graph_->GetIntConstant(1); @@ -103,20 +101,20 @@ class InductionVarAnalysisTest : public CommonCompilerTest { constant100_ = graph_->GetIntConstant(100); constantm1_ = graph_->GetIntConstant(-1); float_constant0_ = graph_->GetFloatConstant(0.0f); - return_->AddInstruction(new (&allocator_) HReturnVoid()); - exit_->AddInstruction(new (&allocator_) HExit()); + return_->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_->AddInstruction(new (GetAllocator()) HExit()); // Provide loop instructions. 
for (int d = 0; d < n; d++) { - basic_[d] = new (&allocator_) HPhi(&allocator_, d, 0, Primitive::kPrimInt); - loop_preheader_[d]->AddInstruction(new (&allocator_) HGoto()); + basic_[d] = new (GetAllocator()) HPhi(GetAllocator(), d, 0, DataType::Type::kInt32); + loop_preheader_[d]->AddInstruction(new (GetAllocator()) HGoto()); loop_header_[d]->AddPhi(basic_[d]); - HInstruction* compare = new (&allocator_) HLessThan(basic_[d], constant100_); + HInstruction* compare = new (GetAllocator()) HLessThan(basic_[d], constant100_); loop_header_[d]->AddInstruction(compare); - loop_header_[d]->AddInstruction(new (&allocator_) HIf(compare)); - increment_[d] = new (&allocator_) HAdd(Primitive::kPrimInt, basic_[d], constant1_); + loop_header_[d]->AddInstruction(new (GetAllocator()) HIf(compare)); + increment_[d] = new (GetAllocator()) HAdd(DataType::Type::kInt32, basic_[d], constant1_); loop_body_[d]->AddInstruction(increment_[d]); - loop_body_[d]->AddInstruction(new (&allocator_) HGoto()); + loop_body_[d]->AddInstruction(new (GetAllocator()) HGoto()); basic_[d]->AddInput(constant0_); basic_[d]->AddInput(increment_[d]); @@ -125,9 +123,9 @@ class InductionVarAnalysisTest : public CommonCompilerTest { // Builds if-statement at depth d. HPhi* BuildIf(int d, HBasicBlock** ifT, HBasicBlock** ifF) { - HBasicBlock* cond = new (&allocator_) HBasicBlock(graph_); - HBasicBlock* ifTrue = new (&allocator_) HBasicBlock(graph_); - HBasicBlock* ifFalse = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* cond = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* ifTrue = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* ifFalse = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(cond); graph_->AddBlock(ifTrue); graph_->AddBlock(ifFalse); @@ -137,11 +135,11 @@ class InductionVarAnalysisTest : public CommonCompilerTest { cond->AddSuccessor(ifFalse); ifTrue->AddSuccessor(loop_body_[d]); ifFalse->AddSuccessor(loop_body_[d]); - cond->AddInstruction(new (&allocator_) HIf(parameter_)); + cond->AddInstruction(new (GetAllocator()) HIf(parameter_)); *ifT = ifTrue; *ifF = ifFalse; - HPhi* select_phi = new (&allocator_) HPhi(&allocator_, -1, 0, Primitive::kPrimInt); + HPhi* select_phi = new (GetAllocator()) HPhi(GetAllocator(), -1, 0, DataType::Type::kInt32); loop_body_[d]->AddPhi(select_phi); return select_phi; } @@ -154,7 +152,7 @@ class InductionVarAnalysisTest : public CommonCompilerTest { // Inserts a phi to loop header at depth d and returns it. HPhi* InsertLoopPhi(int vreg, int d) { - HPhi* phi = new (&allocator_) HPhi(&allocator_, vreg, 0, Primitive::kPrimInt); + HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), vreg, 0, DataType::Type::kInt32); loop_header_[d]->AddPhi(phi); return phi; } @@ -164,8 +162,8 @@ class InductionVarAnalysisTest : public CommonCompilerTest { HInstruction* InsertArrayStore(HInstruction* subscript, int d) { // ArraySet is given a float value in order to avoid SsaBuilder typing // it from the array's non-existent reference type info. - return InsertInstruction(new (&allocator_) HArraySet( - parameter_, subscript, float_constant0_, Primitive::kPrimFloat, 0), d); + return InsertInstruction(new (GetAllocator()) HArraySet( + parameter_, subscript, float_constant0_, DataType::Type::kFloat32, 0), d); } // Returns induction information of instruction in loop at depth d. @@ -197,13 +195,11 @@ class InductionVarAnalysisTest : public CommonCompilerTest { // Performs InductionVarAnalysis (after proper set up). 
void PerformInductionVarAnalysis() { graph_->BuildDominatorTree(); - iva_ = new (&allocator_) HInductionVarAnalysis(graph_); + iva_ = new (GetAllocator()) HInductionVarAnalysis(graph_); iva_->Run(); } // General building fields. - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; HInductionVarAnalysis* iva_; @@ -265,8 +261,8 @@ TEST_F(InductionVarAnalysisTest, FindBasicInduction) { HInstruction* store = InsertArrayStore(basic_[0], 0); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + (0)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str()); - EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(increment_[0], 0).c_str()); + EXPECT_STREQ("((1) * i + (0)):Int32", GetInductionInfo(store->InputAt(1), 0).c_str()); + EXPECT_STREQ("((1) * i + (1)):Int32", GetInductionInfo(increment_[0], 0).c_str()); // Offset matters! EXPECT_FALSE(HaveSameInduction(store->InputAt(1), increment_[0])); @@ -286,22 +282,22 @@ TEST_F(InductionVarAnalysisTest, FindDerivedInduction) { // } BuildLoopNest(1); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, constant100_, basic_[0]), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, constant100_, basic_[0]), 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, constant100_, basic_[0]), 0); HInstruction* mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, constant100_, basic_[0]), 0); + new (GetAllocator()) HMul(DataType::Type::kInt32, constant100_, basic_[0]), 0); HInstruction* shl = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0); + new (GetAllocator()) HShl(DataType::Type::kInt32, basic_[0], constant1_), 0); HInstruction* neg = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, basic_[0]), 0); + new (GetAllocator()) HNeg(DataType::Type::kInt32, basic_[0]), 0); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + (100)):PrimInt", GetInductionInfo(add, 0).c_str()); - EXPECT_STREQ("(( - (1)) * i + (100)):PrimInt", GetInductionInfo(sub, 0).c_str()); - EXPECT_STREQ("((100) * i + (0)):PrimInt", GetInductionInfo(mul, 0).c_str()); - EXPECT_STREQ("((2) * i + (0)):PrimInt", GetInductionInfo(shl, 0).c_str()); - EXPECT_STREQ("(( - (1)) * i + (0)):PrimInt", GetInductionInfo(neg, 0).c_str()); + EXPECT_STREQ("((1) * i + (100)):Int32", GetInductionInfo(add, 0).c_str()); + EXPECT_STREQ("(( - (1)) * i + (100)):Int32", GetInductionInfo(sub, 0).c_str()); + EXPECT_STREQ("((100) * i + (0)):Int32", GetInductionInfo(mul, 0).c_str()); + EXPECT_STREQ("((2) * i + (0)):Int32", GetInductionInfo(shl, 0).c_str()); + EXPECT_STREQ("(( - (1)) * i + (0)):Int32", GetInductionInfo(neg, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindChainInduction) { @@ -318,19 +314,19 @@ TEST_F(InductionVarAnalysisTest, FindChainInduction) { k_header->AddInput(constant0_); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant100_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant100_), 0); HInstruction* store1 = InsertArrayStore(add, 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, add, constant1_), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, add, constant1_), 0); HInstruction* store2 = InsertArrayStore(sub, 0); k_header->AddInput(sub); PerformInductionVarAnalysis(); - EXPECT_STREQ("(((100) - (1)) * i + (0)):PrimInt", + 
EXPECT_STREQ("(((100) - (1)) * i + (0)):Int32", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("(((100) - (1)) * i + (100)):PrimInt", + EXPECT_STREQ("(((100) - (1)) * i + (100)):Int32", GetInductionInfo(store1->InputAt(1), 0).c_str()); - EXPECT_STREQ("(((100) - (1)) * i + ((100) - (1))):PrimInt", + EXPECT_STREQ("(((100) - (1)) * i + ((100) - (1))):Int32", GetInductionInfo(store2->InputAt(1), 0).c_str()); } @@ -351,11 +347,11 @@ TEST_F(InductionVarAnalysisTest, FindTwoWayBasicInduction) { HPhi* k_body = BuildIf(0, &ifTrue, &ifFalse); // True-branch. - HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_); + HInstruction* inc1 = new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant1_); ifTrue->AddInstruction(inc1); k_body->AddInput(inc1); // False-branch. - HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_); + HInstruction* inc2 = new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant1_); ifFalse->AddInstruction(inc2); k_body->AddInput(inc2); // Merge over a phi. @@ -363,8 +359,8 @@ TEST_F(InductionVarAnalysisTest, FindTwoWayBasicInduction) { k_header->AddInput(k_body); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + (0)):PrimInt", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str()); + EXPECT_STREQ("((1) * i + (0)):Int32", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("((1) * i + (1)):Int32", GetInductionInfo(store->InputAt(1), 0).c_str()); // Both increments get same induction. EXPECT_TRUE(HaveSameInduction(store->InputAt(1), inc1)); @@ -384,18 +380,18 @@ TEST_F(InductionVarAnalysisTest, FindTwoWayDerivedInduction) { HPhi* k = BuildIf(0, &ifTrue, &ifFalse); // True-branch. - HInstruction* inc1 = new (&allocator_) HAdd(Primitive::kPrimInt, basic_[0], constant1_); + HInstruction* inc1 = new (GetAllocator()) HAdd(DataType::Type::kInt32, basic_[0], constant1_); ifTrue->AddInstruction(inc1); k->AddInput(inc1); // False-branch. - HInstruction* inc2 = new (&allocator_) HAdd(Primitive::kPrimInt, basic_[0], constant1_); + HInstruction* inc2 = new (GetAllocator()) HAdd(DataType::Type::kInt32, basic_[0], constant1_); ifFalse->AddInstruction(inc2); k->AddInput(inc2); // Merge over a phi. HInstruction* store = InsertArrayStore(k, 0); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str()); + EXPECT_STREQ("((1) * i + (1)):Int32", GetInductionInfo(store->InputAt(1), 0).c_str()); // Both increments get same induction. 
EXPECT_TRUE(HaveSameInduction(store->InputAt(1), inc1)); @@ -412,17 +408,17 @@ TEST_F(InductionVarAnalysisTest, AddLinear) { BuildLoopNest(1); HInstruction* add1 = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, basic_[0], basic_[0]), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, basic_[0], basic_[0]), 0); HInstruction* add2 = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, constant7_, basic_[0]), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, constant7_, basic_[0]), 0); HInstruction* add3 = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, add1, add2), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, add1, add2), 0); PerformInductionVarAnalysis(); - EXPECT_STREQ("((1) * i + (0)):PrimInt", GetInductionInfo(basic_[0], 0).c_str()); - EXPECT_STREQ("(((1) + (1)) * i + (0)):PrimInt", GetInductionInfo(add1, 0).c_str()); - EXPECT_STREQ("((1) * i + (7)):PrimInt", GetInductionInfo(add2, 0).c_str()); - EXPECT_STREQ("((((1) + (1)) + (1)) * i + (7)):PrimInt", GetInductionInfo(add3, 0).c_str()); + EXPECT_STREQ("((1) * i + (0)):Int32", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("(((1) + (1)) * i + (0)):Int32", GetInductionInfo(add1, 0).c_str()); + EXPECT_STREQ("((1) * i + (7)):Int32", GetInductionInfo(add2, 0).c_str()); + EXPECT_STREQ("((((1) + (1)) + (1)) * i + (7)):Int32", GetInductionInfo(add3, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindPolynomialInduction) { @@ -438,18 +434,18 @@ TEST_F(InductionVarAnalysisTest, FindPolynomialInduction) { k_header->AddInput(constant1_); HInstruction* mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, basic_[0], constant2_), 0); + new (GetAllocator()) HMul(DataType::Type::kInt32, basic_[0], constant2_), 0); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, constant100_, mul), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, constant100_, mul), 0); HInstruction* pol = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, add, k_header), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, add, k_header), 0); k_header->AddInput(pol); PerformInductionVarAnalysis(); // Note, only the phi in the cycle and the base linear induction are classified. 
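For orientation, the instructions built by FindPolynomialInduction correspond roughly to this Java loop (an illustrative sketch; the bound 100 comes from BuildLoopNest and the wrapper class is mine):

class PolyInductionSketch {
  static int run() {
    int k = 1;                      // loop phi, initial value 1
    for (int i = 0; i < 100; i++) {
      k = (100 + 2 * i) + k;        // accumulate the linear term 2 * i + 100
    }
    return k;
  }
}

At the start of iteration i the phi holds 1 plus the sum of 2 * j + 100 over all earlier iterations j < i, which is what the poly(sum_lt(...) + (1)) form in the expectations below denotes; the in-body pol value itself is left unclassified.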
- EXPECT_STREQ("poly(sum_lt(((2) * i + (100)):PrimInt) + (1)):PrimInt", + EXPECT_STREQ("poly(sum_lt(((2) * i + (100)):Int32) + (1)):Int32", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("((2) * i + (100)):PrimInt", GetInductionInfo(add, 0).c_str()); + EXPECT_STREQ("((2) * i + (100)):Int32", GetInductionInfo(add, 0).c_str()); EXPECT_STREQ("", GetInductionInfo(pol, 0).c_str()); } @@ -469,32 +465,32 @@ TEST_F(InductionVarAnalysisTest, FindPolynomialInductionAndDerived) { k_header->AddInput(constant1_); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant100_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant100_), 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, k_header, constant1_), 0); HInstruction* neg = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, sub), 0); + new (GetAllocator()) HNeg(DataType::Type::kInt32, sub), 0); HInstruction* mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, k_header, constant2_), 0); + new (GetAllocator()) HMul(DataType::Type::kInt32, k_header, constant2_), 0); HInstruction* shl = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, k_header, constant2_), 0); + new (GetAllocator()) HShl(DataType::Type::kInt32, k_header, constant2_), 0); HInstruction* pol = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, basic_[0]), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, basic_[0]), 0); k_header->AddInput(pol); PerformInductionVarAnalysis(); // Note, only the phi in the cycle and derived are classified. - EXPECT_STREQ("poly(sum_lt(((1) * i + (0)):PrimInt) + (1)):PrimInt", + EXPECT_STREQ("poly(sum_lt(((1) * i + (0)):Int32) + (1)):Int32", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("poly(sum_lt(((1) * i + (0)):PrimInt) + ((1) + (100))):PrimInt", + EXPECT_STREQ("poly(sum_lt(((1) * i + (0)):Int32) + ((1) + (100))):Int32", GetInductionInfo(add, 0).c_str()); - EXPECT_STREQ("poly(sum_lt(((1) * i + (0)):PrimInt) + ((1) - (1))):PrimInt", + EXPECT_STREQ("poly(sum_lt(((1) * i + (0)):Int32) + ((1) - (1))):Int32", GetInductionInfo(sub, 0).c_str()); - EXPECT_STREQ("poly(sum_lt((( - (1)) * i + (0)):PrimInt) + ((1) - (1))):PrimInt", + EXPECT_STREQ("poly(sum_lt((( - (1)) * i + (0)):Int32) + ((1) - (1))):Int32", GetInductionInfo(neg, 0).c_str()); - EXPECT_STREQ("poly(sum_lt(((2) * i + (0)):PrimInt) + (2)):PrimInt", + EXPECT_STREQ("poly(sum_lt(((2) * i + (0)):Int32) + (2)):Int32", GetInductionInfo(mul, 0).c_str()); - EXPECT_STREQ("poly(sum_lt(((4) * i + (0)):PrimInt) + (4)):PrimInt", + EXPECT_STREQ("poly(sum_lt(((4) * i + (0)):Int32) + (4)):Int32", GetInductionInfo(shl, 0).c_str()); EXPECT_STREQ("", GetInductionInfo(pol, 0).c_str()); } @@ -512,21 +508,21 @@ TEST_F(InductionVarAnalysisTest, AddPolynomial) { k_header->AddInput(constant7_); HInstruction* add1 = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, k_header), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, k_header), 0); HInstruction* add2 = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, add1, k_header), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, add1, k_header), 0); HInstruction* add3 = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, basic_[0]), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, basic_[0]), 
0); k_header->AddInput(add3); PerformInductionVarAnalysis(); // Note, only the phi in the cycle and added-derived are classified. - EXPECT_STREQ("poly(sum_lt(((1) * i + (0)):PrimInt) + (7)):PrimInt", + EXPECT_STREQ("poly(sum_lt(((1) * i + (0)):Int32) + (7)):Int32", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("poly(sum_lt((((1) + (1)) * i + (0)):PrimInt) + ((7) + (7))):PrimInt", + EXPECT_STREQ("poly(sum_lt((((1) + (1)) * i + (0)):Int32) + ((7) + (7))):Int32", GetInductionInfo(add1, 0).c_str()); EXPECT_STREQ( - "poly(sum_lt(((((1) + (1)) + (1)) * i + (0)):PrimInt) + (((7) + (7)) + (7))):PrimInt", + "poly(sum_lt(((((1) + (1)) + (1)) * i + (0)):Int32) + (((7) + (7)) + (7))):Int32", GetInductionInfo(add2, 0).c_str()); EXPECT_STREQ("", GetInductionInfo(add3, 0).c_str()); } @@ -542,12 +538,12 @@ TEST_F(InductionVarAnalysisTest, FindGeometricMulInduction) { k_header->AddInput(constant1_); HInstruction* mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, k_header, constant100_), 0); + new (GetAllocator()) HMul(DataType::Type::kInt32, k_header, constant100_), 0); k_header->AddInput(mul); PerformInductionVarAnalysis(); - EXPECT_STREQ("geo((1) * 100 ^ i + (0)):PrimInt", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("geo((100) * 100 ^ i + (0)):PrimInt", GetInductionInfo(mul, 0).c_str()); + EXPECT_STREQ("geo((1) * 100 ^ i + (0)):Int32", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("geo((100) * 100 ^ i + (0)):Int32", GetInductionInfo(mul, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindGeometricShlInductionAndDerived) { @@ -567,31 +563,31 @@ TEST_F(InductionVarAnalysisTest, FindGeometricShlInductionAndDerived) { k_header->AddInput(constant1_); HInstruction* add1 = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant1_), 0); HInstruction* shl1 = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HShl(DataType::Type::kInt32, k_header, constant1_), 0); HInstruction* add2 = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, shl1, constant100_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, shl1, constant100_), 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, shl1, constant1_), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, shl1, constant1_), 0); HInstruction* neg = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, sub), 0); + new (GetAllocator()) HNeg(DataType::Type::kInt32, sub), 0); HInstruction* mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, shl1, constant2_), 0); + new (GetAllocator()) HMul(DataType::Type::kInt32, shl1, constant2_), 0); HInstruction* shl2 = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, shl1, constant2_), 0); + new (GetAllocator()) HShl(DataType::Type::kInt32, shl1, constant2_), 0); k_header->AddInput(shl1); PerformInductionVarAnalysis(); - EXPECT_STREQ("geo((1) * 2 ^ i + (0)):PrimInt", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("geo((1) * 2 ^ i + (1)):PrimInt", GetInductionInfo(add1, 0).c_str()); - EXPECT_STREQ("geo((2) * 2 ^ i + (0)):PrimInt", GetInductionInfo(shl1, 0).c_str()); - EXPECT_STREQ("geo((2) * 2 ^ i + (100)):PrimInt", GetInductionInfo(add2, 0).c_str()); - EXPECT_STREQ("geo((2) * 2 ^ i + ((0) - (1))):PrimInt", GetInductionInfo(sub, 0).c_str()); - EXPECT_STREQ("geo(( - (2)) * 2 ^ i + ( - ((0) - (1)))):PrimInt", + 
EXPECT_STREQ("geo((1) * 2 ^ i + (0)):Int32", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("geo((1) * 2 ^ i + (1)):Int32", GetInductionInfo(add1, 0).c_str()); + EXPECT_STREQ("geo((2) * 2 ^ i + (0)):Int32", GetInductionInfo(shl1, 0).c_str()); + EXPECT_STREQ("geo((2) * 2 ^ i + (100)):Int32", GetInductionInfo(add2, 0).c_str()); + EXPECT_STREQ("geo((2) * 2 ^ i + ((0) - (1))):Int32", GetInductionInfo(sub, 0).c_str()); + EXPECT_STREQ("geo(( - (2)) * 2 ^ i + ( - ((0) - (1)))):Int32", GetInductionInfo(neg, 0).c_str()); - EXPECT_STREQ("geo(((2) * (2)) * 2 ^ i + (0)):PrimInt", GetInductionInfo(mul, 0).c_str()); - EXPECT_STREQ("geo(((2) * (4)) * 2 ^ i + (0)):PrimInt", GetInductionInfo(shl2, 0).c_str()); + EXPECT_STREQ("geo(((2) * (2)) * 2 ^ i + (0)):Int32", GetInductionInfo(mul, 0).c_str()); + EXPECT_STREQ("geo(((2) * (4)) * 2 ^ i + (0)):Int32", GetInductionInfo(shl2, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindGeometricDivInductionAndDerived) { @@ -610,24 +606,24 @@ TEST_F(InductionVarAnalysisTest, FindGeometricDivInductionAndDerived) { k_header->AddInput(constant1_); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant100_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant100_), 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, k_header, constant1_), 0); HInstruction* neg = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, sub), 0); + new (GetAllocator()) HNeg(DataType::Type::kInt32, sub), 0); HInstruction* mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, k_header, constant2_), 0); + new (GetAllocator()) HMul(DataType::Type::kInt32, k_header, constant2_), 0); HInstruction* shl = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, k_header, constant2_), 0); + new (GetAllocator()) HShl(DataType::Type::kInt32, k_header, constant2_), 0); HInstruction* div = InsertInstruction( - new (&allocator_) HDiv(Primitive::kPrimInt, k_header, constant100_, kNoDexPc), 0); + new (GetAllocator()) HDiv(DataType::Type::kInt32, k_header, constant100_, kNoDexPc), 0); k_header->AddInput(div); PerformInductionVarAnalysis(); // Note, only the phi in the cycle and direct additive derived are classified. - EXPECT_STREQ("geo((1) * 100 ^ -i + (0)):PrimInt", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("geo((1) * 100 ^ -i + (100)):PrimInt", GetInductionInfo(add, 0).c_str()); - EXPECT_STREQ("geo((1) * 100 ^ -i + ((0) - (1))):PrimInt", GetInductionInfo(sub, 0).c_str()); + EXPECT_STREQ("geo((1) * 100 ^ -i + (0)):Int32", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("geo((1) * 100 ^ -i + (100)):Int32", GetInductionInfo(add, 0).c_str()); + EXPECT_STREQ("geo((1) * 100 ^ -i + ((0) - (1))):Int32", GetInductionInfo(sub, 0).c_str()); EXPECT_STREQ("", GetInductionInfo(neg, 0).c_str()); EXPECT_STREQ("", GetInductionInfo(mul, 0).c_str()); EXPECT_STREQ("", GetInductionInfo(shl, 0).c_str()); @@ -645,12 +641,12 @@ TEST_F(InductionVarAnalysisTest, FindGeometricShrInduction) { k_header->AddInput(constant100_); HInstruction* shr = InsertInstruction( - new (&allocator_) HShr(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HShr(DataType::Type::kInt32, k_header, constant1_), 0); k_header->AddInput(shr); PerformInductionVarAnalysis(); // Note, only the phi in the cycle is classified. 
- EXPECT_STREQ("geo((100) * 2 ^ -i + (0)):PrimInt", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("geo((100) * 2 ^ -i + (0)):Int32", GetInductionInfo(k_header, 0).c_str()); EXPECT_STREQ("", GetInductionInfo(shr, 0).c_str()); } @@ -665,7 +661,7 @@ TEST_F(InductionVarAnalysisTest, FindNotGeometricShrInduction) { k_header->AddInput(constantm1_); HInstruction* shr = InsertInstruction( - new (&allocator_) HShr(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HShr(DataType::Type::kInt32, k_header, constant1_), 0); k_header->AddInput(shr); PerformInductionVarAnalysis(); @@ -689,27 +685,32 @@ TEST_F(InductionVarAnalysisTest, FindRemWrapAroundInductionAndDerived) { k_header->AddInput(constant100_); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant100_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant100_), 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, k_header, constant1_), 0); HInstruction* neg = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, sub), 0); + new (GetAllocator()) HNeg(DataType::Type::kInt32, sub), 0); HInstruction* mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, k_header, constant2_), 0); + new (GetAllocator()) HMul(DataType::Type::kInt32, k_header, constant2_), 0); HInstruction* shl = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, k_header, constant2_), 0); + new (GetAllocator()) HShl(DataType::Type::kInt32, k_header, constant2_), 0); HInstruction* rem = InsertInstruction( - new (&allocator_) HRem(Primitive::kPrimInt, k_header, constant7_, kNoDexPc), 0); + new (GetAllocator()) HRem(DataType::Type::kInt32, k_header, constant7_, kNoDexPc), 0); k_header->AddInput(rem); PerformInductionVarAnalysis(); // Note, only the phi in the cycle and derived are classified. 
- EXPECT_STREQ("wrap((100), ((100) % (7))):PrimInt", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("wrap(((100) + (100)), (((100) % (7)) + (100))):PrimInt", GetInductionInfo(add, 0).c_str()); - EXPECT_STREQ("wrap(((100) - (1)), (((100) % (7)) - (1))):PrimInt", GetInductionInfo(sub, 0).c_str()); - EXPECT_STREQ("wrap(( - ((100) - (1))), ( - (((100) % (7)) - (1)))):PrimInt", GetInductionInfo(neg, 0).c_str()); - EXPECT_STREQ("wrap(((100) * (2)), (((100) % (7)) * (2))):PrimInt", GetInductionInfo(mul, 0).c_str()); - EXPECT_STREQ("wrap(((100) * (4)), (((100) % (7)) * (4))):PrimInt", GetInductionInfo(shl, 0).c_str()); + EXPECT_STREQ("wrap((100), ((100) % (7))):Int32", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("wrap(((100) + (100)), (((100) % (7)) + (100))):Int32", + GetInductionInfo(add, 0).c_str()); + EXPECT_STREQ("wrap(((100) - (1)), (((100) % (7)) - (1))):Int32", + GetInductionInfo(sub, 0).c_str()); + EXPECT_STREQ("wrap(( - ((100) - (1))), ( - (((100) % (7)) - (1)))):Int32", + GetInductionInfo(neg, 0).c_str()); + EXPECT_STREQ("wrap(((100) * (2)), (((100) % (7)) * (2))):Int32", + GetInductionInfo(mul, 0).c_str()); + EXPECT_STREQ("wrap(((100) * (4)), (((100) % (7)) * (4))):Int32", + GetInductionInfo(shl, 0).c_str()); EXPECT_STREQ("", GetInductionInfo(rem, 0).c_str()); } @@ -726,15 +727,15 @@ TEST_F(InductionVarAnalysisTest, FindFirstOrderWrapAroundInduction) { HInstruction* store = InsertArrayStore(k_header, 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, constant100_, basic_[0]), 0); k_header->AddInput(sub); PerformInductionVarAnalysis(); - EXPECT_STREQ("wrap((0), (( - (1)) * i + (100)):PrimInt):PrimInt", + EXPECT_STREQ("wrap((0), (( - (1)) * i + (100)):Int32):Int32", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("wrap((0), (( - (1)) * i + (100)):PrimInt):PrimInt", + EXPECT_STREQ("wrap((0), (( - (1)) * i + (100)):Int32):Int32", GetInductionInfo(store->InputAt(1), 0).c_str()); - EXPECT_STREQ("(( - (1)) * i + (100)):PrimInt", GetInductionInfo(sub, 0).c_str()); + EXPECT_STREQ("(( - (1)) * i + (100)):Int32", GetInductionInfo(sub, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindSecondOrderWrapAroundInduction) { @@ -755,11 +756,11 @@ TEST_F(InductionVarAnalysisTest, FindSecondOrderWrapAroundInduction) { HInstruction* store = InsertArrayStore(k_header, 0); k_header->AddInput(t); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0], 0), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, constant100_, basic_[0], 0), 0); t->AddInput(sub); PerformInductionVarAnalysis(); - EXPECT_STREQ("wrap((0), wrap((100), (( - (1)) * i + (100)):PrimInt):PrimInt):PrimInt", + EXPECT_STREQ("wrap((0), wrap((100), (( - (1)) * i + (100)):Int32):Int32):Int32", GetInductionInfo(store->InputAt(1), 0).c_str()); } @@ -780,34 +781,34 @@ TEST_F(InductionVarAnalysisTest, FindWrapAroundDerivedInduction) { k_header->AddInput(constant0_); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant100_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant100_), 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, k_header, constant100_), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, k_header, constant100_), 0); HInstruction* mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, k_header, 
constant100_), 0); + new (GetAllocator()) HMul(DataType::Type::kInt32, k_header, constant100_), 0); HInstruction* shl1 = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HShl(DataType::Type::kInt32, k_header, constant1_), 0); HInstruction* neg1 = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, k_header), 0); + new (GetAllocator()) HNeg(DataType::Type::kInt32, k_header), 0); HInstruction* shl2 = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0); + new (GetAllocator()) HShl(DataType::Type::kInt32, basic_[0], constant1_), 0); HInstruction* neg2 = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, shl2), 0); + new (GetAllocator()) HNeg(DataType::Type::kInt32, shl2), 0); k_header->AddInput(shl2); PerformInductionVarAnalysis(); - EXPECT_STREQ("wrap((100), ((2) * i + (100)):PrimInt):PrimInt", + EXPECT_STREQ("wrap((100), ((2) * i + (100)):Int32):Int32", GetInductionInfo(add, 0).c_str()); - EXPECT_STREQ("wrap(((0) - (100)), ((2) * i + ((0) - (100))):PrimInt):PrimInt", + EXPECT_STREQ("wrap(((0) - (100)), ((2) * i + ((0) - (100))):Int32):Int32", GetInductionInfo(sub, 0).c_str()); - EXPECT_STREQ("wrap((0), (((2) * (100)) * i + (0)):PrimInt):PrimInt", + EXPECT_STREQ("wrap((0), (((2) * (100)) * i + (0)):Int32):Int32", GetInductionInfo(mul, 0).c_str()); - EXPECT_STREQ("wrap((0), (((2) * (2)) * i + (0)):PrimInt):PrimInt", + EXPECT_STREQ("wrap((0), (((2) * (2)) * i + (0)):Int32):Int32", GetInductionInfo(shl1, 0).c_str()); - EXPECT_STREQ("wrap((0), (( - (2)) * i + (0)):PrimInt):PrimInt", + EXPECT_STREQ("wrap((0), (( - (2)) * i + (0)):Int32):Int32", GetInductionInfo(neg1, 0).c_str()); - EXPECT_STREQ("((2) * i + (0)):PrimInt", GetInductionInfo(shl2, 0).c_str()); - EXPECT_STREQ("(( - (2)) * i + (0)):PrimInt", GetInductionInfo(neg2, 0).c_str()); + EXPECT_STREQ("((2) * i + (0)):Int32", GetInductionInfo(shl2, 0).c_str()); + EXPECT_STREQ("(( - (2)) * i + (0)):Int32", GetInductionInfo(neg2, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindPeriodicInduction) { @@ -834,8 +835,8 @@ TEST_F(InductionVarAnalysisTest, FindPeriodicInduction) { t->AddInput(k_header); PerformInductionVarAnalysis(); - EXPECT_STREQ("periodic((0), (100)):PrimInt", GetInductionInfo(store1->InputAt(1), 0).c_str()); - EXPECT_STREQ("periodic((100), (0)):PrimInt", GetInductionInfo(store2->InputAt(1), 0).c_str()); + EXPECT_STREQ("periodic((0), (100)):Int32", GetInductionInfo(store1->InputAt(1), 0).c_str()); + EXPECT_STREQ("periodic((100), (0)):Int32", GetInductionInfo(store2->InputAt(1), 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindIdiomaticPeriodicInduction) { @@ -851,12 +852,12 @@ TEST_F(InductionVarAnalysisTest, FindIdiomaticPeriodicInduction) { HInstruction* store = InsertArrayStore(k_header, 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, constant1_, k_header), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, constant1_, k_header), 0); k_header->AddInput(sub); PerformInductionVarAnalysis(); - EXPECT_STREQ("periodic((0), (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str()); - EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(sub, 0).c_str()); + EXPECT_STREQ("periodic((0), (1)):Int32", GetInductionInfo(store->InputAt(1), 0).c_str()); + EXPECT_STREQ("periodic((1), (0)):Int32", GetInductionInfo(sub, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindXorPeriodicInduction) { @@ -872,12 +873,12 @@ TEST_F(InductionVarAnalysisTest, 
FindXorPeriodicInduction) { HInstruction* store = InsertArrayStore(k_header, 0); HInstruction* x = InsertInstruction( - new (&allocator_) HXor(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HXor(DataType::Type::kInt32, k_header, constant1_), 0); k_header->AddInput(x); PerformInductionVarAnalysis(); - EXPECT_STREQ("periodic((0), (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str()); - EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(x, 0).c_str()); + EXPECT_STREQ("periodic((0), (1)):Int32", GetInductionInfo(store->InputAt(1), 0).c_str()); + EXPECT_STREQ("periodic((1), (0)):Int32", GetInductionInfo(x, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindXorConstantLeftPeriodicInduction) { @@ -891,12 +892,12 @@ TEST_F(InductionVarAnalysisTest, FindXorConstantLeftPeriodicInduction) { k_header->AddInput(constant1_); HInstruction* x = InsertInstruction( - new (&allocator_) HXor(Primitive::kPrimInt, constant1_, k_header), 0); + new (GetAllocator()) HXor(DataType::Type::kInt32, constant1_, k_header), 0); k_header->AddInput(x); PerformInductionVarAnalysis(); - EXPECT_STREQ("periodic((1), ((1) ^ (1))):PrimInt", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("periodic(((1) ^ (1)), (1)):PrimInt", GetInductionInfo(x, 0).c_str()); + EXPECT_STREQ("periodic((1), ((1) ^ (1))):Int32", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("periodic(((1) ^ (1)), (1)):Int32", GetInductionInfo(x, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindXor100PeriodicInduction) { @@ -910,12 +911,12 @@ TEST_F(InductionVarAnalysisTest, FindXor100PeriodicInduction) { k_header->AddInput(constant1_); HInstruction* x = InsertInstruction( - new (&allocator_) HXor(Primitive::kPrimInt, k_header, constant100_), 0); + new (GetAllocator()) HXor(DataType::Type::kInt32, k_header, constant100_), 0); k_header->AddInput(x); PerformInductionVarAnalysis(); - EXPECT_STREQ("periodic((1), ((1) ^ (100))):PrimInt", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("periodic(((1) ^ (100)), (1)):PrimInt", GetInductionInfo(x, 0).c_str()); + EXPECT_STREQ("periodic((1), ((1) ^ (100))):Int32", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("periodic(((1) ^ (100)), (1)):Int32", GetInductionInfo(x, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindBooleanEqPeriodicInduction) { @@ -928,12 +929,12 @@ TEST_F(InductionVarAnalysisTest, FindBooleanEqPeriodicInduction) { HPhi* k_header = InsertLoopPhi(0, 0); k_header->AddInput(constant0_); - HInstruction* x = InsertInstruction(new (&allocator_) HEqual(k_header, constant0_), 0); + HInstruction* x = InsertInstruction(new (GetAllocator()) HEqual(k_header, constant0_), 0); k_header->AddInput(x); PerformInductionVarAnalysis(); - EXPECT_STREQ("periodic((0), (1)):PrimBoolean", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str()); + EXPECT_STREQ("periodic((0), (1)):Bool", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("periodic((1), (0)):Bool", GetInductionInfo(x, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindBooleanEqConstantLeftPeriodicInduction) { @@ -946,12 +947,12 @@ TEST_F(InductionVarAnalysisTest, FindBooleanEqConstantLeftPeriodicInduction) { HPhi* k_header = InsertLoopPhi(0, 0); k_header->AddInput(constant0_); - HInstruction* x = InsertInstruction(new (&allocator_) HEqual(constant0_, k_header), 0); + HInstruction* x = InsertInstruction(new (GetAllocator()) HEqual(constant0_, k_header), 0); k_header->AddInput(x); PerformInductionVarAnalysis(); - 
EXPECT_STREQ("periodic((0), (1)):PrimBoolean", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str()); + EXPECT_STREQ("periodic((0), (1)):Bool", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("periodic((1), (0)):Bool", GetInductionInfo(x, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindBooleanNePeriodicInduction) { @@ -964,12 +965,12 @@ TEST_F(InductionVarAnalysisTest, FindBooleanNePeriodicInduction) { HPhi* k_header = InsertLoopPhi(0, 0); k_header->AddInput(constant0_); - HInstruction* x = InsertInstruction(new (&allocator_) HNotEqual(k_header, constant1_), 0); + HInstruction* x = InsertInstruction(new (GetAllocator()) HNotEqual(k_header, constant1_), 0); k_header->AddInput(x); PerformInductionVarAnalysis(); - EXPECT_STREQ("periodic((0), (1)):PrimBoolean", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str()); + EXPECT_STREQ("periodic((0), (1)):Bool", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("periodic((1), (0)):Bool", GetInductionInfo(x, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindBooleanNeConstantLeftPeriodicInduction) { @@ -982,12 +983,12 @@ TEST_F(InductionVarAnalysisTest, FindBooleanNeConstantLeftPeriodicInduction) { HPhi* k_header = InsertLoopPhi(0, 0); k_header->AddInput(constant0_); - HInstruction* x = InsertInstruction(new (&allocator_) HNotEqual(constant1_, k_header), 0); + HInstruction* x = InsertInstruction(new (GetAllocator()) HNotEqual(constant1_, k_header), 0); k_header->AddInput(x); PerformInductionVarAnalysis(); - EXPECT_STREQ("periodic((0), (1)):PrimBoolean", GetInductionInfo(k_header, 0).c_str()); - EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str()); + EXPECT_STREQ("periodic((0), (1)):Bool", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("periodic((1), (0)):Bool", GetInductionInfo(x, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) { @@ -1007,30 +1008,30 @@ TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) { k_header->AddInput(constant0_); HInstruction* neg1 = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, k_header), 0); + new (GetAllocator()) HNeg(DataType::Type::kInt32, k_header), 0); HInstruction* idiom = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, constant1_, k_header), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, constant1_, k_header), 0); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, idiom, constant100_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, idiom, constant100_), 0); HInstruction* sub = InsertInstruction( - new (&allocator_) HSub(Primitive::kPrimInt, idiom, constant100_), 0); + new (GetAllocator()) HSub(DataType::Type::kInt32, idiom, constant100_), 0); HInstruction* mul = InsertInstruction( - new (&allocator_) HMul(Primitive::kPrimInt, idiom, constant100_), 0); + new (GetAllocator()) HMul(DataType::Type::kInt32, idiom, constant100_), 0); HInstruction* shl = InsertInstruction( - new (&allocator_) HShl(Primitive::kPrimInt, idiom, constant1_), 0); + new (GetAllocator()) HShl(DataType::Type::kInt32, idiom, constant1_), 0); HInstruction* neg2 = InsertInstruction( - new (&allocator_) HNeg(Primitive::kPrimInt, idiom), 0); + new (GetAllocator()) HNeg(DataType::Type::kInt32, idiom), 0); k_header->AddInput(idiom); PerformInductionVarAnalysis(); - EXPECT_STREQ("periodic((0), (1)):PrimInt", GetInductionInfo(k_header, 0).c_str()); 
- EXPECT_STREQ("periodic((0), ( - (1))):PrimInt", GetInductionInfo(neg1, 0).c_str()); - EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(idiom, 0).c_str()); - EXPECT_STREQ("periodic(((1) + (100)), (100)):PrimInt", GetInductionInfo(add, 0).c_str()); - EXPECT_STREQ("periodic(((1) - (100)), ((0) - (100))):PrimInt", GetInductionInfo(sub, 0).c_str()); - EXPECT_STREQ("periodic((100), (0)):PrimInt", GetInductionInfo(mul, 0).c_str()); - EXPECT_STREQ("periodic((2), (0)):PrimInt", GetInductionInfo(shl, 0).c_str()); - EXPECT_STREQ("periodic(( - (1)), (0)):PrimInt", GetInductionInfo(neg2, 0).c_str()); + EXPECT_STREQ("periodic((0), (1)):Int32", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("periodic((0), ( - (1))):Int32", GetInductionInfo(neg1, 0).c_str()); + EXPECT_STREQ("periodic((1), (0)):Int32", GetInductionInfo(idiom, 0).c_str()); + EXPECT_STREQ("periodic(((1) + (100)), (100)):Int32", GetInductionInfo(add, 0).c_str()); + EXPECT_STREQ("periodic(((1) - (100)), ((0) - (100))):Int32", GetInductionInfo(sub, 0).c_str()); + EXPECT_STREQ("periodic((100), (0)):Int32", GetInductionInfo(mul, 0).c_str()); + EXPECT_STREQ("periodic((2), (0)):Int32", GetInductionInfo(shl, 0).c_str()); + EXPECT_STREQ("periodic(( - (1)), (0)):Int32", GetInductionInfo(neg2, 0).c_str()); } TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { @@ -1052,7 +1053,7 @@ TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { } HInstruction* inc = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, constant1_, k_header[9]), 9); + new (GetAllocator()) HAdd(DataType::Type::kInt32, constant1_, k_header[9]), 9); HInstruction* store = InsertArrayStore(inc, 9); for (int d = 0; d < 10; d++) { @@ -1063,7 +1064,7 @@ TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { // Avoid exact phi number, since that depends on the SSA building phase. std::regex r("\\(\\(1\\) \\* i \\+ " - "\\(\\(1\\) \\+ \\(\\d+:Phi\\)\\)\\):PrimInt"); + "\\(\\(1\\) \\+ \\(\\d+:Phi\\)\\)\\):Int32"); for (int d = 0; d < 10; d++) { if (d == 9) { @@ -1071,7 +1072,7 @@ TEST_F(InductionVarAnalysisTest, FindDeepLoopInduction) { } else { EXPECT_STREQ("", GetInductionInfo(store->InputAt(1), d).c_str()); } - EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(increment_[d], d).c_str()); + EXPECT_STREQ("((1) * i + (1)):Int32", GetInductionInfo(increment_[d], d).c_str()); // Trip-count. EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(d).c_str()); } @@ -1086,15 +1087,15 @@ TEST_F(InductionVarAnalysisTest, ByteInductionIntLoopControl) { // } BuildLoopNest(1); HInstruction* conv = InsertInstruction( - new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], kNoDexPc), 0); + new (GetAllocator()) HTypeConversion(DataType::Type::kInt8, basic_[0], kNoDexPc), 0); HInstruction* store1 = InsertArrayStore(conv, 0); HInstruction* store2 = InsertArrayStore(basic_[0], 0); PerformInductionVarAnalysis(); // Regular int induction (i) is transferred over conversion into byte induction (k). 
- EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str()); - EXPECT_STREQ("((1) * i + (0)):PrimInt", GetInductionInfo(store2->InputAt(1), 0).c_str()); - EXPECT_STREQ("((1) * i + (1)):PrimInt", GetInductionInfo(increment_[0], 0).c_str()); + EXPECT_STREQ("((1) * i + (0)):Int8", GetInductionInfo(store1->InputAt(1), 0).c_str()); + EXPECT_STREQ("((1) * i + (0)):Int32", GetInductionInfo(store2->InputAt(1), 0).c_str()); + EXPECT_STREQ("((1) * i + (1)):Int32", GetInductionInfo(increment_[0], 0).c_str()); // Narrowing detected. EXPECT_TRUE(IsNarrowingLinear(store1->InputAt(1))); @@ -1117,17 +1118,17 @@ TEST_F(InductionVarAnalysisTest, ByteInductionDerivedIntLoopControl) { // } BuildLoopNest(1); HInstruction* conv = InsertInstruction( - new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], kNoDexPc), 0); + new (GetAllocator()) HTypeConversion(DataType::Type::kInt8, basic_[0], kNoDexPc), 0); HInstruction* store1 = InsertArrayStore(conv, 0); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, conv, constant1_), 0); HInstruction* store2 = InsertArrayStore(add, 0); PerformInductionVarAnalysis(); // Byte induction (k) is detected, but it does not transfer over the addition, // since this may yield out-of-type values. - EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str()); + EXPECT_STREQ("((1) * i + (0)):Int8", GetInductionInfo(store1->InputAt(1), 0).c_str()); EXPECT_STREQ("", GetInductionInfo(store2->InputAt(1), 0).c_str()); // Narrowing detected. @@ -1147,15 +1148,15 @@ TEST_F(InductionVarAnalysisTest, ByteInduction) { k_header->AddInput(graph_->GetIntConstant(-128)); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant1_), 0); HInstruction* conv = InsertInstruction( - new (&allocator_) HTypeConversion(Primitive::kPrimByte, add, kNoDexPc), 0); + new (GetAllocator()) HTypeConversion(DataType::Type::kInt8, add, kNoDexPc), 0); k_header->AddInput(conv); PerformInductionVarAnalysis(); // Byte induction (k) is detected, but it does not transfer over the addition, // since this may yield out-of-type values. - EXPECT_STREQ("((1) * i + (-128)):PrimByte", GetInductionInfo(k_header, 0).c_str()); + EXPECT_STREQ("((1) * i + (-128)):Int8", GetInductionInfo(k_header, 0).c_str()); EXPECT_STREQ("", GetInductionInfo(add, 0).c_str()); // Narrowing detected. 
@@ -1175,9 +1176,9 @@ TEST_F(InductionVarAnalysisTest, NoByteInduction1) { k_header->AddInput(graph_->GetIntConstant(-129)); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, k_header, constant1_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, k_header, constant1_), 0); HInstruction* conv = InsertInstruction( - new (&allocator_) HTypeConversion(Primitive::kPrimByte, add, kNoDexPc), 0); + new (GetAllocator()) HTypeConversion(DataType::Type::kInt8, add, kNoDexPc), 0); k_header->AddInput(conv); PerformInductionVarAnalysis(); @@ -1197,9 +1198,9 @@ TEST_F(InductionVarAnalysisTest, NoByteInduction2) { k_header->AddInput(constant0_); HInstruction* conv = InsertInstruction( - new (&allocator_) HTypeConversion(Primitive::kPrimByte, k_header, kNoDexPc), 0); + new (GetAllocator()) HTypeConversion(DataType::Type::kInt8, k_header, kNoDexPc), 0); HInstruction* add = InsertInstruction( - new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0); + new (GetAllocator()) HAdd(DataType::Type::kInt32, conv, constant1_), 0); k_header->AddInput(add); PerformInductionVarAnalysis(); @@ -1216,13 +1217,13 @@ TEST_F(InductionVarAnalysisTest, ByteLoopControl1) { HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(127), 1); HInstruction* conv = - new (&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], kNoDexPc); + new (GetAllocator()) HTypeConversion(DataType::Type::kInt8, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); // Recorded at the phi, but not transferred to increment. - EXPECT_STREQ("((1) * i + (-128)):PrimByte", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("((1) * i + (-128)):Int8", GetInductionInfo(basic_[0], 0).c_str()); EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); // Narrowing detected. @@ -1242,13 +1243,13 @@ TEST_F(InductionVarAnalysisTest, ByteLoopControl2) { HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(128), 1); HInstruction* conv = - new (&allocator_) HTypeConversion(Primitive::kPrimByte, increment_[0], kNoDexPc); + new (GetAllocator()) HTypeConversion(DataType::Type::kInt8, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); // Recorded at the phi, but not transferred to increment. - EXPECT_STREQ("((1) * i + (-128)):PrimByte", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("((1) * i + (-128)):Int8", GetInductionInfo(basic_[0], 0).c_str()); EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); // Narrowing detected. @@ -1268,13 +1269,13 @@ TEST_F(InductionVarAnalysisTest, ShortLoopControl1) { HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(32767), 1); HInstruction* conv = - new (&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], kNoDexPc); + new (GetAllocator()) HTypeConversion(DataType::Type::kInt16, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); // Recorded at the phi, but not transferred to increment. 
- EXPECT_STREQ("((1) * i + (-32768)):PrimShort", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("((1) * i + (-32768)):Int16", GetInductionInfo(basic_[0], 0).c_str()); EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); // Narrowing detected. @@ -1294,13 +1295,13 @@ TEST_F(InductionVarAnalysisTest, ShortLoopControl2) { HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(32768), 1); HInstruction* conv = - new (&allocator_) HTypeConversion(Primitive::kPrimShort, increment_[0], kNoDexPc); + new (GetAllocator()) HTypeConversion(DataType::Type::kInt16, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); // Recorded at the phi, but not transferred to increment. - EXPECT_STREQ("((1) * i + (-32768)):PrimShort", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("((1) * i + (-32768)):Int16", GetInductionInfo(basic_[0], 0).c_str()); EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); // Narrowing detected. @@ -1319,13 +1320,13 @@ TEST_F(InductionVarAnalysisTest, CharLoopControl1) { HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(65535), 1); HInstruction* conv = - new (&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], kNoDexPc); + new (GetAllocator()) HTypeConversion(DataType::Type::kUint16, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); // Recorded at the phi, but not transferred to increment. - EXPECT_STREQ("((1) * i + (0)):PrimChar", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("((1) * i + (0)):Uint16", GetInductionInfo(basic_[0], 0).c_str()); EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); // Narrowing detected. @@ -1344,13 +1345,13 @@ TEST_F(InductionVarAnalysisTest, CharLoopControl2) { HInstruction* ifs = loop_header_[0]->GetLastInstruction()->GetPrevious(); ifs->ReplaceInput(graph_->GetIntConstant(65536), 1); HInstruction* conv = - new (&allocator_) HTypeConversion(Primitive::kPrimChar, increment_[0], kNoDexPc); + new (GetAllocator()) HTypeConversion(DataType::Type::kUint16, increment_[0], kNoDexPc); loop_body_[0]->InsertInstructionBefore(conv, increment_[0]->GetNext()); basic_[0]->ReplaceInput(conv, 1); PerformInductionVarAnalysis(); // Recorded at the phi, but not transferred to increment. - EXPECT_STREQ("((1) * i + (0)):PrimChar", GetInductionInfo(basic_[0], 0).c_str()); + EXPECT_STREQ("((1) * i + (0)):Uint16", GetInductionInfo(basic_[0], 0).c_str()); EXPECT_STREQ("", GetInductionInfo(increment_[0], 0).c_str()); // Narrowing detected. diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc index f35aace3a9..99dec11240 100644 --- a/compiler/optimizing/induction_var_range.cc +++ b/compiler/optimizing/induction_var_range.cc @@ -87,8 +87,10 @@ static bool IsGEZero(HInstruction* instruction) { IsGEZero(instruction->InputAt(1)); case Intrinsics::kMathAbsInt: case Intrinsics::kMathAbsLong: - // Instruction ABS(x) is >= 0. - return true; + // Instruction ABS(>=0) is >= 0. + // NOTE: ABS(minint) = minint prevents assuming + // >= 0 without looking at the argument. 
+ return IsGEZero(instruction->InputAt(0)); default: break; } @@ -155,15 +157,16 @@ static bool IsConstantValue(InductionVarRange::Value v) { } /** Corrects a value for type to account for arithmetic wrap-around in lower precision. */ -static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, Primitive::Type type) { +static InductionVarRange::Value CorrectForType(InductionVarRange::Value v, DataType::Type type) { switch (type) { - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimByte: { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: { // Constants within range only. // TODO: maybe some room for improvement, like allowing widening conversions - int32_t min = Primitive::MinValueOfIntegralType(type); - int32_t max = Primitive::MaxValueOfIntegralType(type); + int32_t min = DataType::MinValueOfIntegralType(type); + int32_t max = DataType::MaxValueOfIntegralType(type); return (IsConstantValue(v) && min <= v.b_constant && v.b_constant <= max) ? v : InductionVarRange::Value(); @@ -214,10 +217,11 @@ bool InductionVarRange::GetInductionRange(HInstruction* context, // bounds check elimination, will have truncated higher precision induction // at their use point already). switch (info->type) { - case Primitive::kPrimInt: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimByte: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: break; default: return false; @@ -414,7 +418,8 @@ HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop, if (GenerateCode(trip->op_a, nullptr, graph, block, &trip_expr, false, false)) { if (taken_test != nullptr) { HInstruction* zero = graph->GetConstant(trip->type, 0); - trip_expr = Insert(block, new (graph->GetArena()) HSelect(taken_test, trip_expr, zero, kNoDexPc)); + ArenaAllocator* allocator = graph->GetAllocator(); + trip_expr = Insert(block, new (allocator) HSelect(taken_test, trip_expr, zero, kNoDexPc)); } return trip_expr; } @@ -668,6 +673,15 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, return AddValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), Value(static_cast<int32_t>(value))); } + } else if (instruction->IsSub()) { + // Incorporate suitable constants in the chased value. + if (IsInt64AndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) { + return SubValue(Value(static_cast<int32_t>(value)), + GetFetch(instruction->InputAt(1), trip, in_body, !is_min)); + } else if (IsInt64AndGet(instruction->InputAt(1), &value) && CanLongValueFitIntoInt(value)) { + return SubValue(GetFetch(instruction->InputAt(0), trip, in_body, is_min), + Value(static_cast<int32_t>(value))); + } } else if (instruction->IsArrayLength()) { // Exploit length properties when chasing constants or chase into a new array declaration. if (chase_hint_ == nullptr) { @@ -678,8 +692,8 @@ InductionVarRange::Value InductionVarRange::GetFetch(HInstruction* instruction, } else if (instruction->IsTypeConversion()) { // Since analysis is 32-bit (or narrower), chase beyond widening along the path. 
// For example, this discovers the length in: for (long i = 0; i < a.length; i++); - if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt && - instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) { + if (instruction->AsTypeConversion()->GetInputType() == DataType::Type::kInt32 && + instruction->AsTypeConversion()->GetResultType() == DataType::Type::kInt64) { return GetFetch(instruction->InputAt(0), trip, in_body, is_min); } } @@ -1040,13 +1054,13 @@ bool InductionVarRange::GenerateLastValuePolynomial(HInductionVarAnalysis::Induc HInstruction* c = nullptr; if (GenerateCode(info->op_b, nullptr, graph, block, graph ? &c : nullptr, false, false)) { if (graph != nullptr) { - Primitive::Type type = info->type; + DataType::Type type = info->type; int64_t sum = a * ((m * (m - 1)) / 2) + b * m; - if (type != Primitive::kPrimLong) { + if (type != DataType::Type::kInt64) { sum = static_cast<int32_t>(sum); // okay to truncate } *result = - Insert(block, new (graph->GetArena()) HAdd(type, graph->GetConstant(type, sum), c)); + Insert(block, new (graph->GetAllocator()) HAdd(type, graph->GetConstant(type, sum), c)); } return true; } @@ -1070,16 +1084,16 @@ bool InductionVarRange::GenerateLastValueGeometric(HInductionVarAnalysis::Induct if (GenerateCode(info->op_a, nullptr, graph, block, &opa, false, false) && GenerateCode(info->op_b, nullptr, graph, block, &opb, false, false)) { if (graph != nullptr) { - Primitive::Type type = info->type; + DataType::Type type = info->type; // Compute f ^ m for known maximum index value m. bool overflow = false; int64_t fpow = IntPow(f, m, &overflow); if (info->operation == HInductionVarAnalysis::kDiv) { // For division, any overflow truncates to zero. - if (overflow || (type != Primitive::kPrimLong && !CanLongValueFitIntoInt(fpow))) { + if (overflow || (type != DataType::Type::kInt64 && !CanLongValueFitIntoInt(fpow))) { fpow = 0; } - } else if (type != Primitive::kPrimLong) { + } else if (type != DataType::Type::kInt64) { // For multiplication, okay to truncate to required precision. DCHECK(info->operation == HInductionVarAnalysis::kMul); fpow = static_cast<int32_t>(fpow); @@ -1091,12 +1105,13 @@ bool InductionVarRange::GenerateLastValueGeometric(HInductionVarAnalysis::Induct } else { // Last value: a * f ^ m + b or a * f ^ -m + b. HInstruction* e = nullptr; + ArenaAllocator* allocator = graph->GetAllocator(); if (info->operation == HInductionVarAnalysis::kMul) { - e = new (graph->GetArena()) HMul(type, opa, graph->GetConstant(type, fpow)); + e = new (allocator) HMul(type, opa, graph->GetConstant(type, fpow)); } else { - e = new (graph->GetArena()) HDiv(type, opa, graph->GetConstant(type, fpow), kNoDexPc); + e = new (allocator) HDiv(type, opa, graph->GetConstant(type, fpow), kNoDexPc); } - *result = Insert(block, new (graph->GetArena()) HAdd(type, Insert(block, e), opb)); + *result = Insert(block, new (allocator) HAdd(type, Insert(block, e), opb)); } } return true; @@ -1150,7 +1165,7 @@ bool InductionVarRange::GenerateLastValuePeriodic(HInductionVarAnalysis::Inducti } // Don't rely on FP arithmetic to be precise, unless the full period // consist of pre-computed expressions only. 
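For the GenerateLastValuePolynomial hunk above, the expression a * ((m * (m - 1)) / 2) + b * m is the closed form of the sum of the linear sequence a * i + b over i = 0 .. m-1. A quick stand-alone check of that identity with arbitrary example constants (illustration only):

#include <cstdint>
#include <cstdio>

int main() {
  const int64_t a = 3, b = 7, m = 100;  // arbitrary example constants
  int64_t loop_sum = 0;
  for (int64_t i = 0; i < m; ++i) {
    loop_sum += a * i + b;              // sum the linear sequence directly
  }
  const int64_t closed_form = a * ((m * (m - 1)) / 2) + b * m;
  std::printf("loop=%lld  closed=%lld\n",
              static_cast<long long>(loop_sum),
              static_cast<long long>(closed_form));  // both print 15550
  return 0;
}
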
- if (info->type == Primitive::kPrimFloat || info->type == Primitive::kPrimDouble) { + if (info->type == DataType::Type::kFloat32 || info->type == DataType::Type::kFloat64) { if (!all_invariants) { return false; } @@ -1176,19 +1191,21 @@ bool InductionVarRange::GenerateLastValuePeriodic(HInductionVarAnalysis::Inducti GenerateCode(trip->op_a, nullptr, graph, block, graph ? &t : nullptr, false, false)) { // During actual code generation (graph != nullptr), generate is_even ? x : y. if (graph != nullptr) { - Primitive::Type type = trip->type; + DataType::Type type = trip->type; + ArenaAllocator* allocator = graph->GetAllocator(); HInstruction* msk = - Insert(block, new (graph->GetArena()) HAnd(type, t, graph->GetConstant(type, 1))); + Insert(block, new (allocator) HAnd(type, t, graph->GetConstant(type, 1))); HInstruction* is_even = - Insert(block, new (graph->GetArena()) HEqual(msk, graph->GetConstant(type, 0), kNoDexPc)); - *result = Insert(block, new (graph->GetArena()) HSelect(is_even, x, y, kNoDexPc)); + Insert(block, new (allocator) HEqual(msk, graph->GetConstant(type, 0), kNoDexPc)); + *result = Insert(block, new (graph->GetAllocator()) HSelect(is_even, x, y, kNoDexPc)); } // Guard select with taken test if needed. if (*needs_taken_test) { HInstruction* is_taken = nullptr; if (GenerateCode(trip->op_b, nullptr, graph, block, graph ? &is_taken : nullptr, false, false)) { if (graph != nullptr) { - *result = Insert(block, new (graph->GetArena()) HSelect(is_taken, *result, x, kNoDexPc)); + ArenaAllocator* allocator = graph->GetAllocator(); + *result = Insert(block, new (allocator) HSelect(is_taken, *result, x, kNoDexPc)); } *needs_taken_test = false; // taken care of } else { @@ -1213,7 +1230,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, return true; } // Handle current operation. 
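The GenerateLastValuePeriodic hunk above selects between the two values of a 2-periodic induction based on the parity of the trip count t, via (t & 1) == 0 ? x : y. A scalar sketch of that selection, assuming the phi alternates x, y, x, y, ... starting with x in iteration 0 (illustration only):

#include <cstdint>
#include <cstdio>

// Value of an alternating phi when the loop exits after t iterations:
// x for an even trip count, y for an odd one.
int32_t LastPeriodicValue(int32_t x, int32_t y, int32_t t) {
  const bool is_even = (t & 1) == 0;
  return is_even ? x : y;
}

int main() {
  std::printf("%d %d %d\n",
              LastPeriodicValue(0, 1, 0),    // 0
              LastPeriodicValue(0, 1, 3),    // 1
              LastPeriodicValue(0, 1, 10));  // 0
  return 0;
}
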
- Primitive::Type type = info->type; + DataType::Type type = info->type; HInstruction* opa = nullptr; HInstruction* opb = nullptr; switch (info->induction_class) { @@ -1237,25 +1254,25 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, HInstruction* operation = nullptr; switch (info->operation) { case HInductionVarAnalysis::kAdd: - operation = new (graph->GetArena()) HAdd(type, opa, opb); break; + operation = new (graph->GetAllocator()) HAdd(type, opa, opb); break; case HInductionVarAnalysis::kSub: - operation = new (graph->GetArena()) HSub(type, opa, opb); break; + operation = new (graph->GetAllocator()) HSub(type, opa, opb); break; case HInductionVarAnalysis::kMul: - operation = new (graph->GetArena()) HMul(type, opa, opb, kNoDexPc); break; + operation = new (graph->GetAllocator()) HMul(type, opa, opb, kNoDexPc); break; case HInductionVarAnalysis::kDiv: - operation = new (graph->GetArena()) HDiv(type, opa, opb, kNoDexPc); break; + operation = new (graph->GetAllocator()) HDiv(type, opa, opb, kNoDexPc); break; case HInductionVarAnalysis::kRem: - operation = new (graph->GetArena()) HRem(type, opa, opb, kNoDexPc); break; + operation = new (graph->GetAllocator()) HRem(type, opa, opb, kNoDexPc); break; case HInductionVarAnalysis::kXor: - operation = new (graph->GetArena()) HXor(type, opa, opb); break; + operation = new (graph->GetAllocator()) HXor(type, opa, opb); break; case HInductionVarAnalysis::kLT: - operation = new (graph->GetArena()) HLessThan(opa, opb); break; + operation = new (graph->GetAllocator()) HLessThan(opa, opb); break; case HInductionVarAnalysis::kLE: - operation = new (graph->GetArena()) HLessThanOrEqual(opa, opb); break; + operation = new (graph->GetAllocator()) HLessThanOrEqual(opa, opb); break; case HInductionVarAnalysis::kGT: - operation = new (graph->GetArena()) HGreaterThan(opa, opb); break; + operation = new (graph->GetAllocator()) HGreaterThan(opa, opb); break; case HInductionVarAnalysis::kGE: - operation = new (graph->GetArena()) HGreaterThanOrEqual(opa, opb); break; + operation = new (graph->GetAllocator()) HGreaterThanOrEqual(opa, opb); break; default: LOG(FATAL) << "unknown operation"; } @@ -1267,7 +1284,7 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, case HInductionVarAnalysis::kNeg: if (GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) { if (graph != nullptr) { - *result = Insert(block, new (graph->GetArena()) HNeg(type, opb)); + *result = Insert(block, new (graph->GetAllocator()) HNeg(type, opb)); } return true; } @@ -1293,9 +1310,9 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, } else if (in_body) { if (GenerateCode(info->op_a, trip, graph, block, &opb, in_body, is_min)) { if (graph != nullptr) { + ArenaAllocator* allocator = graph->GetAllocator(); *result = - Insert(block, - new (graph->GetArena()) HSub(type, opb, graph->GetConstant(type, 1))); + Insert(block, new (allocator) HSub(type, opb, graph->GetConstant(type, 1))); } return true; } @@ -1320,15 +1337,16 @@ bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info, if (GenerateCode(trip, trip, graph, block, &opa, in_body, is_min_a) && GenerateCode(info->op_b, trip, graph, block, &opb, in_body, is_min)) { if (graph != nullptr) { + ArenaAllocator* allocator = graph->GetAllocator(); HInstruction* oper; if (stride_value == 1) { - oper = new (graph->GetArena()) HAdd(type, opa, opb); + oper = new (allocator) HAdd(type, opa, opb); } else if (stride_value == -1) { 
- oper = new (graph->GetArena()) HSub(type, opb, opa); + oper = new (graph->GetAllocator()) HSub(type, opb, opa); } else { HInstruction* mul = - new (graph->GetArena()) HMul(type, graph->GetConstant(type, stride_value), opa); - oper = new (graph->GetArena()) HAdd(type, Insert(block, mul), opb); + new (allocator) HMul(type, graph->GetConstant(type, stride_value), opa); + oper = new (allocator) HAdd(type, Insert(block, mul), opb); } *result = Insert(block, oper); } diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h index ab1772bf15..0b980f596a 100644 --- a/compiler/optimizing/induction_var_range.h +++ b/compiler/optimizing/induction_var_range.h @@ -151,6 +151,16 @@ class InductionVarRange { } /** + * Checks if the given phi instruction has been classified as anything by + * induction variable analysis. Returns false for anything that cannot be + * classified statically, such as reductions or other complex cycles. + */ + bool IsClassified(HPhi* phi) const { + HLoopInformation* lp = phi->GetBlock()->GetLoopInformation(); // closest enveloping loop + return (lp != nullptr) && (induction_analysis_->LookupInfo(lp, phi) != nullptr); + } + + /** * Checks if header logic of a loop terminates. Sets trip-count tc if known. */ bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const; diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc index 67d2093829..e5bc6ef22c 100644 --- a/compiler/optimizing/induction_var_range_test.cc +++ b/compiler/optimizing/induction_var_range_test.cc @@ -14,10 +14,11 @@ * limitations under the License. */ +#include "induction_var_range.h" + #include "base/arena_allocator.h" #include "builder.h" #include "induction_var_analysis.h" -#include "induction_var_range.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -28,13 +29,11 @@ using Value = InductionVarRange::Value; /** * Fixture class for the InductionVarRange tests. */ -class InductionVarRangeTest : public CommonCompilerTest { +class InductionVarRangeTest : public OptimizingUnitTest { public: InductionVarRangeTest() - : pool_(), - allocator_(&pool_), - graph_(CreateGraph(&allocator_)), - iva_(new (&allocator_) HInductionVarAnalysis(graph_)), + : graph_(CreateGraph()), + iva_(new (GetAllocator()) HInductionVarAnalysis(graph_)), range_(iva_) { BuildGraph(); } @@ -60,22 +59,22 @@ class InductionVarRangeTest : public CommonCompilerTest { /** Constructs bare minimum graph. */ void BuildGraph() { graph_->SetNumberOfVRegs(1); - entry_block_ = new (&allocator_) HBasicBlock(graph_); - exit_block_ = new (&allocator_) HBasicBlock(graph_); + entry_block_ = new (GetAllocator()) HBasicBlock(graph_); + exit_block_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry_block_); graph_->AddBlock(exit_block_); graph_->SetEntryBlock(entry_block_); graph_->SetExitBlock(exit_block_); // Two parameters. - x_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimInt); + x_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); entry_block_->AddInstruction(x_); - y_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimInt); + y_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); entry_block_->AddInstruction(y_); // Set arbitrary range analysis hint while testing private methods. 
SetHint(x_); @@ -84,13 +83,13 @@ class InductionVarRangeTest : public CommonCompilerTest { /** Constructs loop with given upper bound. */ void BuildLoop(int32_t lower, HInstruction* upper, int32_t stride) { // Control flow. - loop_preheader_ = new (&allocator_) HBasicBlock(graph_); + loop_preheader_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_preheader_); - loop_header_ = new (&allocator_) HBasicBlock(graph_); + loop_header_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_header_); - loop_body_ = new (&allocator_) HBasicBlock(graph_); + loop_body_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(loop_body_); - HBasicBlock* return_block = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* return_block = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(return_block); entry_block_->AddSuccessor(loop_preheader_); loop_preheader_->AddSuccessor(loop_header_); @@ -99,23 +98,24 @@ class InductionVarRangeTest : public CommonCompilerTest { loop_body_->AddSuccessor(loop_header_); return_block->AddSuccessor(exit_block_); // Instructions. - loop_preheader_->AddInstruction(new (&allocator_) HGoto()); - HPhi* phi = new (&allocator_) HPhi(&allocator_, 0, 0, Primitive::kPrimInt); + loop_preheader_->AddInstruction(new (GetAllocator()) HGoto()); + HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); loop_header_->AddPhi(phi); phi->AddInput(graph_->GetIntConstant(lower)); // i = l if (stride > 0) { - condition_ = new (&allocator_) HLessThan(phi, upper); // i < u + condition_ = new (GetAllocator()) HLessThan(phi, upper); // i < u } else { - condition_ = new (&allocator_) HGreaterThan(phi, upper); // i > u + condition_ = new (GetAllocator()) HGreaterThan(phi, upper); // i > u } loop_header_->AddInstruction(condition_); - loop_header_->AddInstruction(new (&allocator_) HIf(condition_)); - increment_ = new (&allocator_) HAdd(Primitive::kPrimInt, phi, graph_->GetIntConstant(stride)); + loop_header_->AddInstruction(new (GetAllocator()) HIf(condition_)); + increment_ = + new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, graph_->GetIntConstant(stride)); loop_body_->AddInstruction(increment_); // i += s phi->AddInput(increment_); - loop_body_->AddInstruction(new (&allocator_) HGoto()); - return_block->AddInstruction(new (&allocator_) HReturnVoid()); - exit_block_->AddInstruction(new (&allocator_) HExit()); + loop_body_->AddInstruction(new (GetAllocator()) HGoto()); + return_block->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_block_->AddInstruction(new (GetAllocator()) HExit()); } /** Constructs SSA and performs induction variable analysis. */ @@ -172,7 +172,7 @@ class InductionVarRangeTest : public CommonCompilerTest { return iva_->CreateTripCount(op, CreateConst(tc), CreateInvariant('<', CreateConst(0), CreateConst(tc)), - Primitive::kPrimInt); + DataType::Type::kInt32); } /** Constructs a linear a * i + b induction. */ @@ -182,7 +182,7 @@ class InductionVarRangeTest : public CommonCompilerTest { CreateConst(a), CreateConst(b), nullptr, - Primitive::kPrimInt); + DataType::Type::kInt32); } /** Constructs a polynomial sum(a * i + b) + c induction. */ @@ -192,7 +192,7 @@ class InductionVarRangeTest : public CommonCompilerTest { CreateLinear(a, b), CreateConst(c), nullptr, - Primitive::kPrimInt); + DataType::Type::kInt32); } /** Constructs a geometric a * f^i + b induction. 
*/ @@ -203,7 +203,7 @@ class InductionVarRangeTest : public CommonCompilerTest { CreateConst(a), CreateConst(b), graph_->GetIntConstant(f), - Primitive::kPrimInt); + DataType::Type::kInt32); } /** Constructs a range [lo, hi] using a periodic induction. */ @@ -213,7 +213,7 @@ class InductionVarRangeTest : public CommonCompilerTest { CreateConst(lo), CreateConst(hi), nullptr, - Primitive::kPrimInt); + DataType::Type::kInt32); } /** Constructs a wrap-around induction consisting of a constant, followed by info. */ @@ -225,7 +225,7 @@ class InductionVarRangeTest : public CommonCompilerTest { CreateConst(initial), info, nullptr, - Primitive::kPrimInt); + DataType::Type::kInt32); } /** Constructs a wrap-around induction consisting of a constant, followed by a range. */ @@ -302,8 +302,6 @@ class InductionVarRangeTest : public CommonCompilerTest { Value MaxValue(Value v1, Value v2) { return range_.MergeVal(v1, v2, false); } // General building fields. - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; HBasicBlock* entry_block_; HBasicBlock* exit_block_; @@ -703,9 +701,9 @@ TEST_F(InductionVarRangeTest, MaxValue) { TEST_F(InductionVarRangeTest, ArrayLengthAndHints) { // We pass a bogus constant for the class to avoid mocking one. - HInstruction* new_array = new (&allocator_) HNewArray(x_, x_, 0); + HInstruction* new_array = new (GetAllocator()) HNewArray(x_, x_, 0); entry_block_->AddInstruction(new_array); - HInstruction* array_length = new (&allocator_) HArrayLength(new_array, 0); + HInstruction* array_length = new (GetAllocator()) HArrayLength(new_array, 0); entry_block_->AddInstruction(array_length); // With null hint: yields extreme constants. const int32_t max_value = std::numeric_limits<int32_t>::max(); @@ -722,6 +720,29 @@ TEST_F(InductionVarRangeTest, ArrayLengthAndHints) { ExpectEqual(Value(x_, 1, 0), GetMax(CreateFetch(array_length), nullptr)); } +TEST_F(InductionVarRangeTest, AddOrSubAndConstant) { + HInstruction* add = new (GetAllocator()) + HAdd(DataType::Type::kInt32, x_, graph_->GetIntConstant(-1)); + HInstruction* alt = new (GetAllocator()) + HAdd(DataType::Type::kInt32, graph_->GetIntConstant(-1), x_); + HInstruction* sub = new (GetAllocator()) + HSub(DataType::Type::kInt32, x_, graph_->GetIntConstant(1)); + HInstruction* rev = new (GetAllocator()) + HSub(DataType::Type::kInt32, graph_->GetIntConstant(1), x_); + entry_block_->AddInstruction(add); + entry_block_->AddInstruction(alt); + entry_block_->AddInstruction(sub); + entry_block_->AddInstruction(rev); + ExpectEqual(Value(x_, 1, -1), GetMin(CreateFetch(add), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMax(CreateFetch(add), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMin(CreateFetch(alt), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMax(CreateFetch(alt), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMin(CreateFetch(sub), nullptr)); + ExpectEqual(Value(x_, 1, -1), GetMax(CreateFetch(sub), nullptr)); + ExpectEqual(Value(x_, -1, 1), GetMin(CreateFetch(rev), nullptr)); + ExpectEqual(Value(x_, -1, 1), GetMax(CreateFetch(rev), nullptr)); +} + // // Tests on public methods. 
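A note on the new AddOrSubAndConstant test just above: the expected Value triples read as a * instruction + b, so x + (-1), (-1) + x and x - 1 all evaluate to the bound 1 * x + (-1), while 1 - x becomes -1 * x + 1. When the new GetFetch code chases a constant-minus-fetch, the min/max polarity of the fetched operand flips, because min(c - x) = c - max(x) and max(c - x) = c - min(x). A tiny stand-alone check of that flip over an assumed range (illustration only):

#include <algorithm>
#include <climits>
#include <cstdio>

int main() {
  const int c = 1;
  const int lo = -5, hi = 9;  // assume x ranges over [lo, hi]
  int seen_min = INT_MAX, seen_max = INT_MIN;
  for (int x = lo; x <= hi; ++x) {
    seen_min = std::min(seen_min, c - x);
    seen_max = std::max(seen_max, c - x);
  }
  // The extremes of c - x come from the opposite extremes of x.
  std::printf("observed=[%d,%d]  formula=[%d,%d]\n",
              seen_min, seen_max, c - hi, c - lo);
  return 0;
}
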
// diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 18390cc4d4..4fc7262265 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -21,10 +21,11 @@ #include "builder.h" #include "class_linker.h" #include "constant_folding.h" +#include "data_type-inl.h" #include "dead_code_elimination.h" #include "dex/inline_method_analyser.h" -#include "dex/verified_method.h" #include "dex/verification_results.h" +#include "dex/verified_method.h" #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" @@ -38,10 +39,10 @@ #include "optimizing_compiler.h" #include "reference_type_propagation.h" #include "register_allocator_linear_scan.h" +#include "scoped_thread_state_change-inl.h" #include "sharpening.h" #include "ssa_builder.h" #include "ssa_phi_elimination.h" -#include "scoped_thread_state_change-inl.h" #include "thread.h" namespace art { @@ -75,7 +76,7 @@ static constexpr bool kUseAOTInlineCaches = true; #define LOG_TRY() LOG_INTERNAL("Try inlinining call: ") #define LOG_NOTE() LOG_INTERNAL("Note: ") #define LOG_SUCCESS() LOG_INTERNAL("Success: ") -#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ") +#define LOG_FAIL(stats_ptr, stat) MaybeRecordStat(stats_ptr, stat); LOG_INTERNAL("Fail: ") #define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ") std::string HInliner::DepthString(int line) const { @@ -293,7 +294,7 @@ static dex::TypeIndex FindClassIndexIn(mirror::Class* cls, // as there may be different class loaders. So only return the index if it's // the right class already resolved with the class loader. if (index.IsValid()) { - ObjPtr<mirror::Class> resolved = ClassLinker::LookupResolvedType( + ObjPtr<mirror::Class> resolved = compilation_unit.GetClassLinker()->LookupResolvedType( index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get()); if (resolved != cls) { index = dex::TypeIndex::Invalid(); @@ -391,6 +392,58 @@ ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) { return single_impl; } +static bool IsMethodUnverified(CompilerDriver* const compiler_driver, ArtMethod* method) + REQUIRES_SHARED(Locks::mutator_lock_) { + if (!method->GetDeclaringClass()->IsVerified()) { + if (Runtime::Current()->UseJitCompilation()) { + // We're at runtime, we know this is cold code if the class + // is not verified, so don't bother analyzing. + return true; + } + uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex(); + if (!compiler_driver->IsMethodVerifiedWithoutFailures( + method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { + // Method has soft or hard failures, don't analyze. + return true; + } + } + return false; +} + +static bool AlwaysThrows(CompilerDriver* const compiler_driver, ArtMethod* method) + REQUIRES_SHARED(Locks::mutator_lock_) { + DCHECK(method != nullptr); + // Skip non-compilable and unverified methods. + if (!method->IsCompilable() || IsMethodUnverified(compiler_driver, method)) { + return false; + } + // Skip native methods, methods with try blocks, and methods that are too large. + CodeItemDataAccessor accessor(method->DexInstructionData()); + if (!accessor.HasCodeItem() || + accessor.TriesSize() != 0 || + accessor.InsnsSizeInCodeUnits() > kMaximumNumberOfTotalInstructions) { + return false; + } + // Scan for exits. 
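For the new AlwaysThrows helper, a callee counts as always-throwing only if the exit scan finds no return opcode of any kind and at least one throw. A simplified stand-alone sketch of that scan (made-up opcode enum, not the dex Instruction API):

#include <cstdio>
#include <vector>

enum class Op { kReturn, kThrow, kOther };

// Returns true only when no return is seen and at least one throw is seen.
bool AlwaysThrowsSketch(const std::vector<Op>& ops) {
  bool throw_seen = false;
  for (Op op : ops) {
    if (op == Op::kReturn) {
      return false;  // regular control flow back to the caller
    }
    if (op == Op::kThrow) {
      throw_seen = true;
    }
  }
  return throw_seen;
}

int main() {
  std::printf("%d %d\n",
              AlwaysThrowsSketch({Op::kOther, Op::kThrow}),              // 1
              AlwaysThrowsSketch({Op::kOther, Op::kThrow, Op::kReturn}));  // 0
  return 0;
}
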
+ bool throw_seen = false; + for (const DexInstructionPcPair& pair : accessor) { + switch (pair.Inst().Opcode()) { + case Instruction::RETURN: + case Instruction::RETURN_VOID: + case Instruction::RETURN_WIDE: + case Instruction::RETURN_OBJECT: + case Instruction::RETURN_VOID_NO_BARRIER: + return false; // found regular control flow back + case Instruction::THROW: + throw_seen = true; + break; + default: + break; + } + } + return throw_seen; +} + bool HInliner::TryInline(HInvoke* invoke_instruction) { if (invoke_instruction->IsInvokeUnresolved() || invoke_instruction->IsInvokePolymorphic()) { @@ -430,20 +483,29 @@ bool HInliner::TryInline(HInvoke* invoke_instruction) { } if (actual_method != nullptr) { + // Single target. bool result = TryInlineAndReplace(invoke_instruction, actual_method, ReferenceTypeInfo::CreateInvalid(), /* do_rtp */ true, cha_devirtualize); - if (result && !invoke_instruction->IsInvokeStaticOrDirect()) { - if (cha_devirtualize) { - // Add dependency due to devirtulization. We've assumed resolved_method - // has single implementation. - outermost_graph_->AddCHASingleImplementationDependency(resolved_method); - MaybeRecordStat(kCHAInline); - } else { - MaybeRecordStat(kInlinedInvokeVirtualOrInterface); + if (result) { + // Successfully inlined. + if (!invoke_instruction->IsInvokeStaticOrDirect()) { + if (cha_devirtualize) { + // Add dependency due to devirtualization. We've assumed resolved_method + // has single implementation. + outermost_graph_->AddCHASingleImplementationDependency(resolved_method); + MaybeRecordStat(stats_, MethodCompilationStat::kCHAInline); + } else { + MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvokeVirtualOrInterface); + } } + } else if (!cha_devirtualize && AlwaysThrows(compiler_driver_, actual_method)) { + // Set always throws property for non-inlined method call with single target + // (unless it was obtained through CHA, because that would imply we have + // to add the CHA dependency, which seems not worth it). 
+ invoke_instruction->SetAlwaysThrows(true); } return result; } @@ -532,7 +594,7 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, } case kInlineCacheMonomorphic: { - MaybeRecordStat(kMonomorphicCall); + MaybeRecordStat(stats_, MethodCompilationStat::kMonomorphicCall); if (UseOnlyPolymorphicInliningWithNoDeopt()) { return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache); } else { @@ -541,7 +603,7 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, } case kInlineCachePolymorphic: { - MaybeRecordStat(kPolymorphicCall); + MaybeRecordStat(stats_, MethodCompilationStat::kPolymorphicCall); return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache); } @@ -550,7 +612,7 @@ bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file, << "Interface or virtual call to " << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex()) << " is megamorphic and not inlined"; - MaybeRecordStat(kMegamorphicCall); + MaybeRecordStat(stats_, MethodCompilationStat::kMegamorphicCall); return false; } @@ -681,7 +743,7 @@ HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile( << "is invalid in location" << dex_cache->GetDexFile()->GetLocation(); return kInlineCacheNoData; } - ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType( + ObjPtr<mirror::Class> clazz = caller_compilation_unit_.GetClassLinker()->LookupResolvedType( class_ref.type_index, dex_cache, caller_compilation_unit_.GetClassLoader().Get()); @@ -704,10 +766,10 @@ HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker, uint32_t dex_pc) const { ArtField* field = class_linker->GetClassRoot(ClassLinker::kJavaLangObject)->GetInstanceField(0); DCHECK_EQ(std::string(field->GetName()), "shadow$_klass_"); - HInstanceFieldGet* result = new (graph_->GetArena()) HInstanceFieldGet( + HInstanceFieldGet* result = new (graph_->GetAllocator()) HInstanceFieldGet( receiver, field, - Primitive::kPrimNot, + DataType::Type::kReference, field->GetOffset(), field->IsVolatile(), field->GetDexFieldIndex(), @@ -754,7 +816,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, dex::TypeIndex class_index = FindClassIndexIn( GetMonomorphicType(classes), caller_compilation_unit_); if (!class_index.IsValid()) { - LOG_FAIL(kNotInlinedDexCache) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedDexCache) << "Call to " << ArtMethod::PrettyMethod(resolved_method) << " from inline cache is not inlined because its class is not" << " accessible to the caller"; @@ -803,7 +865,7 @@ bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction, /* is_first_run */ false); rtp_fixup.Run(); - MaybeRecordStat(kInlinedMonomorphicCall); + MaybeRecordStat(stats_, MethodCompilationStat::kInlinedMonomorphicCall); return true; } @@ -811,12 +873,12 @@ void HInliner::AddCHAGuard(HInstruction* invoke_instruction, uint32_t dex_pc, HInstruction* cursor, HBasicBlock* bb_cursor) { - HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetArena()) - HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc); - HInstruction* compare = new (graph_->GetArena()) HNotEqual( + HShouldDeoptimizeFlag* deopt_flag = new (graph_->GetAllocator()) + HShouldDeoptimizeFlag(graph_->GetAllocator(), dex_pc); + HInstruction* compare = new (graph_->GetAllocator()) HNotEqual( deopt_flag, graph_->GetIntConstant(0, dex_pc)); - HInstruction* deopt = new (graph_->GetArena()) HDeoptimize( - graph_->GetArena(), compare, DeoptimizationKind::kCHA, dex_pc); + 
HInstruction* deopt = new (graph_->GetAllocator()) HDeoptimize( + graph_->GetAllocator(), compare, DeoptimizationKind::kCHA, dex_pc); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(deopt_flag, cursor); @@ -864,20 +926,20 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, // Note that we will just compare the classes, so we don't need Java semantics access checks. // Note that the type index and the dex file are relative to the method this type guard is // inlined into. - HLoadClass* load_class = new (graph_->GetArena()) HLoadClass(graph_->GetCurrentMethod(), - class_index, - caller_dex_file, - klass, - is_referrer, - invoke_instruction->GetDexPc(), - /* needs_access_check */ false); + HLoadClass* load_class = new (graph_->GetAllocator()) HLoadClass(graph_->GetCurrentMethod(), + class_index, + caller_dex_file, + klass, + is_referrer, + invoke_instruction->GetDexPc(), + /* needs_access_check */ false); HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind( load_class, codegen_, compiler_driver_, caller_compilation_unit_); DCHECK(kind != HLoadClass::LoadKind::kInvalid) << "We should always be able to reference a class for inline caches"; - // Insert before setting the kind, as setting the kind affects the inputs. - bb_cursor->InsertInstructionAfter(load_class, receiver_class); + // Load kind must be set before inserting the instruction into the graph. load_class->SetLoadKind(kind); + bb_cursor->InsertInstructionAfter(load_class, receiver_class); // In AOT mode, we will most likely load the class from BSS, which will involve a call // to the runtime. In this case, the load instruction will need an environment so copy // it from the invoke instruction. @@ -886,11 +948,11 @@ HInstruction* HInliner::AddTypeGuard(HInstruction* receiver, load_class->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); } - HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class); + HNotEqual* compare = new (graph_->GetAllocator()) HNotEqual(load_class, receiver_class); bb_cursor->InsertInstructionAfter(compare, load_class); if (with_deoptimization) { - HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( - graph_->GetArena(), + HDeoptimize* deoptimize = new (graph_->GetAllocator()) HDeoptimize( + graph_->GetAllocator(), compare, receiver, Runtime::Current()->IsAotCompiler() @@ -993,7 +1055,7 @@ bool HInliner::TryInlinePolymorphicCall(HInvoke* invoke_instruction, return false; } - MaybeRecordStat(kInlinedPolymorphicCall); + MaybeRecordStat(stats_, MethodCompilationStat::kInlinedPolymorphicCall); // Run type propagation to get the guards typed. ReferenceTypePropagation rtp_fixup(graph_, @@ -1011,7 +1073,7 @@ void HInliner::CreateDiamondPatternForPolymorphicInline(HInstruction* compare, uint32_t dex_pc = invoke_instruction->GetDexPc(); HBasicBlock* cursor_block = compare->GetBlock(); HBasicBlock* original_invoke_block = invoke_instruction->GetBlock(); - ArenaAllocator* allocator = graph_->GetArena(); + ArenaAllocator* allocator = graph_->GetAllocator(); // Spit the block after the compare: `cursor_block` will now be the start of the diamond, // and the returned block is the start of the then branch (that could contain multiple blocks). @@ -1143,10 +1205,10 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( HInstanceFieldGet* receiver_class = BuildGetReceiverClass( class_linker, receiver, invoke_instruction->GetDexPc()); - Primitive::Type type = Is64BitInstructionSet(graph_->GetInstructionSet()) - ? 
Primitive::kPrimLong - : Primitive::kPrimInt; - HClassTableGet* class_table_get = new (graph_->GetArena()) HClassTableGet( + DataType::Type type = Is64BitInstructionSet(graph_->GetInstructionSet()) + ? DataType::Type::kInt64 + : DataType::Type::kInt32; + HClassTableGet* class_table_get = new (graph_->GetAllocator()) HClassTableGet( receiver_class, type, invoke_instruction->IsInvokeVirtual() ? HClassTableGet::TableKind::kVTable @@ -1155,7 +1217,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( invoke_instruction->GetDexPc()); HConstant* constant; - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { constant = graph_->GetLongConstant( reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc()); } else { @@ -1163,7 +1225,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( reinterpret_cast<intptr_t>(actual_method), invoke_instruction->GetDexPc()); } - HNotEqual* compare = new (graph_->GetArena()) HNotEqual(class_table_get, constant); + HNotEqual* compare = new (graph_->GetAllocator()) HNotEqual(class_table_get, constant); if (cursor != nullptr) { bb_cursor->InsertInstructionAfter(receiver_class, cursor); } else { @@ -1175,8 +1237,8 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( if (outermost_graph_->IsCompilingOsr()) { CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction); } else { - HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize( - graph_->GetArena(), + HDeoptimize* deoptimize = new (graph_->GetAllocator()) HDeoptimize( + graph_->GetAllocator(), compare, receiver, DeoptimizationKind::kJitSameTarget, @@ -1199,7 +1261,7 @@ bool HInliner::TryInlinePolymorphicCallToSameTarget( /* is_first_run */ false); rtp_fixup.Run(); - MaybeRecordStat(kInlinedPolymorphicCall); + MaybeRecordStat(stats_, MethodCompilationStat::kInlinedPolymorphicCall); LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod(); return true; @@ -1210,11 +1272,49 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, ReferenceTypeInfo receiver_type, bool do_rtp, bool cha_devirtualize) { + DCHECK(!invoke_instruction->IsIntrinsic()); HInstruction* return_replacement = nullptr; uint32_t dex_pc = invoke_instruction->GetDexPc(); HInstruction* cursor = invoke_instruction->GetPrevious(); HBasicBlock* bb_cursor = invoke_instruction->GetBlock(); - if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) { + bool should_remove_invoke_instruction = false; + + // If invoke_instruction is devirtualized to a different method, give intrinsics + // another chance before we try to inline it. + bool wrong_invoke_type = false; + if (invoke_instruction->GetResolvedMethod() != method && + IntrinsicsRecognizer::Recognize(invoke_instruction, method, &wrong_invoke_type)) { + MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized); + if (invoke_instruction->IsInvokeInterface()) { + // We don't intrinsify an invoke-interface directly. + // Replace the invoke-interface with an invoke-virtual. + HInvokeVirtual* new_invoke = new (graph_->GetAllocator()) HInvokeVirtual( + graph_->GetAllocator(), + invoke_instruction->GetNumberOfArguments(), + invoke_instruction->GetType(), + invoke_instruction->GetDexPc(), + invoke_instruction->GetDexMethodIndex(), // Use interface method's dex method index. 
+ method, + method->GetMethodIndex()); + HInputsRef inputs = invoke_instruction->GetInputs(); + for (size_t index = 0; index != inputs.size(); ++index) { + new_invoke->SetArgumentAt(index, inputs[index]); + } + invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction); + new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); + if (invoke_instruction->GetType() == DataType::Type::kReference) { + new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo()); + } + // Run intrinsic recognizer again to set new_invoke's intrinsic. + IntrinsicsRecognizer::Recognize(new_invoke, method, &wrong_invoke_type); + DCHECK_NE(new_invoke->GetIntrinsic(), Intrinsics::kNone); + return_replacement = new_invoke; + // invoke_instruction is replaced with new_invoke. + should_remove_invoke_instruction = true; + } else { + // invoke_instruction is intrinsified and stays. + } + } else if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) { if (invoke_instruction->IsInvokeInterface()) { DCHECK(!method->IsProxyMethod()); // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always @@ -1236,11 +1336,11 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile(); uint32_t dex_method_index = FindMethodIndexIn( method, caller_dex_file, invoke_instruction->GetDexMethodIndex()); - if (dex_method_index == DexFile::kDexNoIndex) { + if (dex_method_index == dex::kDexNoIndex) { return false; } - HInvokeVirtual* new_invoke = new (graph_->GetArena()) HInvokeVirtual( - graph_->GetArena(), + HInvokeVirtual* new_invoke = new (graph_->GetAllocator()) HInvokeVirtual( + graph_->GetAllocator(), invoke_instruction->GetNumberOfArguments(), invoke_instruction->GetType(), invoke_instruction->GetDexPc(), @@ -1253,23 +1353,31 @@ bool HInliner::TryInlineAndReplace(HInvoke* invoke_instruction, } invoke_instruction->GetBlock()->InsertInstructionBefore(new_invoke, invoke_instruction); new_invoke->CopyEnvironmentFrom(invoke_instruction->GetEnvironment()); - if (invoke_instruction->GetType() == Primitive::kPrimNot) { + if (invoke_instruction->GetType() == DataType::Type::kReference) { new_invoke->SetReferenceTypeInfo(invoke_instruction->GetReferenceTypeInfo()); } return_replacement = new_invoke; + // invoke_instruction is replaced with new_invoke. + should_remove_invoke_instruction = true; } else { // TODO: Consider sharpening an invoke virtual once it is not dependent on the // compiler driver. return false; } + } else { + // invoke_instruction is inlined. 
+ should_remove_invoke_instruction = true; } + if (cha_devirtualize) { AddCHAGuard(invoke_instruction, dex_pc, cursor, bb_cursor); } if (return_replacement != nullptr) { invoke_instruction->ReplaceWith(return_replacement); } - invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); + if (should_remove_invoke_instruction) { + invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction); + } FixUpReturnReferenceType(method, return_replacement); if (do_rtp && ReturnTypeMoreSpecific(invoke_instruction, return_replacement)) { // Actual return value has a more specific type than the method's declared @@ -1300,14 +1408,14 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, ReferenceTypeInfo receiver_type, HInstruction** return_replacement) { if (method->IsProxyMethod()) { - LOG_FAIL(kNotInlinedProxy) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedProxy) << "Method " << method->PrettyMethod() << " is not inlined because of unimplemented inline support for proxy methods."; return false; } if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) { - LOG_FAIL(kNotInlinedRecursiveBudget) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRecursiveBudget) << "Method " << method->PrettyMethod() << " is not inlined because it has reached its recursive call budget."; @@ -1321,10 +1429,10 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) { LOG_SUCCESS() << "Successfully replaced pattern of invoke " << method->PrettyMethod(); - MaybeRecordStat(kReplacedInvokeWithSimplePattern); + MaybeRecordStat(stats_, MethodCompilationStat::kReplacedInvokeWithSimplePattern); return true; } - LOG_FAIL(kNotInlinedWont) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedWont) << "Won't inline " << method->PrettyMethod() << " in " << outer_compilation_unit_.GetDexFile()->GetLocation() << " (" << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from " @@ -1334,56 +1442,53 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, bool same_dex_file = IsSameDexFile(*outer_compilation_unit_.GetDexFile(), *method->GetDexFile()); - const DexFile::CodeItem* code_item = method->GetCodeItem(); + CodeItemDataAccessor accessor(method->DexInstructionData()); - if (code_item == nullptr) { + if (!accessor.HasCodeItem()) { LOG_FAIL_NO_STAT() << "Method " << method->PrettyMethod() << " is not inlined because it is native"; return false; } size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits(); - if (code_item->insns_size_in_code_units_ > inline_max_code_units) { - LOG_FAIL(kNotInlinedCodeItem) + if (accessor.InsnsSizeInCodeUnits() > inline_max_code_units) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCodeItem) << "Method " << method->PrettyMethod() << " is not inlined because its code item is too big: " - << code_item->insns_size_in_code_units_ + << accessor.InsnsSizeInCodeUnits() << " > " << inline_max_code_units; return false; } - if (code_item->tries_size_ != 0) { - LOG_FAIL(kNotInlinedTryCatch) + if (accessor.TriesSize() != 0) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatch) << "Method " << method->PrettyMethod() << " is not inlined because of try block"; return false; } if (!method->IsCompilable()) { - LOG_FAIL(kNotInlinedNotVerified) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) << "Method " << method->PrettyMethod() << " has soft failures un-handled by the compiler, so it cannot be 
inlined"; + return false; } - if (!method->GetDeclaringClass()->IsVerified()) { - uint16_t class_def_idx = method->GetDeclaringClass()->GetDexClassDefIndex(); - if (Runtime::Current()->UseJitCompilation() || - !compiler_driver_->IsMethodVerifiedWithoutFailures( - method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) { - LOG_FAIL(kNotInlinedNotVerified) - << "Method " << method->PrettyMethod() - << " couldn't be verified, so it cannot be inlined"; - return false; - } + if (IsMethodUnverified(compiler_driver_, method)) { + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedNotVerified) + << "Method " << method->PrettyMethod() + << " couldn't be verified, so it cannot be inlined"; + return false; } if (invoke_instruction->IsInvokeStaticOrDirect() && invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) { // Case of a static method that cannot be inlined because it implicitly // requires an initialization check of its declaring class. - LOG_FAIL(kNotInlinedDexCache) << "Method " << method->PrettyMethod() - << " is not inlined because it is static and requires a clinit" - << " check that cannot be emitted due to Dex cache limitations"; + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedDexCache) + << "Method " << method->PrettyMethod() + << " is not inlined because it is static and requires a clinit" + << " check that cannot be emitted due to Dex cache limitations"; return false; } @@ -1393,7 +1498,7 @@ bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction, } LOG_SUCCESS() << method->PrettyMethod(); - MaybeRecordStat(kInlinedInvoke); + MaybeRecordStat(stats_, MethodCompilationStat::kInlinedInvoke); return true; } @@ -1403,7 +1508,7 @@ static HInstruction* GetInvokeInputForArgVRegIndex(HInvoke* invoke_instruction, size_t input_index = 0; for (size_t i = 0; i < arg_vreg_index; ++i, ++input_index) { DCHECK_LT(input_index, invoke_instruction->GetNumberOfArguments()); - if (Primitive::Is64BitType(invoke_instruction->InputAt(input_index)->GetType())) { + if (DataType::Is64BitType(invoke_instruction->InputAt(input_index)->GetType())) { ++i; DCHECK_NE(i, arg_vreg_index); } @@ -1423,7 +1528,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, switch (inline_method.opcode) { case kInlineOpNop: - DCHECK_EQ(invoke_instruction->GetType(), Primitive::kPrimVoid); + DCHECK_EQ(invoke_instruction->GetType(), DataType::Type::kVoid); *return_replacement = nullptr; break; case kInlineOpReturnArg: @@ -1516,7 +1621,7 @@ bool HInliner::TryPatternSubstitution(HInvoke* invoke_instruction, DCHECK(obj != nullptr) << "only non-static methods can have a constructor fence"; HConstructorFence* constructor_fence = - new (graph_->GetArena()) HConstructorFence(obj, kNoDexPc, graph_->GetArena()); + new (graph_->GetAllocator()) HConstructorFence(obj, kNoDexPc, graph_->GetAllocator()); invoke_instruction->GetBlock()->InsertInstructionBefore(constructor_fence, invoke_instruction); } @@ -1538,10 +1643,10 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, ArtField* resolved_field = class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false); DCHECK(resolved_field != nullptr); - HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet( + HInstanceFieldGet* iget = new (graph_->GetAllocator()) HInstanceFieldGet( obj, resolved_field, - resolved_field->GetTypeAsPrimitiveType(), + DataType::FromShorty(resolved_field->GetTypeDescriptor()[0]), resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, @@ 
-1550,7 +1655,7 @@ HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index, // Read barrier generates a runtime call in slow path and we need a valid // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537. /* dex_pc */ 0); - if (iget->GetType() == Primitive::kPrimNot) { + if (iget->GetType() == DataType::Type::kReference) { // Use the same dex_cache that we used for field lookup as the hint_dex_cache. Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache()); ReferenceTypePropagation rtp(graph_, @@ -1578,11 +1683,11 @@ HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index, DCHECK(referrer->IsConstructor()); *is_final = resolved_field->IsFinal(); } - HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet( + HInstanceFieldSet* iput = new (graph_->GetAllocator()) HInstanceFieldSet( obj, value, resolved_field, - resolved_field->GetTypeAsPrimitiveType(), + DataType::FromShorty(resolved_field->GetTypeDescriptor()[0]), resolved_field->GetOffset(), resolved_field->IsVolatile(), field_index, @@ -1612,6 +1717,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, const DexFile::CodeItem* code_item = resolved_method->GetCodeItem(); const DexFile& callee_dex_file = *resolved_method->GetDexFile(); uint32_t method_index = resolved_method->GetDexMethodIndex(); + CodeItemDebugInfoAccessor code_item_accessor(resolved_method->DexInstructionDebugInfo()); ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker(); Handle<mirror::DexCache> dex_cache = NewHandleIfDifferent(resolved_method->GetDexCache(), caller_compilation_unit_.GetDexCache(), @@ -1640,8 +1746,9 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } const int32_t caller_instruction_counter = graph_->GetCurrentInstructionId(); - HGraph* callee_graph = new (graph_->GetArena()) HGraph( - graph_->GetArena(), + HGraph* callee_graph = new (graph_->GetAllocator()) HGraph( + graph_->GetAllocator(), + graph_->GetArenaStack(), callee_dex_file, method_index, compiler_driver_->GetInstructionSet(), @@ -1658,26 +1765,24 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (stats_ != nullptr) { // Reuse one object for all inline attempts from this caller to keep Arena memory usage low. 
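// [Editor's aside; not part of this patch] The hunks above and below replace the old
// Primitive enum with DataType throughout (e.g. Primitive::kPrimNot -> DataType::Type::kReference,
// Primitive::GetType -> DataType::FromShorty). A minimal sketch of the new API using only the
// calls that appear in this diff and assuming the surrounding ART headers; the helper name and
// its purpose are illustrative assumptions:
static size_t CountArgumentVRegs(const char* shorty) {
  size_t vregs = 0u;
  for (const char* p = shorty + 1; *p != '\0'; ++p) {  // shorty[0] is the return type.
    DataType::Type type = DataType::FromShorty(*p);
    vregs += DataType::Is64BitType(type) ? 2u : 1u;    // Wide values occupy a vreg pair.
  }
  return vregs;
}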
if (inline_stats_ == nullptr) { - void* storage = graph_->GetArena()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc); + void* storage = graph_->GetAllocator()->Alloc<OptimizingCompilerStats>(kArenaAllocMisc); inline_stats_ = new (storage) OptimizingCompilerStats; } else { inline_stats_->Reset(); } } HGraphBuilder builder(callee_graph, + code_item_accessor, &dex_compilation_unit, &outer_compilation_unit_, - resolved_method->GetDexFile(), - *code_item, compiler_driver_, codegen_, inline_stats_, - resolved_method->GetQuickenedInfo(class_linker->GetImagePointerSize()), - dex_cache, + resolved_method->GetQuickenedInfo(), handles_); if (builder.BuildGraph() != kAnalysisSuccess) { - LOG_FAIL(kNotInlinedCannotBuild) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedCannotBuild) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be built, so cannot be inlined"; return false; @@ -1685,7 +1790,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph, compiler_driver_->GetInstructionSet())) { - LOG_FAIL(kNotInlinedRegisterAllocator) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedRegisterAllocator) << "Method " << callee_dex_file.PrettyMethod(method_index) << " cannot be inlined because of the register allocator"; return false; @@ -1711,7 +1816,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } else if (argument->IsDoubleConstant()) { current->ReplaceWith( callee_graph->GetDoubleConstant(argument->AsDoubleConstant()->GetValue())); - } else if (argument->GetType() == Primitive::kPrimNot) { + } else if (argument->GetType() == DataType::Type::kReference) { if (!resolved_method->IsStatic() && parameter_index == 0 && receiver_type.IsValid()) { run_rtp = true; current->SetReferenceTypeInfo(receiver_type); @@ -1738,7 +1843,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, HBasicBlock* exit_block = callee_graph->GetExitBlock(); if (exit_block == nullptr) { - LOG_FAIL(kNotInlinedInfiniteLoop) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInfiniteLoop) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it has an infinite loop"; return false; @@ -1749,14 +1854,14 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (predecessor->GetLastInstruction()->IsThrow()) { if (invoke_instruction->GetBlock()->IsTryBlock()) { // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto. - LOG_FAIL(kNotInlinedTryCatch) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedTryCatch) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because one branch always throws and" << " caller is in a try/catch block"; return false; } else if (graph_->GetExitBlock() == nullptr) { // TODO(ngeoffray): Support adding HExit in the caller graph. 
- LOG_FAIL(kNotInlinedInfiniteLoop) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInfiniteLoop) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because one branch always throws and" << " caller does not have an exit block"; @@ -1775,7 +1880,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } if (!has_one_return) { - LOG_FAIL(kNotInlinedAlwaysThrows) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedAlwaysThrows) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it always throws"; return false; @@ -1788,7 +1893,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (block->GetLoopInformation()->IsIrreducible()) { // Don't inline methods with irreducible loops, they could prevent some // optimizations to run. - LOG_FAIL(kNotInlinedIrreducibleLoop) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedIrreducibleLoop) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it contains an irreducible loop"; return false; @@ -1797,7 +1902,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, // Don't inline methods with loops without exit, since they cause the // loop information to be computed incorrectly when updating after // inlining. - LOG_FAIL(kNotInlinedLoopWithoutExit) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedLoopWithoutExit) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it contains a loop with no exit"; return false; @@ -1808,7 +1913,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, !instr_it.Done(); instr_it.Advance()) { if (++number_of_instructions >= inlining_budget_) { - LOG_FAIL(kNotInlinedInstructionBudget) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedInstructionBudget) << "Method " << callee_dex_file.PrettyMethod(method_index) << " is not inlined because the outer method has reached" << " its instruction budget limit."; @@ -1817,7 +1922,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, HInstruction* current = instr_it.Current(); if (current->NeedsEnvironment() && (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) { - LOG_FAIL(kNotInlinedEnvironmentBudget) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedEnvironmentBudget) << "Method " << callee_dex_file.PrettyMethod(method_index) << " is not inlined because its caller has reached" << " its environment budget limit."; @@ -1827,7 +1932,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, if (current->NeedsEnvironment() && !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(), resolved_method)) { - LOG_FAIL(kNotInlinedStackMaps) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedStackMaps) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because " << current->DebugName() << " needs an environment, is in a different dex file" @@ -1836,7 +1941,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, } if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) { - LOG_FAIL(kNotInlinedDexCache) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedDexCache) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because " << current->DebugName() << " it is in a different dex file and requires access to the dex cache"; @@ -1848,7 +1953,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* 
invoke_instruction, current->IsUnresolvedStaticFieldSet() || current->IsUnresolvedInstanceFieldSet()) { // Entrypoint for unresolved fields does not handle inlined frames. - LOG_FAIL(kNotInlinedUnresolvedEntrypoint) + LOG_FAIL(stats_, MethodCompilationStat::kNotInlinedUnresolvedEntrypoint) << "Method " << callee_dex_file.PrettyMethod(method_index) << " could not be inlined because it is using an unresolved" << " entrypoint"; @@ -1885,7 +1990,7 @@ void HInliner::RunOptimizations(HGraph* callee_graph, // optimization that could lead to a HDeoptimize. The following optimizations do not. HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner"); HConstantFolding fold(callee_graph, "constant_folding$inliner"); - HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_); + HSharpening sharpening(callee_graph, codegen_, compiler_driver_); InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_); IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_); @@ -1920,6 +2025,7 @@ void HInliner::RunOptimizations(HGraph* callee_graph, return; } + CodeItemDataAccessor accessor(callee_graph->GetDexFile(), code_item); HInliner inliner(callee_graph, outermost_graph_, codegen_, @@ -1928,7 +2034,7 @@ void HInliner::RunOptimizations(HGraph* callee_graph, compiler_driver_, handles_, inline_stats_, - total_number_of_dex_registers_ + code_item->registers_size_, + total_number_of_dex_registers_ + accessor.RegistersSize(), total_number_of_instructions_ + number_of_instructions, this, depth_ + 1); @@ -1948,7 +2054,7 @@ static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti, declared_rti.IsStrictSupertypeOf(actual_rti); } -ReferenceTypeInfo HInliner::GetClassRTI(mirror::Class* klass) { +ReferenceTypeInfo HInliner::GetClassRTI(ObjPtr<mirror::Class> klass) { return ReferenceTypePropagation::IsAdmissible(klass) ? ReferenceTypeInfo::Create(handles_->NewHandle(klass)) : graph_->GetInexactObjectRti(); @@ -1975,10 +2081,9 @@ bool HInliner::ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* param_idx < e; ++param_idx, ++input_idx) { HInstruction* input = invoke_instruction->InputAt(input_idx); - if (input->GetType() == Primitive::kPrimNot) { - mirror::Class* param_cls = resolved_method->GetClassFromTypeIndex( - param_list->GetTypeItem(param_idx).type_idx_, - /* resolve */ false); + if (input->GetType() == DataType::Type::kReference) { + ObjPtr<mirror::Class> param_cls = resolved_method->LookupResolvedClassFromTypeIndex( + param_list->GetTypeItem(param_idx).type_idx_); if (IsReferenceTypeRefinement(GetClassRTI(param_cls), /* declared_can_be_null */ true, input)) { @@ -1994,7 +2099,7 @@ bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction, HInstruction* return_replacement) { // Check the integrity of reference types and run another type propagation if needed. if (return_replacement != nullptr) { - if (return_replacement->GetType() == Primitive::kPrimNot) { + if (return_replacement->GetType() == DataType::Type::kReference) { // Test if the return type is a refinement of the declared return type. 
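// [Editor's aside; not part of this patch] LOG_FAIL and MaybeRecordStat in the hunks above now
// take the OptimizingCompilerStats* explicitly together with a fully qualified
// MethodCompilationStat value. A plausible free-function form consistent with those call sites;
// the null check mirrors the member helper removed from instruction_builder.cc further down,
// and the exact signature should be treated as an assumption:
inline void MaybeRecordStat(OptimizingCompilerStats* stats, MethodCompilationStat stat) {
  if (stats != nullptr) {
    stats->RecordStat(stat);
  }
}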
if (IsReferenceTypeRefinement(invoke_instruction->GetReferenceTypeInfo(), /* declared_can_be_null */ true, @@ -2020,14 +2125,14 @@ bool HInliner::ReturnTypeMoreSpecific(HInvoke* invoke_instruction, void HInliner::FixUpReturnReferenceType(ArtMethod* resolved_method, HInstruction* return_replacement) { if (return_replacement != nullptr) { - if (return_replacement->GetType() == Primitive::kPrimNot) { + if (return_replacement->GetType() == DataType::Type::kReference) { if (!return_replacement->GetReferenceTypeInfo().IsValid()) { // Make sure that we have a valid type for the return. We may get an invalid one when // we inline invokes with multiple branches and create a Phi for the result. // TODO: we could be more precise by merging the phi inputs but that requires // some functionality from the reference type propagation. DCHECK(return_replacement->IsPhi()); - mirror::Class* cls = resolved_method->GetReturnType(false /* resolve */); + ObjPtr<mirror::Class> cls = resolved_method->LookupResolvedReturnType(); return_replacement->SetReferenceTypeInfo(GetClassRTI(cls)); } } diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h index 67476b6956..02465d37ba 100644 --- a/compiler/optimizing/inliner.h +++ b/compiler/optimizing/inliner.h @@ -17,10 +17,10 @@ #ifndef ART_COMPILER_OPTIMIZING_INLINER_H_ #define ART_COMPILER_OPTIMIZING_INLINER_H_ -#include "dex_file_types.h" -#include "invoke_type.h" -#include "optimization.h" +#include "dex/dex_file_types.h" +#include "dex/invoke_type.h" #include "jit/profile_compilation_info.h" +#include "optimization.h" namespace art { @@ -44,8 +44,9 @@ class HInliner : public HOptimization { size_t total_number_of_dex_registers, size_t total_number_of_instructions, HInliner* parent, - size_t depth = 0) - : HOptimization(outer_graph, kInlinerPassName, stats), + size_t depth = 0, + const char* name = kInlinerPassName) + : HOptimization(outer_graph, name, stats), outermost_graph_(outermost_graph), outer_compilation_unit_(outer_compilation_unit), caller_compilation_unit_(caller_compilation_unit), @@ -207,7 +208,7 @@ class HInliner : public HOptimization { // Creates an instance of ReferenceTypeInfo from `klass` if `klass` is // admissible (see ReferenceTypePropagation::IsAdmissible for details). // Otherwise returns inexact Object RTI. 
- ReferenceTypeInfo GetClassRTI(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_); + ReferenceTypeInfo GetClassRTI(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_); bool ArgumentTypesMoreSpecific(HInvoke* invoke_instruction, ArtMethod* resolved_method) REQUIRES_SHARED(Locks::mutator_lock_); diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc index 8054140924..c7aef3779d 100644 --- a/compiler/optimizing/instruction_builder.cc +++ b/compiler/optimizing/instruction_builder.cc @@ -17,29 +17,72 @@ #include "instruction_builder.h" #include "art_method-inl.h" -#include "bytecode_utils.h" +#include "base/arena_bit_vector.h" +#include "base/bit_vector-inl.h" +#include "block_builder.h" #include "class_linker.h" -#include "dex_instruction-inl.h" +#include "data_type-inl.h" +#include "dex/bytecode_utils.h" +#include "dex/dex_instruction-inl.h" +#include "driver/compiler_driver-inl.h" +#include "driver/dex_compilation_unit.h" #include "driver/compiler_options.h" #include "imtable-inl.h" +#include "mirror/dex_cache.h" +#include "oat_file.h" +#include "optimizing_compiler_stats.h" #include "quicken_info.h" -#include "sharpening.h" #include "scoped_thread_state_change-inl.h" +#include "sharpening.h" +#include "ssa_builder.h" +#include "well_known_classes.h" namespace art { -void HInstructionBuilder::MaybeRecordStat(MethodCompilationStat compilation_stat) { - if (compilation_stats_ != nullptr) { - compilation_stats_->RecordStat(compilation_stat); - } +HInstructionBuilder::HInstructionBuilder(HGraph* graph, + HBasicBlockBuilder* block_builder, + SsaBuilder* ssa_builder, + const DexFile* dex_file, + const CodeItemDebugInfoAccessor& accessor, + DataType::Type return_type, + const DexCompilationUnit* dex_compilation_unit, + const DexCompilationUnit* outer_compilation_unit, + CompilerDriver* compiler_driver, + CodeGenerator* code_generator, + ArrayRef<const uint8_t> interpreter_metadata, + OptimizingCompilerStats* compiler_stats, + VariableSizedHandleScope* handles, + ScopedArenaAllocator* local_allocator) + : allocator_(graph->GetAllocator()), + graph_(graph), + handles_(handles), + dex_file_(dex_file), + code_item_accessor_(accessor), + return_type_(return_type), + block_builder_(block_builder), + ssa_builder_(ssa_builder), + compiler_driver_(compiler_driver), + code_generator_(code_generator), + dex_compilation_unit_(dex_compilation_unit), + outer_compilation_unit_(outer_compilation_unit), + quicken_info_(interpreter_metadata), + compilation_stats_(compiler_stats), + local_allocator_(local_allocator), + locals_for_(local_allocator->Adapter(kArenaAllocGraphBuilder)), + current_block_(nullptr), + current_locals_(nullptr), + latest_result_(nullptr), + current_this_parameter_(nullptr), + loop_headers_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { + loop_headers_.reserve(kDefaultNumberOfLoops); } HBasicBlock* HInstructionBuilder::FindBlockStartingAt(uint32_t dex_pc) const { return block_builder_->GetBlockAt(dex_pc); } -inline ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) { - ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()]; +inline ScopedArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) { + ScopedArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()]; const size_t vregs = graph_->GetNumberOfVRegs(); if (locals->size() == vregs) { return locals; @@ -47,9 +90,9 @@ inline ArenaVector<HInstruction*>* 
HInstructionBuilder::GetLocalsFor(HBasicBlock return GetLocalsForWithAllocation(block, locals, vregs); } -ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsForWithAllocation( +ScopedArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsForWithAllocation( HBasicBlock* block, - ArenaVector<HInstruction*>* locals, + ScopedArenaVector<HInstruction*>* locals, const size_t vregs) { DCHECK_NE(locals->size(), vregs); locals->resize(vregs, nullptr); @@ -63,8 +106,8 @@ ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsForWithAllocation( // the first throwing instruction. HInstruction* current_local_value = (*current_locals_)[i]; if (current_local_value != nullptr) { - HPhi* phi = new (arena_) HPhi( - arena_, + HPhi* phi = new (allocator_) HPhi( + allocator_, i, 0, current_local_value->GetType()); @@ -77,7 +120,7 @@ ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsForWithAllocation( } inline HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) { - ArenaVector<HInstruction*>* locals = GetLocalsFor(block); + ScopedArenaVector<HInstruction*>* locals = GetLocalsFor(block); return (*locals)[local]; } @@ -113,8 +156,8 @@ void HInstructionBuilder::InitializeBlockLocals() { HInstruction* incoming = ValueOfLocalAt(current_block_->GetLoopInformation()->GetPreHeader(), local); if (incoming != nullptr) { - HPhi* phi = new (arena_) HPhi( - arena_, + HPhi* phi = new (allocator_) HPhi( + allocator_, local, 0, incoming->GetType()); @@ -152,8 +195,8 @@ void HInstructionBuilder::InitializeBlockLocals() { if (is_different) { HInstruction* first_input = ValueOfLocalAt(current_block_->GetPredecessors()[0], local); - HPhi* phi = new (arena_) HPhi( - arena_, + HPhi* phi = new (allocator_) HPhi( + allocator_, local, current_block_->GetPredecessors().size(), first_input->GetType()); @@ -172,7 +215,7 @@ void HInstructionBuilder::InitializeBlockLocals() { void HInstructionBuilder::PropagateLocalsToCatchBlocks() { const HTryBoundary& try_entry = current_block_->GetTryCatchInformation()->GetTryEntry(); for (HBasicBlock* catch_block : try_entry.GetExceptionHandlers()) { - ArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block); + ScopedArenaVector<HInstruction*>* handler_locals = GetLocalsFor(catch_block); DCHECK_EQ(handler_locals->size(), current_locals_->size()); for (size_t vreg = 0, e = current_locals_->size(); vreg < e; ++vreg) { HInstruction* handler_value = (*handler_locals)[vreg]; @@ -214,24 +257,24 @@ void HInstructionBuilder::InsertInstructionAtTop(HInstruction* instruction) { void HInstructionBuilder::InitializeInstruction(HInstruction* instruction) { if (instruction->NeedsEnvironment()) { - HEnvironment* environment = new (arena_) HEnvironment( - arena_, + HEnvironment* environment = new (allocator_) HEnvironment( + allocator_, current_locals_->size(), graph_->GetArtMethod(), instruction->GetDexPc(), instruction); - environment->CopyFrom(*current_locals_); + environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals_)); instruction->SetRawEnvironment(environment); } } HInstruction* HInstructionBuilder::LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc) { - HInstruction* ref = LoadLocal(register_index, Primitive::kPrimNot); + HInstruction* ref = LoadLocal(register_index, DataType::Type::kReference); if (!ref->CanBeNull()) { return ref; } - HNullCheck* null_check = new (arena_) HNullCheck(ref, dex_pc); + HNullCheck* null_check = new (allocator_) HNullCheck(ref, dex_pc); AppendInstruction(null_check); return null_check; } @@ 
-268,8 +311,10 @@ static bool IsBlockPopulated(HBasicBlock* block) { } bool HInstructionBuilder::Build() { - locals_for_.resize(graph_->GetBlocks().size(), - ArenaVector<HInstruction*>(arena_->Adapter(kArenaAllocGraphBuilder))); + DCHECK(code_item_accessor_.HasCodeItem()); + locals_for_.resize( + graph_->GetBlocks().size(), + ScopedArenaVector<HInstruction*>(local_allocator_->Adapter(kArenaAllocGraphBuilder))); // Find locations where we want to generate extra stackmaps for native debugging. // This allows us to generate the info only at interesting points (for example, @@ -278,9 +323,7 @@ bool HInstructionBuilder::Build() { compiler_driver_->GetCompilerOptions().GetNativeDebuggable(); ArenaBitVector* native_debug_info_locations = nullptr; if (native_debuggable) { - const uint32_t num_instructions = code_item_.insns_size_in_code_units_; - native_debug_info_locations = new (arena_) ArenaBitVector (arena_, num_instructions, false); - FindNativeDebugInfoLocations(native_debug_info_locations); + native_debug_info_locations = FindNativeDebugInfoLocations(); } for (HBasicBlock* block : graph_->GetReversePostOrder()) { @@ -291,14 +334,14 @@ bool HInstructionBuilder::Build() { if (current_block_->IsEntryBlock()) { InitializeParameters(); - AppendInstruction(new (arena_) HSuspendCheck(0u)); - AppendInstruction(new (arena_) HGoto(0u)); + AppendInstruction(new (allocator_) HSuspendCheck(0u)); + AppendInstruction(new (allocator_) HGoto(0u)); continue; } else if (current_block_->IsExitBlock()) { - AppendInstruction(new (arena_) HExit()); + AppendInstruction(new (allocator_) HExit()); continue; } else if (current_block_->IsLoopHeader()) { - HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(current_block_->GetDexPc()); + HSuspendCheck* suspend_check = new (allocator_) HSuspendCheck(current_block_->GetDexPc()); current_block_->GetLoopInformation()->SetSuspendCheck(suspend_check); // This is slightly odd because the loop header might not be empty (TryBoundary). // But we're still creating the environment with locals from the top of the block. @@ -318,31 +361,31 @@ bool HInstructionBuilder::Build() { quicken_index = block_builder_->GetQuickenIndex(block_dex_pc); } - for (CodeItemIterator it(code_item_, block_dex_pc); !it.Done(); it.Advance()) { + for (const DexInstructionPcPair& pair : code_item_accessor_.InstructionsFrom(block_dex_pc)) { if (current_block_ == nullptr) { // The previous instruction ended this block. break; } - uint32_t dex_pc = it.CurrentDexPc(); + const uint32_t dex_pc = pair.DexPc(); if (dex_pc != block_dex_pc && FindBlockStartingAt(dex_pc) != nullptr) { // This dex_pc starts a new basic block. break; } - if (current_block_->IsTryBlock() && IsThrowingDexInstruction(it.CurrentInstruction())) { + if (current_block_->IsTryBlock() && IsThrowingDexInstruction(pair.Inst())) { PropagateLocalsToCatchBlocks(); } if (native_debuggable && native_debug_info_locations->IsBitSet(dex_pc)) { - AppendInstruction(new (arena_) HNativeDebugInfo(dex_pc)); + AppendInstruction(new (allocator_) HNativeDebugInfo(dex_pc)); } - if (!ProcessDexInstruction(it.CurrentInstruction(), dex_pc, quicken_index)) { + if (!ProcessDexInstruction(pair.Inst(), dex_pc, quicken_index)) { return false; } - if (QuickenInfoTable::NeedsIndexForInstruction(&it.CurrentInstruction())) { + if (QuickenInfoTable::NeedsIndexForInstruction(&pair.Inst())) { ++quicken_index; } } @@ -352,7 +395,7 @@ bool HInstructionBuilder::Build() { // instruction of the current block is not a branching instruction. 
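// [Editor's aside; not part of this patch] Build() now walks the dex code through the CodeItem
// accessors instead of CodeItemIterator; the range yields DexInstructionPcPair values exposing
// DexPc() and Inst(). A small sketch of that iteration style, assuming the surrounding ART
// headers; the helper and its VLOG output are illustrative, the accessor calls are the ones
// used in the hunk above:
static void LogOpcodes(const CodeItemDataAccessor& accessor) {
  for (const DexInstructionPcPair& pair : accessor) {
    VLOG(compiler) << "dex_pc=" << pair.DexPc()
                   << " opcode=" << pair.Inst().Opcode();
  }
}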
// We add an unconditional Goto to the next block. DCHECK_EQ(current_block_->GetSuccessors().size(), 1u); - AppendInstruction(new (arena_) HGoto()); + AppendInstruction(new (allocator_) HGoto()); } } @@ -361,7 +404,74 @@ bool HInstructionBuilder::Build() { return true; } -void HInstructionBuilder::FindNativeDebugInfoLocations(ArenaBitVector* locations) { +void HInstructionBuilder::BuildIntrinsic(ArtMethod* method) { + DCHECK(!code_item_accessor_.HasCodeItem()); + DCHECK(method->IsIntrinsic()); + + locals_for_.resize( + graph_->GetBlocks().size(), + ScopedArenaVector<HInstruction*>(local_allocator_->Adapter(kArenaAllocGraphBuilder))); + + // Fill the entry block. Do not add suspend check, we do not want a suspend + // check in intrinsics; intrinsic methods are supposed to be fast. + current_block_ = graph_->GetEntryBlock(); + InitializeBlockLocals(); + InitializeParameters(); + AppendInstruction(new (allocator_) HGoto(0u)); + + // Fill the body. + current_block_ = current_block_->GetSingleSuccessor(); + InitializeBlockLocals(); + DCHECK(!IsBlockPopulated(current_block_)); + + // Add the invoke and return instruction. Use HInvokeStaticOrDirect even + // for methods that would normally use an HInvokeVirtual (sharpen the call). + size_t in_vregs = graph_->GetNumberOfInVRegs(); + size_t number_of_arguments = + in_vregs - std::count(current_locals_->end() - in_vregs, current_locals_->end(), nullptr); + uint32_t method_idx = dex_compilation_unit_->GetDexMethodIndex(); + MethodReference target_method(dex_file_, method_idx); + HInvokeStaticOrDirect::DispatchInfo dispatch_info = { + HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall, + HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, + /* method_load_data */ 0u + }; + InvokeType invoke_type = dex_compilation_unit_->IsStatic() ? kStatic : kDirect; + HInvokeStaticOrDirect* invoke = new (allocator_) HInvokeStaticOrDirect( + allocator_, + number_of_arguments, + return_type_, + kNoDexPc, + method_idx, + method, + dispatch_info, + invoke_type, + target_method, + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + HandleInvoke(invoke, + in_vregs, + /* args */ nullptr, + graph_->GetNumberOfVRegs() - in_vregs, + /* is_range */ true, + dex_file_->GetMethodShorty(method_idx), + /* clinit_check */ nullptr, + /* is_unresolved */ false); + + // Add the return instruction. + if (return_type_ == DataType::Type::kVoid) { + AppendInstruction(new (allocator_) HReturnVoid()); + } else { + AppendInstruction(new (allocator_) HReturn(invoke)); + } + + // Fill the exit block. + DCHECK_EQ(current_block_->GetSingleSuccessor(), graph_->GetExitBlock()); + current_block_ = graph_->GetExitBlock(); + InitializeBlockLocals(); + AppendInstruction(new (allocator_) HExit()); +} + +ArenaBitVector* HInstructionBuilder::FindNativeDebugInfoLocations() { // The callback gets called when the line number changes. // In other words, it marks the start of new java statement. struct Callback { @@ -370,20 +480,26 @@ void HInstructionBuilder::FindNativeDebugInfoLocations(ArenaBitVector* locations return false; } }; - dex_file_->DecodeDebugPositionInfo(&code_item_, Callback::Position, locations); + ArenaBitVector* locations = ArenaBitVector::Create(local_allocator_, + code_item_accessor_.InsnsSizeInCodeUnits(), + /* expandable */ false, + kArenaAllocGraphBuilder); + locations->ClearAllBits(); + dex_file_->DecodeDebugPositionInfo(code_item_accessor_.DebugInfoOffset(), + Callback::Position, + locations); // Instruction-specific tweaks. 
- const Instruction* const begin = Instruction::At(code_item_.insns_); - const Instruction* const end = begin->RelativeAt(code_item_.insns_size_in_code_units_); - for (const Instruction* inst = begin; inst < end; inst = inst->Next()) { + for (const DexInstructionPcPair& inst : code_item_accessor_) { switch (inst->Opcode()) { case Instruction::MOVE_EXCEPTION: { // Stop in native debugger after the exception has been moved. // The compiler also expects the move at the start of basic block so // we do not want to interfere by inserting native-debug-info before it. - locations->ClearBit(inst->GetDexPc(code_item_.insns_)); - const Instruction* next = inst->Next(); - if (next < end) { - locations->SetBit(next->GetDexPc(code_item_.insns_)); + locations->ClearBit(inst.DexPc()); + DexInstructionIterator next = std::next(DexInstructionIterator(inst)); + DCHECK(next.DexPc() != inst.DexPc()); + if (next != code_item_accessor_.end()) { + locations->SetBit(next.DexPc()); } break; } @@ -391,17 +507,18 @@ void HInstructionBuilder::FindNativeDebugInfoLocations(ArenaBitVector* locations break; } } + return locations; } -HInstruction* HInstructionBuilder::LoadLocal(uint32_t reg_number, Primitive::Type type) const { +HInstruction* HInstructionBuilder::LoadLocal(uint32_t reg_number, DataType::Type type) const { HInstruction* value = (*current_locals_)[reg_number]; DCHECK(value != nullptr); // If the operation requests a specific type, we make sure its input is of that type. if (type != value->GetType()) { - if (Primitive::IsFloatingPointType(type)) { + if (DataType::IsFloatingPointType(type)) { value = ssa_builder_->GetFloatOrDoubleEquivalent(value, type); - } else if (type == Primitive::kPrimNot) { + } else if (type == DataType::Type::kReference) { value = ssa_builder_->GetReferenceTypeEquivalent(value); } DCHECK(value != nullptr); @@ -411,8 +528,8 @@ HInstruction* HInstructionBuilder::LoadLocal(uint32_t reg_number, Primitive::Typ } void HInstructionBuilder::UpdateLocal(uint32_t reg_number, HInstruction* stored_value) { - Primitive::Type stored_type = stored_value->GetType(); - DCHECK_NE(stored_type, Primitive::kPrimVoid); + DataType::Type stored_type = stored_value->GetType(); + DCHECK_NE(stored_type, DataType::Type::kVoid); // Storing into vreg `reg_number` may implicitly invalidate the surrounding // registers. Consider the following cases: @@ -425,7 +542,7 @@ void HInstructionBuilder::UpdateLocal(uint32_t reg_number, HInstruction* stored_ if (reg_number != 0) { HInstruction* local_low = (*current_locals_)[reg_number - 1]; - if (local_low != nullptr && Primitive::Is64BitType(local_low->GetType())) { + if (local_low != nullptr && DataType::Is64BitType(local_low->GetType())) { // The vreg we are storing into was previously the high vreg of a pair. // We need to invalidate its low vreg. DCHECK((*current_locals_)[reg_number] == nullptr); @@ -434,7 +551,7 @@ void HInstructionBuilder::UpdateLocal(uint32_t reg_number, HInstruction* stored_ } (*current_locals_)[reg_number] = stored_value; - if (Primitive::Is64BitType(stored_type)) { + if (DataType::Is64BitType(stored_type)) { // We are storing a pair. Invalidate the instruction in the high vreg. (*current_locals_)[reg_number + 1] = nullptr; } @@ -443,8 +560,8 @@ void HInstructionBuilder::UpdateLocal(uint32_t reg_number, HInstruction* stored_ void HInstructionBuilder::InitializeParameters() { DCHECK(current_block_->IsEntryBlock()); - // dex_compilation_unit_ is null only when unit testing. 
- if (dex_compilation_unit_ == nullptr) { + // outer_compilation_unit_ is null only when unit testing. + if (outer_compilation_unit_ == nullptr) { return; } @@ -457,10 +574,10 @@ void HInstructionBuilder::InitializeParameters() { dex_file_->GetMethodId(dex_compilation_unit_->GetDexMethodIndex()); if (!dex_compilation_unit_->IsStatic()) { // Add the implicit 'this' argument, not expressed in the signature. - HParameterValue* parameter = new (arena_) HParameterValue(*dex_file_, + HParameterValue* parameter = new (allocator_) HParameterValue(*dex_file_, referrer_method_id.class_idx_, parameter_index++, - Primitive::kPrimNot, + DataType::Type::kReference, /* is_this */ true); AppendInstruction(parameter); UpdateLocal(locals_index++, parameter); @@ -473,18 +590,18 @@ void HInstructionBuilder::InitializeParameters() { const DexFile::ProtoId& proto = dex_file_->GetMethodPrototype(referrer_method_id); const DexFile::TypeList* arg_types = dex_file_->GetProtoParameters(proto); for (int i = 0, shorty_pos = 1; i < number_of_parameters; i++) { - HParameterValue* parameter = new (arena_) HParameterValue( + HParameterValue* parameter = new (allocator_) HParameterValue( *dex_file_, arg_types->GetTypeItem(shorty_pos - 1).type_idx_, parameter_index++, - Primitive::GetType(shorty[shorty_pos]), + DataType::FromShorty(shorty[shorty_pos]), /* is_this */ false); ++shorty_pos; AppendInstruction(parameter); // Store the parameter value in the local that the dex code will use // to reference that parameter. UpdateLocal(locals_index++, parameter); - if (Primitive::Is64BitType(parameter->GetType())) { + if (DataType::Is64BitType(parameter->GetType())) { i++; locals_index++; parameter_index++; @@ -494,110 +611,110 @@ void HInstructionBuilder::InitializeParameters() { template<typename T> void HInstructionBuilder::If_22t(const Instruction& instruction, uint32_t dex_pc) { - HInstruction* first = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); - HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); - T* comparison = new (arena_) T(first, second, dex_pc); + HInstruction* first = LoadLocal(instruction.VRegA(), DataType::Type::kInt32); + HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32); + T* comparison = new (allocator_) T(first, second, dex_pc); AppendInstruction(comparison); - AppendInstruction(new (arena_) HIf(comparison, dex_pc)); + AppendInstruction(new (allocator_) HIf(comparison, dex_pc)); current_block_ = nullptr; } template<typename T> void HInstructionBuilder::If_21t(const Instruction& instruction, uint32_t dex_pc) { - HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); - T* comparison = new (arena_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc); + HInstruction* value = LoadLocal(instruction.VRegA(), DataType::Type::kInt32); + T* comparison = new (allocator_) T(value, graph_->GetIntConstant(0, dex_pc), dex_pc); AppendInstruction(comparison); - AppendInstruction(new (arena_) HIf(comparison, dex_pc)); + AppendInstruction(new (allocator_) HIf(comparison, dex_pc)); current_block_ = nullptr; } template<typename T> void HInstructionBuilder::Unop_12x(const Instruction& instruction, - Primitive::Type type, + DataType::Type type, uint32_t dex_pc) { HInstruction* first = LoadLocal(instruction.VRegB(), type); - AppendInstruction(new (arena_) T(type, first, dex_pc)); + AppendInstruction(new (allocator_) T(type, first, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } void 
HInstructionBuilder::Conversion_12x(const Instruction& instruction, - Primitive::Type input_type, - Primitive::Type result_type, + DataType::Type input_type, + DataType::Type result_type, uint32_t dex_pc) { HInstruction* first = LoadLocal(instruction.VRegB(), input_type); - AppendInstruction(new (arena_) HTypeConversion(result_type, first, dex_pc)); + AppendInstruction(new (allocator_) HTypeConversion(result_type, first, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } template<typename T> void HInstructionBuilder::Binop_23x(const Instruction& instruction, - Primitive::Type type, + DataType::Type type, uint32_t dex_pc) { HInstruction* first = LoadLocal(instruction.VRegB(), type); HInstruction* second = LoadLocal(instruction.VRegC(), type); - AppendInstruction(new (arena_) T(type, first, second, dex_pc)); + AppendInstruction(new (allocator_) T(type, first, second, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } template<typename T> void HInstructionBuilder::Binop_23x_shift(const Instruction& instruction, - Primitive::Type type, + DataType::Type type, uint32_t dex_pc) { HInstruction* first = LoadLocal(instruction.VRegB(), type); - HInstruction* second = LoadLocal(instruction.VRegC(), Primitive::kPrimInt); - AppendInstruction(new (arena_) T(type, first, second, dex_pc)); + HInstruction* second = LoadLocal(instruction.VRegC(), DataType::Type::kInt32); + AppendInstruction(new (allocator_) T(type, first, second, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } void HInstructionBuilder::Binop_23x_cmp(const Instruction& instruction, - Primitive::Type type, + DataType::Type type, ComparisonBias bias, uint32_t dex_pc) { HInstruction* first = LoadLocal(instruction.VRegB(), type); HInstruction* second = LoadLocal(instruction.VRegC(), type); - AppendInstruction(new (arena_) HCompare(type, first, second, bias, dex_pc)); + AppendInstruction(new (allocator_) HCompare(type, first, second, bias, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } template<typename T> void HInstructionBuilder::Binop_12x_shift(const Instruction& instruction, - Primitive::Type type, + DataType::Type type, uint32_t dex_pc) { HInstruction* first = LoadLocal(instruction.VRegA(), type); - HInstruction* second = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); - AppendInstruction(new (arena_) T(type, first, second, dex_pc)); + HInstruction* second = LoadLocal(instruction.VRegB(), DataType::Type::kInt32); + AppendInstruction(new (allocator_) T(type, first, second, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } template<typename T> void HInstructionBuilder::Binop_12x(const Instruction& instruction, - Primitive::Type type, + DataType::Type type, uint32_t dex_pc) { HInstruction* first = LoadLocal(instruction.VRegA(), type); HInstruction* second = LoadLocal(instruction.VRegB(), type); - AppendInstruction(new (arena_) T(type, first, second, dex_pc)); + AppendInstruction(new (allocator_) T(type, first, second, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } template<typename T> void HInstructionBuilder::Binop_22s(const Instruction& instruction, bool reverse, uint32_t dex_pc) { - HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); + HInstruction* first = LoadLocal(instruction.VRegB(), DataType::Type::kInt32); HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22s(), dex_pc); if (reverse) { 
std::swap(first, second); } - AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc)); + AppendInstruction(new (allocator_) T(DataType::Type::kInt32, first, second, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } template<typename T> void HInstructionBuilder::Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc) { - HInstruction* first = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); + HInstruction* first = LoadLocal(instruction.VRegB(), DataType::Type::kInt32); HInstruction* second = graph_->GetIntConstant(instruction.VRegC_22b(), dex_pc); if (reverse) { std::swap(first, second); } - AppendInstruction(new (arena_) T(Primitive::kPrimInt, first, second, dex_pc)); + AppendInstruction(new (allocator_) T(DataType::Type::kInt32, first, second, dex_pc)); UpdateLocal(instruction.VRegA(), current_block_->GetLastInstruction()); } @@ -629,19 +746,19 @@ static bool IsFallthroughInstruction(const Instruction& instruction, } void HInstructionBuilder::BuildSwitch(const Instruction& instruction, uint32_t dex_pc) { - HInstruction* value = LoadLocal(instruction.VRegA(), Primitive::kPrimInt); + HInstruction* value = LoadLocal(instruction.VRegA(), DataType::Type::kInt32); DexSwitchTable table(instruction, dex_pc); if (table.GetNumEntries() == 0) { // Empty Switch. Code falls through to the next block. DCHECK(IsFallthroughInstruction(instruction, dex_pc, current_block_)); - AppendInstruction(new (arena_) HGoto(dex_pc)); + AppendInstruction(new (allocator_) HGoto(dex_pc)); } else if (table.ShouldBuildDecisionTree()) { for (DexSwitchTableIterator it(table); !it.Done(); it.Advance()) { HInstruction* case_value = graph_->GetIntConstant(it.CurrentKey(), dex_pc); - HEqual* comparison = new (arena_) HEqual(value, case_value, dex_pc); + HEqual* comparison = new (allocator_) HEqual(value, case_value, dex_pc); AppendInstruction(comparison); - AppendInstruction(new (arena_) HIf(comparison, dex_pc)); + AppendInstruction(new (allocator_) HIf(comparison, dex_pc)); if (!it.IsLast()) { current_block_ = FindBlockStartingAt(it.GetDexPcForCurrentIndex()); @@ -649,16 +766,16 @@ void HInstructionBuilder::BuildSwitch(const Instruction& instruction, uint32_t d } } else { AppendInstruction( - new (arena_) HPackedSwitch(table.GetEntryAt(0), table.GetNumEntries(), value, dex_pc)); + new (allocator_) HPackedSwitch(table.GetEntryAt(0), table.GetNumEntries(), value, dex_pc)); } current_block_ = nullptr; } void HInstructionBuilder::BuildReturn(const Instruction& instruction, - Primitive::Type type, + DataType::Type type, uint32_t dex_pc) { - if (type == Primitive::kPrimVoid) { + if (type == DataType::Type::kVoid) { // Only <init> (which is a return-void) could possibly have a constructor fence. // This may insert additional redundant constructor fences from the super constructors. // TODO: remove redundant constructor fences (b/36656456). 
@@ -669,13 +786,16 @@ void HInstructionBuilder::BuildReturn(const Instruction& instruction, HInstruction* fence_target = current_this_parameter_; DCHECK(fence_target != nullptr); - AppendInstruction(new (arena_) HConstructorFence(fence_target, dex_pc, arena_)); + AppendInstruction(new (allocator_) HConstructorFence(fence_target, dex_pc, allocator_)); + MaybeRecordStat( + compilation_stats_, + MethodCompilationStat::kConstructorFenceGeneratedFinal); } - AppendInstruction(new (arena_) HReturnVoid(dex_pc)); + AppendInstruction(new (allocator_) HReturnVoid(dex_pc)); } else { DCHECK(!RequiresConstructorBarrier(dex_compilation_unit_, compiler_driver_)); HInstruction* value = LoadLocal(instruction.VRegA(), type); - AppendInstruction(new (arena_) HReturn(value, dex_pc)); + AppendInstruction(new (allocator_) HReturn(value, dex_pc)); } current_block_ = nullptr; } @@ -713,7 +833,6 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in ArtMethod* resolved_method = class_linker->ResolveMethod<ClassLinker::ResolveMode::kCheckICCEAndIAE>( - *dex_compilation_unit_->GetDexFile(), method_idx, dex_compilation_unit_->GetDexCache(), class_loader, @@ -748,7 +867,6 @@ ArtMethod* HInstructionBuilder::ResolveMethod(uint16_t method_idx, InvokeType in return nullptr; } ObjPtr<mirror::Class> referenced_class = class_linker->LookupResolvedType( - *dex_compilation_unit_->GetDexFile(), dex_compilation_unit_->GetDexFile()->GetMethodId(method_idx).class_idx_, dex_compilation_unit_->GetDexCache().Get(), class_loader.Get()); @@ -804,7 +922,7 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, uint32_t register_index) { InvokeType invoke_type = GetInvokeTypeFromOpCode(instruction.Opcode()); const char* descriptor = dex_file_->GetMethodShorty(method_idx); - Primitive::Type return_type = Primitive::GetType(descriptor[0]); + DataType::Type return_type = DataType::FromShorty(descriptor[0]); // Remove the return type from the 'proto'. size_t number_of_arguments = strlen(descriptor) - 1; @@ -816,13 +934,14 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, ArtMethod* resolved_method = ResolveMethod(method_idx, invoke_type); if (UNLIKELY(resolved_method == nullptr)) { - MaybeRecordStat(MethodCompilationStat::kUnresolvedMethod); - HInvoke* invoke = new (arena_) HInvokeUnresolved(arena_, - number_of_arguments, - return_type, - dex_pc, - method_idx, - invoke_type); + MaybeRecordStat(compilation_stats_, + MethodCompilationStat::kUnresolvedMethod); + HInvoke* invoke = new (allocator_) HInvokeUnresolved(allocator_, + number_of_arguments, + return_type, + dex_pc, + method_idx, + invoke_type); return HandleInvoke(invoke, number_of_vreg_arguments, args, @@ -841,14 +960,18 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, dchecked_integral_cast<uint64_t>(string_init_entry_point) }; - MethodReference target_method(dex_file_, method_idx); - HInvoke* invoke = new (arena_) HInvokeStaticOrDirect( - arena_, + ScopedObjectAccess soa(Thread::Current()); + MethodReference target_method(resolved_method->GetDexFile(), + resolved_method->GetDexMethodIndex()); + // We pass null for the resolved_method to ensure optimizations + // don't rely on it. 
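// [Editor's aside; not part of this patch] The BuildReturn hunk above now records a stat each
// time a constructor fence is emitted for a constructor returning void. A condensed sketch of
// that pattern, using only constructs shown in the diff (HConstructorFence, AppendInstruction,
// MaybeRecordStat); the surrounding control flow is elided:
HConstructorFence* fence =
    new (allocator_) HConstructorFence(fence_target, dex_pc, allocator_);
AppendInstruction(fence);
MaybeRecordStat(compilation_stats_, MethodCompilationStat::kConstructorFenceGeneratedFinal);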
+ HInvoke* invoke = new (allocator_) HInvokeStaticOrDirect( + allocator_, number_of_arguments - 1, - Primitive::kPrimNot /*return_type */, + DataType::Type::kReference /*return_type */, dex_pc, method_idx, - nullptr, + nullptr /* resolved_method */, dispatch_info, invoke_type, target_method, @@ -888,35 +1011,35 @@ bool HInstructionBuilder::BuildInvoke(const Instruction& instruction, }; MethodReference target_method(resolved_method->GetDexFile(), resolved_method->GetDexMethodIndex()); - invoke = new (arena_) HInvokeStaticOrDirect(arena_, - number_of_arguments, - return_type, - dex_pc, - method_idx, - resolved_method, - dispatch_info, - invoke_type, - target_method, - clinit_check_requirement); + invoke = new (allocator_) HInvokeStaticOrDirect(allocator_, + number_of_arguments, + return_type, + dex_pc, + method_idx, + resolved_method, + dispatch_info, + invoke_type, + target_method, + clinit_check_requirement); } else if (invoke_type == kVirtual) { ScopedObjectAccess soa(Thread::Current()); // Needed for the method index - invoke = new (arena_) HInvokeVirtual(arena_, - number_of_arguments, - return_type, - dex_pc, - method_idx, - resolved_method, - resolved_method->GetMethodIndex()); + invoke = new (allocator_) HInvokeVirtual(allocator_, + number_of_arguments, + return_type, + dex_pc, + method_idx, + resolved_method, + resolved_method->GetMethodIndex()); } else { DCHECK_EQ(invoke_type, kInterface); ScopedObjectAccess soa(Thread::Current()); // Needed for the IMT index. - invoke = new (arena_) HInvokeInterface(arena_, - number_of_arguments, - return_type, - dex_pc, - method_idx, - resolved_method, - ImTable::GetImtIndex(resolved_method)); + invoke = new (allocator_) HInvokeInterface(allocator_, + number_of_arguments, + return_type, + dex_pc, + method_idx, + resolved_method, + ImTable::GetImtIndex(resolved_method)); } return HandleInvoke(invoke, @@ -939,13 +1062,13 @@ bool HInstructionBuilder::BuildInvokePolymorphic(const Instruction& instruction uint32_t register_index) { const char* descriptor = dex_file_->GetShorty(proto_idx); DCHECK_EQ(1 + ArtMethod::NumArgRegisters(descriptor), number_of_vreg_arguments); - Primitive::Type return_type = Primitive::GetType(descriptor[0]); + DataType::Type return_type = DataType::FromShorty(descriptor[0]); size_t number_of_arguments = strlen(descriptor); - HInvoke* invoke = new (arena_) HInvokePolymorphic(arena_, - number_of_arguments, - return_type, - dex_pc, - method_idx); + HInvoke* invoke = new (allocator_) HInvokePolymorphic(allocator_, + number_of_arguments, + return_type, + dex_pc, + method_idx); return HandleInvoke(invoke, number_of_vreg_arguments, args, @@ -965,7 +1088,7 @@ HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, u Handle<mirror::Class> klass = load_class->GetClass(); if (!IsInitialized(klass)) { - cls = new (arena_) HClinitCheck(load_class, dex_pc); + cls = new (allocator_) HClinitCheck(load_class, dex_pc); AppendInstruction(cls); } @@ -980,7 +1103,7 @@ HNewInstance* HInstructionBuilder::BuildNewInstance(dex::TypeIndex type_index, u // Consider classes we haven't resolved as potentially finalizable. 
bool finalizable = (klass == nullptr) || klass->IsFinalizable(); - HNewInstance* new_instance = new (arena_) HNewInstance( + HNewInstance* new_instance = new (allocator_) HNewInstance( cls, dex_pc, type_index, @@ -1037,11 +1160,14 @@ void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* alloc // (and in theory the 0-initializing, but that happens automatically // when new memory pages are mapped in by the OS). HConstructorFence* ctor_fence = - new (arena_) HConstructorFence(allocation, allocation->GetDexPc(), arena_); + new (allocator_) HConstructorFence(allocation, allocation->GetDexPc(), allocator_); AppendInstruction(ctor_fence); + MaybeRecordStat( + compilation_stats_, + MethodCompilationStat::kConstructorFenceGeneratedNew); } -static bool IsSubClass(mirror::Class* to_test, mirror::Class* super_class) +static bool IsSubClass(ObjPtr<mirror::Class> to_test, ObjPtr<mirror::Class> super_class) REQUIRES_SHARED(Locks::mutator_lock_) { return to_test != nullptr && !to_test->IsInterface() && to_test->IsSubClass(super_class); } @@ -1088,7 +1214,7 @@ HClinitCheck* HInstructionBuilder::ProcessClinitCheckForInvoke( /* needs_access_check */ false); if (cls != nullptr) { *clinit_check_requirement = HInvokeStaticOrDirect::ClinitCheckRequirement::kExplicit; - clinit_check = new (arena_) HClinitCheck(cls, dex_pc); + clinit_check = new (allocator_) HClinitCheck(cls, dex_pc); AppendInstruction(clinit_check); } } @@ -1111,8 +1237,8 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, // it hasn't been properly checked. (i < number_of_vreg_arguments) && (*argument_index < invoke->GetNumberOfArguments()); i++, (*argument_index)++) { - Primitive::Type type = Primitive::GetType(descriptor[descriptor_index++]); - bool is_wide = (type == Primitive::kPrimLong) || (type == Primitive::kPrimDouble); + DataType::Type type = DataType::FromShorty(descriptor[descriptor_index++]); + bool is_wide = (type == DataType::Type::kInt64) || (type == DataType::Type::kFloat64); if (!is_range && is_wide && ((i + 1 == number_of_vreg_arguments) || (args[i] + 1 != args[i + 1]))) { @@ -1122,7 +1248,8 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, VLOG(compiler) << "Did not compile " << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << " because of non-sequential dex register pair in wide argument"; - MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); + MaybeRecordStat(compilation_stats_, + MethodCompilationStat::kNotCompiledMalformedOpcode); return false; } HInstruction* arg = LoadLocal(is_range ? register_index + i : args[i], type); @@ -1136,7 +1263,8 @@ bool HInstructionBuilder::SetupInvokeArguments(HInvoke* invoke, VLOG(compiler) << "Did not compile " << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << " because of wrong number of arguments in invoke instruction"; - MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode); + MaybeRecordStat(compilation_stats_, + MethodCompilationStat::kNotCompiledMalformedOpcode); return false; } @@ -1165,7 +1293,7 @@ bool HInstructionBuilder::HandleInvoke(HInvoke* invoke, if (invoke->GetInvokeType() != InvokeType::kStatic) { // Instance call. uint32_t obj_reg = is_range ? register_index : args[0]; HInstruction* arg = is_unresolved - ? LoadLocal(obj_reg, Primitive::kPrimNot) + ? 
LoadLocal(obj_reg, DataType::Type::kReference) : LoadNullCheckedLocal(obj_reg, invoke->GetDexPc()); invoke->SetArgumentAt(0, arg); start_index = 1; @@ -1225,7 +1353,7 @@ bool HInstructionBuilder::HandleStringInit(HInvoke* invoke, // This is a StringFactory call, not an actual String constructor. Its result // replaces the empty String pre-allocated by NewInstance. uint32_t orig_this_reg = is_range ? register_index : args[0]; - HInstruction* arg_this = LoadLocal(orig_this_reg, Primitive::kPrimNot); + HInstruction* arg_this = LoadLocal(orig_this_reg, DataType::Type::kReference); // Replacing the NewInstance might render it redundant. Keep a list of these // to be visited once it is clear whether it is has remaining uses. @@ -1247,10 +1375,10 @@ bool HInstructionBuilder::HandleStringInit(HInvoke* invoke, return true; } -static Primitive::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) { +static DataType::Type GetFieldAccessType(const DexFile& dex_file, uint16_t field_index) { const DexFile::FieldId& field_id = dex_file.GetFieldId(field_index); const char* type = dex_file.GetFieldTypeDescriptor(field_id); - return Primitive::GetType(type[0]); + return DataType::FromShorty(type[0]); } bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instruction, @@ -1262,6 +1390,8 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio uint16_t field_index; if (instruction.IsQuickened()) { if (!CanDecodeQuickenedInfo()) { + VLOG(compiler) << "Not compiled: Could not decode quickened instruction " + << instruction.Opcode(); return false; } field_index = LookupQuickenedInfo(quicken_index); @@ -1276,55 +1406,55 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio // is unresolved. In that case, we rely on the runtime to perform various // checks first, followed by a null check. HInstruction* object = (resolved_field == nullptr) - ? LoadLocal(obj_reg, Primitive::kPrimNot) + ? LoadLocal(obj_reg, DataType::Type::kReference) : LoadNullCheckedLocal(obj_reg, dex_pc); - Primitive::Type field_type = (resolved_field == nullptr) - ? 
GetFieldAccessType(*dex_file_, field_index) - : resolved_field->GetTypeAsPrimitiveType(); + DataType::Type field_type = GetFieldAccessType(*dex_file_, field_index); if (is_put) { HInstruction* value = LoadLocal(source_or_dest_reg, field_type); HInstruction* field_set = nullptr; if (resolved_field == nullptr) { - MaybeRecordStat(MethodCompilationStat::kUnresolvedField); - field_set = new (arena_) HUnresolvedInstanceFieldSet(object, - value, - field_type, - field_index, - dex_pc); + MaybeRecordStat(compilation_stats_, + MethodCompilationStat::kUnresolvedField); + field_set = new (allocator_) HUnresolvedInstanceFieldSet(object, + value, + field_type, + field_index, + dex_pc); } else { uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); - field_set = new (arena_) HInstanceFieldSet(object, - value, - resolved_field, - field_type, - resolved_field->GetOffset(), - resolved_field->IsVolatile(), - field_index, - class_def_index, - *dex_file_, - dex_pc); + field_set = new (allocator_) HInstanceFieldSet(object, + value, + resolved_field, + field_type, + resolved_field->GetOffset(), + resolved_field->IsVolatile(), + field_index, + class_def_index, + *dex_file_, + dex_pc); } AppendInstruction(field_set); } else { HInstruction* field_get = nullptr; if (resolved_field == nullptr) { - MaybeRecordStat(MethodCompilationStat::kUnresolvedField); - field_get = new (arena_) HUnresolvedInstanceFieldGet(object, - field_type, - field_index, - dex_pc); + MaybeRecordStat(compilation_stats_, + MethodCompilationStat::kUnresolvedField); + field_get = new (allocator_) HUnresolvedInstanceFieldGet(object, + field_type, + field_index, + dex_pc); } else { uint16_t class_def_index = resolved_field->GetDeclaringClass()->GetDexClassDefIndex(); - field_get = new (arena_) HInstanceFieldGet(object, - resolved_field, - field_type, - resolved_field->GetOffset(), - resolved_field->IsVolatile(), - field_index, - class_def_index, - *dex_file_, - dex_pc); + field_get = new (allocator_) HInstanceFieldGet(object, + resolved_field, + field_type, + resolved_field->GetOffset(), + resolved_field->IsVolatile(), + field_index, + class_def_index, + *dex_file_, + dex_pc); } AppendInstruction(field_get); UpdateLocal(source_or_dest_reg, field_get); @@ -1333,8 +1463,8 @@ bool HInstructionBuilder::BuildInstanceFieldAccess(const Instruction& instructio return true; } -static mirror::Class* GetClassFrom(CompilerDriver* driver, - const DexCompilationUnit& compilation_unit) { +static ObjPtr<mirror::Class> GetClassFrom(CompilerDriver* driver, + const DexCompilationUnit& compilation_unit) { ScopedObjectAccess soa(Thread::Current()); Handle<mirror::ClassLoader> class_loader = compilation_unit.GetClassLoader(); Handle<mirror::DexCache> dex_cache = compilation_unit.GetDexCache(); @@ -1342,11 +1472,11 @@ static mirror::Class* GetClassFrom(CompilerDriver* driver, return driver->ResolveCompilingMethodsClass(soa, dex_cache, class_loader, &compilation_unit); } -mirror::Class* HInstructionBuilder::GetOutermostCompilingClass() const { +ObjPtr<mirror::Class> HInstructionBuilder::GetOutermostCompilingClass() const { return GetClassFrom(compiler_driver_, *outer_compilation_unit_); } -mirror::Class* HInstructionBuilder::GetCompilingClass() const { +ObjPtr<mirror::Class> HInstructionBuilder::GetCompilingClass() const { return GetClassFrom(compiler_driver_, *dex_compilation_unit_); } @@ -1371,16 +1501,16 @@ bool HInstructionBuilder::IsOutermostCompilingClass(dex::TypeIndex type_index) c void 
HInstructionBuilder::BuildUnresolvedStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put, - Primitive::Type field_type) { + DataType::Type field_type) { uint32_t source_or_dest_reg = instruction.VRegA_21c(); uint16_t field_index = instruction.VRegB_21c(); if (is_put) { HInstruction* value = LoadLocal(source_or_dest_reg, field_type); AppendInstruction( - new (arena_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc)); + new (allocator_) HUnresolvedStaticFieldSet(value, field_type, field_index, dex_pc)); } else { - AppendInstruction(new (arena_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc)); + AppendInstruction(new (allocator_) HUnresolvedStaticFieldGet(field_type, field_index, dex_pc)); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } } @@ -1393,12 +1523,10 @@ ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static, Handle<mirror::ClassLoader> class_loader = dex_compilation_unit_->GetClassLoader(); Handle<mirror::Class> compiling_class(hs.NewHandle(GetCompilingClass())); - ArtField* resolved_field = class_linker->ResolveField(*dex_compilation_unit_->GetDexFile(), - field_idx, + ArtField* resolved_field = class_linker->ResolveField(field_idx, dex_compilation_unit_->GetDexCache(), class_loader, is_static); - if (UNLIKELY(resolved_field == nullptr)) { // Clean up any exception left by type resolution. soa.Self()->ClearException(); @@ -1434,7 +1562,7 @@ ArtField* HInstructionBuilder::ResolveField(uint16_t field_idx, bool is_static, return resolved_field; } -bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, +void HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put) { uint32_t source_or_dest_reg = instruction.VRegA_21c(); @@ -1444,13 +1572,14 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, ArtField* resolved_field = ResolveField(field_index, /* is_static */ true, is_put); if (resolved_field == nullptr) { - MaybeRecordStat(MethodCompilationStat::kUnresolvedField); - Primitive::Type field_type = GetFieldAccessType(*dex_file_, field_index); + MaybeRecordStat(compilation_stats_, + MethodCompilationStat::kUnresolvedField); + DataType::Type field_type = GetFieldAccessType(*dex_file_, field_index); BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type); - return true; + return; } - Primitive::Type field_type = resolved_field->GetTypeAsPrimitiveType(); + DataType::Type field_type = GetFieldAccessType(*dex_file_, field_index); Handle<mirror::Class> klass = handles_->NewHandle(resolved_field->GetDeclaringClass()); HLoadClass* constant = BuildLoadClass(klass->GetDexTypeIndex(), @@ -1462,14 +1591,15 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, if (constant == nullptr) { // The class cannot be referenced from this compiled code. Generate // an unresolved access. 
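[Note on the field-type change in this hunk: the resolved path of BuildStaticFieldAccess now takes the field type from the dex descriptor via GetFieldAccessType() / DataType::FromShorty() instead of resolved_field->GetTypeAsPrimitiveType(). A minimal standalone sketch of that descriptor-character mapping follows, using the DataType::Type names that appear throughout this change; the enum and function here are illustrative stand-ins, not the real art::DataType.]

    #include <cassert>

    // Illustrative stand-in for the DataType::Type names used in this diff.
    enum class Type { kBool, kInt8, kUint16, kInt16, kInt32, kInt64,
                      kFloat32, kFloat64, kReference, kVoid };

    // Sketch of a descriptor/shorty-character lookup similar in spirit to
    // DataType::FromShorty(); not the actual implementation.
    Type FromDescriptorChar(char c) {
      switch (c) {
        case 'Z': return Type::kBool;       // boolean
        case 'B': return Type::kInt8;       // byte
        case 'C': return Type::kUint16;     // char
        case 'S': return Type::kInt16;      // short
        case 'I': return Type::kInt32;      // int
        case 'J': return Type::kInt64;      // long
        case 'F': return Type::kFloat32;    // float
        case 'D': return Type::kFloat64;    // double
        case 'L':                           // class reference
        case '[': return Type::kReference;  // array reference
        default:  return Type::kVoid;       // 'V'
      }
    }

    int main() {
      assert(FromDescriptorChar('I') == Type::kInt32);
      assert(FromDescriptorChar('[') == Type::kReference);
      return 0;
    }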
- MaybeRecordStat(MethodCompilationStat::kUnresolvedFieldNotAFastAccess); + MaybeRecordStat(compilation_stats_, + MethodCompilationStat::kUnresolvedFieldNotAFastAccess); BuildUnresolvedStaticFieldAccess(instruction, dex_pc, is_put, field_type); - return true; + return; } HInstruction* cls = constant; if (!IsInitialized(klass)) { - cls = new (arena_) HClinitCheck(constant, dex_pc); + cls = new (allocator_) HClinitCheck(constant, dex_pc); AppendInstruction(cls); } @@ -1478,44 +1608,43 @@ bool HInstructionBuilder::BuildStaticFieldAccess(const Instruction& instruction, // We need to keep the class alive before loading the value. HInstruction* value = LoadLocal(source_or_dest_reg, field_type); DCHECK_EQ(HPhi::ToPhiType(value->GetType()), HPhi::ToPhiType(field_type)); - AppendInstruction(new (arena_) HStaticFieldSet(cls, - value, - resolved_field, - field_type, - resolved_field->GetOffset(), - resolved_field->IsVolatile(), - field_index, - class_def_index, - *dex_file_, - dex_pc)); + AppendInstruction(new (allocator_) HStaticFieldSet(cls, + value, + resolved_field, + field_type, + resolved_field->GetOffset(), + resolved_field->IsVolatile(), + field_index, + class_def_index, + *dex_file_, + dex_pc)); } else { - AppendInstruction(new (arena_) HStaticFieldGet(cls, - resolved_field, - field_type, - resolved_field->GetOffset(), - resolved_field->IsVolatile(), - field_index, - class_def_index, - *dex_file_, - dex_pc)); + AppendInstruction(new (allocator_) HStaticFieldGet(cls, + resolved_field, + field_type, + resolved_field->GetOffset(), + resolved_field->IsVolatile(), + field_index, + class_def_index, + *dex_file_, + dex_pc)); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); } - return true; } void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg, - uint16_t first_vreg, - int64_t second_vreg_or_constant, - uint32_t dex_pc, - Primitive::Type type, - bool second_is_constant, - bool isDiv) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + uint16_t first_vreg, + int64_t second_vreg_or_constant, + uint32_t dex_pc, + DataType::Type type, + bool second_is_constant, + bool isDiv) { + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); HInstruction* first = LoadLocal(first_vreg, type); HInstruction* second = nullptr; if (second_is_constant) { - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { second = graph_->GetIntConstant(second_vreg_or_constant, dex_pc); } else { second = graph_->GetLongConstant(second_vreg_or_constant, dex_pc); @@ -1525,16 +1654,16 @@ void HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg, } if (!second_is_constant - || (type == Primitive::kPrimInt && second->AsIntConstant()->GetValue() == 0) - || (type == Primitive::kPrimLong && second->AsLongConstant()->GetValue() == 0)) { - second = new (arena_) HDivZeroCheck(second, dex_pc); + || (type == DataType::Type::kInt32 && second->AsIntConstant()->GetValue() == 0) + || (type == DataType::Type::kInt64 && second->AsLongConstant()->GetValue() == 0)) { + second = new (allocator_) HDivZeroCheck(second, dex_pc); AppendInstruction(second); } if (isDiv) { - AppendInstruction(new (arena_) HDiv(type, first, second, dex_pc)); + AppendInstruction(new (allocator_) HDiv(type, first, second, dex_pc)); } else { - AppendInstruction(new (arena_) HRem(type, first, second, dex_pc)); + AppendInstruction(new (allocator_) HRem(type, first, second, dex_pc)); } UpdateLocal(out_vreg, current_block_->GetLastInstruction()); } @@ -1542,25 +1671,25 @@ void 
HInstructionBuilder::BuildCheckedDivRem(uint16_t out_vreg, void HInstructionBuilder::BuildArrayAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put, - Primitive::Type anticipated_type) { + DataType::Type anticipated_type) { uint8_t source_or_dest_reg = instruction.VRegA_23x(); uint8_t array_reg = instruction.VRegB_23x(); uint8_t index_reg = instruction.VRegC_23x(); HInstruction* object = LoadNullCheckedLocal(array_reg, dex_pc); - HInstruction* length = new (arena_) HArrayLength(object, dex_pc); + HInstruction* length = new (allocator_) HArrayLength(object, dex_pc); AppendInstruction(length); - HInstruction* index = LoadLocal(index_reg, Primitive::kPrimInt); - index = new (arena_) HBoundsCheck(index, length, dex_pc); + HInstruction* index = LoadLocal(index_reg, DataType::Type::kInt32); + index = new (allocator_) HBoundsCheck(index, length, dex_pc); AppendInstruction(index); if (is_put) { HInstruction* value = LoadLocal(source_or_dest_reg, anticipated_type); // TODO: Insert a type check node if the type is Object. - HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc); + HArraySet* aset = new (allocator_) HArraySet(object, index, value, anticipated_type, dex_pc); ssa_builder_->MaybeAddAmbiguousArraySet(aset); AppendInstruction(aset); } else { - HArrayGet* aget = new (arena_) HArrayGet(object, index, anticipated_type, dex_pc); + HArrayGet* aget = new (allocator_) HArrayGet(object, index, anticipated_type, dex_pc); ssa_builder_->MaybeAddAmbiguousArrayGet(aget); AppendInstruction(aget); UpdateLocal(source_or_dest_reg, current_block_->GetLastInstruction()); @@ -1576,7 +1705,7 @@ HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc, uint32_t register_index) { HInstruction* length = graph_->GetIntConstant(number_of_vreg_arguments, dex_pc); HLoadClass* cls = BuildLoadClass(type_index, dex_pc); - HNewArray* const object = new (arena_) HNewArray(cls, length, dex_pc); + HNewArray* const object = new (allocator_) HNewArray(cls, length, dex_pc); AppendInstruction(object); const char* descriptor = dex_file_->StringByTypeIdx(type_index); @@ -1586,12 +1715,12 @@ HNewArray* HInstructionBuilder::BuildFilledNewArray(uint32_t dex_pc, || primitive == 'L' || primitive == '[') << descriptor; bool is_reference_array = (primitive == 'L') || (primitive == '['); - Primitive::Type type = is_reference_array ? Primitive::kPrimNot : Primitive::kPrimInt; + DataType::Type type = is_reference_array ? DataType::Type::kReference : DataType::Type::kInt32; for (size_t i = 0; i < number_of_vreg_arguments; ++i) { HInstruction* value = LoadLocal(is_range ? 
register_index + i : args[i], type); HInstruction* index = graph_->GetIntConstant(i, dex_pc); - HArraySet* aset = new (arena_) HArraySet(object, index, value, type, dex_pc); + HArraySet* aset = new (allocator_) HArraySet(object, index, value, type, dex_pc); ssa_builder_->MaybeAddAmbiguousArraySet(aset); AppendInstruction(aset); } @@ -1604,12 +1733,12 @@ template <typename T> void HInstructionBuilder::BuildFillArrayData(HInstruction* object, const T* data, uint32_t element_count, - Primitive::Type anticipated_type, + DataType::Type anticipated_type, uint32_t dex_pc) { for (uint32_t i = 0; i < element_count; ++i) { HInstruction* index = graph_->GetIntConstant(i, dex_pc); HInstruction* value = graph_->GetIntConstant(data[i], dex_pc); - HArraySet* aset = new (arena_) HArraySet(object, index, value, anticipated_type, dex_pc); + HArraySet* aset = new (allocator_) HArraySet(object, index, value, anticipated_type, dex_pc); ssa_builder_->MaybeAddAmbiguousArraySet(aset); AppendInstruction(aset); } @@ -1620,7 +1749,8 @@ void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uin int32_t payload_offset = instruction.VRegB_31t() + dex_pc; const Instruction::ArrayDataPayload* payload = - reinterpret_cast<const Instruction::ArrayDataPayload*>(code_item_.insns_ + payload_offset); + reinterpret_cast<const Instruction::ArrayDataPayload*>( + code_item_accessor_.Insns() + payload_offset); const uint8_t* data = payload->data; uint32_t element_count = payload->element_count; @@ -1629,34 +1759,34 @@ void HInstructionBuilder::BuildFillArrayData(const Instruction& instruction, uin return; } - HInstruction* length = new (arena_) HArrayLength(array, dex_pc); + HInstruction* length = new (allocator_) HArrayLength(array, dex_pc); AppendInstruction(length); // Implementation of this DEX instruction seems to be that the bounds check is // done before doing any stores. 
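[Note on BuildFillArrayData: the comment here records an observed semantic of fill-array-data, namely that the bounds check against the last element index happens before any store. A small standalone sketch of that ordering follows; the struct is a simplified stand-in for Instruction::ArrayDataPayload, with field names taken from this hunk, and only the element_width == 1 case is shown.]

    #include <cstdint>
    #include <stdexcept>
    #include <vector>

    // Simplified stand-in for an array-data payload: `data` holds
    // `element_count` packed elements of `element_width` bytes each.
    struct Payload {
      uint16_t element_width;
      uint32_t element_count;
      const uint8_t* data;
    };

    // Mirrors the ordering described above: check the last index first,
    // then perform the stores.
    void FillArrayData(std::vector<int8_t>* array, const Payload& payload) {
      if (payload.element_count == 0) {
        return;  // empty payload, nothing to store
      }
      if (payload.element_count - 1 >= array->size()) {
        throw std::out_of_range("fill-array-data index out of bounds");
      }
      for (uint32_t i = 0; i < payload.element_count; ++i) {
        (*array)[i] = static_cast<int8_t>(payload.data[i]);
      }
    }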
HInstruction* last_index = graph_->GetIntConstant(payload->element_count - 1, dex_pc); - AppendInstruction(new (arena_) HBoundsCheck(last_index, length, dex_pc)); + AppendInstruction(new (allocator_) HBoundsCheck(last_index, length, dex_pc)); switch (payload->element_width) { case 1: BuildFillArrayData(array, reinterpret_cast<const int8_t*>(data), element_count, - Primitive::kPrimByte, + DataType::Type::kInt8, dex_pc); break; case 2: BuildFillArrayData(array, reinterpret_cast<const int16_t*>(data), element_count, - Primitive::kPrimShort, + DataType::Type::kInt16, dex_pc); break; case 4: BuildFillArrayData(array, reinterpret_cast<const int32_t*>(data), element_count, - Primitive::kPrimInt, + DataType::Type::kInt32, dex_pc); break; case 8: @@ -1678,7 +1808,8 @@ void HInstructionBuilder::BuildFillWideArrayData(HInstruction* object, for (uint32_t i = 0; i < element_count; ++i) { HInstruction* index = graph_->GetIntConstant(i, dex_pc); HInstruction* value = graph_->GetLongConstant(data[i], dex_pc); - HArraySet* aset = new (arena_) HArraySet(object, index, value, Primitive::kPrimLong, dex_pc); + HArraySet* aset = + new (allocator_) HArraySet(object, index, value, DataType::Type::kInt64, dex_pc); ssa_builder_->MaybeAddAmbiguousArraySet(aset); AppendInstruction(aset); } @@ -1707,6 +1838,17 @@ static TypeCheckKind ComputeTypeCheckKind(Handle<mirror::Class> cls) } } +void HInstructionBuilder::BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc) { + HLoadString* load_string = + new (allocator_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc); + HSharpening::ProcessLoadString(load_string, + code_generator_, + compiler_driver_, + *dex_compilation_unit_, + handles_); + AppendInstruction(load_string); +} + HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc) { ScopedObjectAccess soa(Thread::Current()); const DexFile& dex_file = *dex_compilation_unit_->GetDexFile(); @@ -1719,7 +1861,7 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, uint3 if (klass->IsPublic()) { needs_access_check = false; } else { - mirror::Class* compiling_class = GetCompilingClass(); + ObjPtr<mirror::Class> compiling_class = GetCompilingClass(); if (compiling_class != nullptr && compiling_class->CanAccess(klass.Get())) { needs_access_check = false; } @@ -1746,7 +1888,7 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, } // Note: `klass` must be from `handles_`. - HLoadClass* load_class = new (arena_) HLoadClass( + HLoadClass* load_class = new (allocator_) HLoadClass( graph_->GetCurrentMethod(), type_index, *actual_dex_file, @@ -1764,9 +1906,9 @@ HLoadClass* HInstructionBuilder::BuildLoadClass(dex::TypeIndex type_index, // We actually cannot reference this class, we're forced to bail. return nullptr; } - // Append the instruction first, as setting the load kind affects the inputs. - AppendInstruction(load_class); + // Load kind must be set before inserting the instruction into the graph. 
load_class->SetLoadKind(load_kind); + AppendInstruction(load_class); return load_class; } @@ -1775,21 +1917,21 @@ void HInstructionBuilder::BuildTypeCheck(const Instruction& instruction, uint8_t reference, dex::TypeIndex type_index, uint32_t dex_pc) { - HInstruction* object = LoadLocal(reference, Primitive::kPrimNot); + HInstruction* object = LoadLocal(reference, DataType::Type::kReference); HLoadClass* cls = BuildLoadClass(type_index, dex_pc); ScopedObjectAccess soa(Thread::Current()); TypeCheckKind check_kind = ComputeTypeCheckKind(cls->GetClass()); if (instruction.Opcode() == Instruction::INSTANCE_OF) { - AppendInstruction(new (arena_) HInstanceOf(object, cls, check_kind, dex_pc)); + AppendInstruction(new (allocator_) HInstanceOf(object, cls, check_kind, dex_pc)); UpdateLocal(destination, current_block_->GetLastInstruction()); } else { DCHECK_EQ(instruction.Opcode(), Instruction::CHECK_CAST); // We emit a CheckCast followed by a BoundType. CheckCast is a statement // which may throw. If it succeeds BoundType sets the new type of `object` // for all subsequent uses. - AppendInstruction(new (arena_) HCheckCast(object, cls, check_kind, dex_pc)); - AppendInstruction(new (arena_) HBoundType(object, dex_pc)); + AppendInstruction(new (allocator_) HCheckCast(object, cls, check_kind, dex_pc)); + AppendInstruction(new (allocator_) HBoundType(object, dex_pc)); UpdateLocal(reference, current_block_->GetLastInstruction()); } } @@ -1881,7 +2023,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::MOVE: case Instruction::MOVE_FROM16: case Instruction::MOVE_16: { - HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimInt); + HInstruction* value = LoadLocal(instruction.VRegB(), DataType::Type::kInt32); UpdateLocal(instruction.VRegA(), value); break; } @@ -1890,7 +2032,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::MOVE_WIDE: case Instruction::MOVE_WIDE_FROM16: case Instruction::MOVE_WIDE_16: { - HInstruction* value = LoadLocal(instruction.VRegB(), Primitive::kPrimLong); + HInstruction* value = LoadLocal(instruction.VRegB(), DataType::Type::kInt64); UpdateLocal(instruction.VRegA(), value); break; } @@ -1908,9 +2050,10 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, if (value->IsIntConstant()) { DCHECK_EQ(value->AsIntConstant()->GetValue(), 0); } else if (value->IsPhi()) { - DCHECK(value->GetType() == Primitive::kPrimInt || value->GetType() == Primitive::kPrimNot); + DCHECK(value->GetType() == DataType::Type::kInt32 || + value->GetType() == DataType::Type::kReference); } else { - value = LoadLocal(reg_number, Primitive::kPrimNot); + value = LoadLocal(reg_number, DataType::Type::kReference); } UpdateLocal(instruction.VRegA(), value); break; @@ -1918,7 +2061,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::RETURN_VOID_NO_BARRIER: case Instruction::RETURN_VOID: { - BuildReturn(instruction, Primitive::kPrimVoid, dex_pc); + BuildReturn(instruction, DataType::Type::kVoid, dex_pc); break; } @@ -1936,7 +2079,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::GOTO: case Instruction::GOTO_16: case Instruction::GOTO_32: { - AppendInstruction(new (arena_) HGoto(dex_pc)); + AppendInstruction(new (allocator_) HGoto(dex_pc)); current_block_ = nullptr; break; } @@ -1965,6 +2108,8 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, 
uint16_t method_idx; if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_QUICK) { if (!CanDecodeQuickenedInfo()) { + VLOG(compiler) << "Not compiled: Could not decode quickened instruction " + << instruction.Opcode(); return false; } method_idx = LookupQuickenedInfo(quicken_index); @@ -1990,6 +2135,8 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, uint16_t method_idx; if (instruction.Opcode() == Instruction::INVOKE_VIRTUAL_RANGE_QUICK) { if (!CanDecodeQuickenedInfo()) { + VLOG(compiler) << "Not compiled: Could not decode quickened instruction " + << instruction.Opcode(); return false; } method_idx = LookupQuickenedInfo(quicken_index); @@ -2037,435 +2184,435 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } case Instruction::NEG_INT: { - Unop_12x<HNeg>(instruction, Primitive::kPrimInt, dex_pc); + Unop_12x<HNeg>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::NEG_LONG: { - Unop_12x<HNeg>(instruction, Primitive::kPrimLong, dex_pc); + Unop_12x<HNeg>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::NEG_FLOAT: { - Unop_12x<HNeg>(instruction, Primitive::kPrimFloat, dex_pc); + Unop_12x<HNeg>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::NEG_DOUBLE: { - Unop_12x<HNeg>(instruction, Primitive::kPrimDouble, dex_pc); + Unop_12x<HNeg>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::NOT_INT: { - Unop_12x<HNot>(instruction, Primitive::kPrimInt, dex_pc); + Unop_12x<HNot>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::NOT_LONG: { - Unop_12x<HNot>(instruction, Primitive::kPrimLong, dex_pc); + Unop_12x<HNot>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::INT_TO_LONG: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimLong, dex_pc); + Conversion_12x(instruction, DataType::Type::kInt32, DataType::Type::kInt64, dex_pc); break; } case Instruction::INT_TO_FLOAT: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimFloat, dex_pc); + Conversion_12x(instruction, DataType::Type::kInt32, DataType::Type::kFloat32, dex_pc); break; } case Instruction::INT_TO_DOUBLE: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimDouble, dex_pc); + Conversion_12x(instruction, DataType::Type::kInt32, DataType::Type::kFloat64, dex_pc); break; } case Instruction::LONG_TO_INT: { - Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimInt, dex_pc); + Conversion_12x(instruction, DataType::Type::kInt64, DataType::Type::kInt32, dex_pc); break; } case Instruction::LONG_TO_FLOAT: { - Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimFloat, dex_pc); + Conversion_12x(instruction, DataType::Type::kInt64, DataType::Type::kFloat32, dex_pc); break; } case Instruction::LONG_TO_DOUBLE: { - Conversion_12x(instruction, Primitive::kPrimLong, Primitive::kPrimDouble, dex_pc); + Conversion_12x(instruction, DataType::Type::kInt64, DataType::Type::kFloat64, dex_pc); break; } case Instruction::FLOAT_TO_INT: { - Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimInt, dex_pc); + Conversion_12x(instruction, DataType::Type::kFloat32, DataType::Type::kInt32, dex_pc); break; } case Instruction::FLOAT_TO_LONG: { - Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimLong, dex_pc); + Conversion_12x(instruction, DataType::Type::kFloat32, DataType::Type::kInt64, dex_pc); break; } case Instruction::FLOAT_TO_DOUBLE: { - 
Conversion_12x(instruction, Primitive::kPrimFloat, Primitive::kPrimDouble, dex_pc); + Conversion_12x(instruction, DataType::Type::kFloat32, DataType::Type::kFloat64, dex_pc); break; } case Instruction::DOUBLE_TO_INT: { - Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimInt, dex_pc); + Conversion_12x(instruction, DataType::Type::kFloat64, DataType::Type::kInt32, dex_pc); break; } case Instruction::DOUBLE_TO_LONG: { - Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimLong, dex_pc); + Conversion_12x(instruction, DataType::Type::kFloat64, DataType::Type::kInt64, dex_pc); break; } case Instruction::DOUBLE_TO_FLOAT: { - Conversion_12x(instruction, Primitive::kPrimDouble, Primitive::kPrimFloat, dex_pc); + Conversion_12x(instruction, DataType::Type::kFloat64, DataType::Type::kFloat32, dex_pc); break; } case Instruction::INT_TO_BYTE: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimByte, dex_pc); + Conversion_12x(instruction, DataType::Type::kInt32, DataType::Type::kInt8, dex_pc); break; } case Instruction::INT_TO_SHORT: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimShort, dex_pc); + Conversion_12x(instruction, DataType::Type::kInt32, DataType::Type::kInt16, dex_pc); break; } case Instruction::INT_TO_CHAR: { - Conversion_12x(instruction, Primitive::kPrimInt, Primitive::kPrimChar, dex_pc); + Conversion_12x(instruction, DataType::Type::kInt32, DataType::Type::kUint16, dex_pc); break; } case Instruction::ADD_INT: { - Binop_23x<HAdd>(instruction, Primitive::kPrimInt, dex_pc); + Binop_23x<HAdd>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::ADD_LONG: { - Binop_23x<HAdd>(instruction, Primitive::kPrimLong, dex_pc); + Binop_23x<HAdd>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::ADD_DOUBLE: { - Binop_23x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc); + Binop_23x<HAdd>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::ADD_FLOAT: { - Binop_23x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_23x<HAdd>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::SUB_INT: { - Binop_23x<HSub>(instruction, Primitive::kPrimInt, dex_pc); + Binop_23x<HSub>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::SUB_LONG: { - Binop_23x<HSub>(instruction, Primitive::kPrimLong, dex_pc); + Binop_23x<HSub>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::SUB_FLOAT: { - Binop_23x<HSub>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_23x<HSub>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::SUB_DOUBLE: { - Binop_23x<HSub>(instruction, Primitive::kPrimDouble, dex_pc); + Binop_23x<HSub>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::ADD_INT_2ADDR: { - Binop_12x<HAdd>(instruction, Primitive::kPrimInt, dex_pc); + Binop_12x<HAdd>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::MUL_INT: { - Binop_23x<HMul>(instruction, Primitive::kPrimInt, dex_pc); + Binop_23x<HMul>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::MUL_LONG: { - Binop_23x<HMul>(instruction, Primitive::kPrimLong, dex_pc); + Binop_23x<HMul>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::MUL_FLOAT: { - Binop_23x<HMul>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_23x<HMul>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::MUL_DOUBLE: { - Binop_23x<HMul>(instruction, 
Primitive::kPrimDouble, dex_pc); + Binop_23x<HMul>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::DIV_INT: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(), - dex_pc, Primitive::kPrimInt, false, true); + dex_pc, DataType::Type::kInt32, false, true); break; } case Instruction::DIV_LONG: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(), - dex_pc, Primitive::kPrimLong, false, true); + dex_pc, DataType::Type::kInt64, false, true); break; } case Instruction::DIV_FLOAT: { - Binop_23x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_23x<HDiv>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::DIV_DOUBLE: { - Binop_23x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc); + Binop_23x<HDiv>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::REM_INT: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(), - dex_pc, Primitive::kPrimInt, false, false); + dex_pc, DataType::Type::kInt32, false, false); break; } case Instruction::REM_LONG: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(), - dex_pc, Primitive::kPrimLong, false, false); + dex_pc, DataType::Type::kInt64, false, false); break; } case Instruction::REM_FLOAT: { - Binop_23x<HRem>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_23x<HRem>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::REM_DOUBLE: { - Binop_23x<HRem>(instruction, Primitive::kPrimDouble, dex_pc); + Binop_23x<HRem>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::AND_INT: { - Binop_23x<HAnd>(instruction, Primitive::kPrimInt, dex_pc); + Binop_23x<HAnd>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::AND_LONG: { - Binop_23x<HAnd>(instruction, Primitive::kPrimLong, dex_pc); + Binop_23x<HAnd>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::SHL_INT: { - Binop_23x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc); + Binop_23x_shift<HShl>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::SHL_LONG: { - Binop_23x_shift<HShl>(instruction, Primitive::kPrimLong, dex_pc); + Binop_23x_shift<HShl>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::SHR_INT: { - Binop_23x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc); + Binop_23x_shift<HShr>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::SHR_LONG: { - Binop_23x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc); + Binop_23x_shift<HShr>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::USHR_INT: { - Binop_23x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc); + Binop_23x_shift<HUShr>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::USHR_LONG: { - Binop_23x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc); + Binop_23x_shift<HUShr>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::OR_INT: { - Binop_23x<HOr>(instruction, Primitive::kPrimInt, dex_pc); + Binop_23x<HOr>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::OR_LONG: { - Binop_23x<HOr>(instruction, Primitive::kPrimLong, dex_pc); + Binop_23x<HOr>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::XOR_INT: { - Binop_23x<HXor>(instruction, Primitive::kPrimInt, dex_pc); + Binop_23x<HXor>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::XOR_LONG: { - 
Binop_23x<HXor>(instruction, Primitive::kPrimLong, dex_pc); + Binop_23x<HXor>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::ADD_LONG_2ADDR: { - Binop_12x<HAdd>(instruction, Primitive::kPrimLong, dex_pc); + Binop_12x<HAdd>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::ADD_DOUBLE_2ADDR: { - Binop_12x<HAdd>(instruction, Primitive::kPrimDouble, dex_pc); + Binop_12x<HAdd>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::ADD_FLOAT_2ADDR: { - Binop_12x<HAdd>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_12x<HAdd>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::SUB_INT_2ADDR: { - Binop_12x<HSub>(instruction, Primitive::kPrimInt, dex_pc); + Binop_12x<HSub>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::SUB_LONG_2ADDR: { - Binop_12x<HSub>(instruction, Primitive::kPrimLong, dex_pc); + Binop_12x<HSub>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::SUB_FLOAT_2ADDR: { - Binop_12x<HSub>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_12x<HSub>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::SUB_DOUBLE_2ADDR: { - Binop_12x<HSub>(instruction, Primitive::kPrimDouble, dex_pc); + Binop_12x<HSub>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::MUL_INT_2ADDR: { - Binop_12x<HMul>(instruction, Primitive::kPrimInt, dex_pc); + Binop_12x<HMul>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::MUL_LONG_2ADDR: { - Binop_12x<HMul>(instruction, Primitive::kPrimLong, dex_pc); + Binop_12x<HMul>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::MUL_FLOAT_2ADDR: { - Binop_12x<HMul>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_12x<HMul>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::MUL_DOUBLE_2ADDR: { - Binop_12x<HMul>(instruction, Primitive::kPrimDouble, dex_pc); + Binop_12x<HMul>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::DIV_INT_2ADDR: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(), - dex_pc, Primitive::kPrimInt, false, true); + dex_pc, DataType::Type::kInt32, false, true); break; } case Instruction::DIV_LONG_2ADDR: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(), - dex_pc, Primitive::kPrimLong, false, true); + dex_pc, DataType::Type::kInt64, false, true); break; } case Instruction::REM_INT_2ADDR: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(), - dex_pc, Primitive::kPrimInt, false, false); + dex_pc, DataType::Type::kInt32, false, false); break; } case Instruction::REM_LONG_2ADDR: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegA(), instruction.VRegB(), - dex_pc, Primitive::kPrimLong, false, false); + dex_pc, DataType::Type::kInt64, false, false); break; } case Instruction::REM_FLOAT_2ADDR: { - Binop_12x<HRem>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_12x<HRem>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::REM_DOUBLE_2ADDR: { - Binop_12x<HRem>(instruction, Primitive::kPrimDouble, dex_pc); + Binop_12x<HRem>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::SHL_INT_2ADDR: { - Binop_12x_shift<HShl>(instruction, Primitive::kPrimInt, dex_pc); + Binop_12x_shift<HShl>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::SHL_LONG_2ADDR: { - Binop_12x_shift<HShl>(instruction, Primitive::kPrimLong, 
dex_pc); + Binop_12x_shift<HShl>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::SHR_INT_2ADDR: { - Binop_12x_shift<HShr>(instruction, Primitive::kPrimInt, dex_pc); + Binop_12x_shift<HShr>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::SHR_LONG_2ADDR: { - Binop_12x_shift<HShr>(instruction, Primitive::kPrimLong, dex_pc); + Binop_12x_shift<HShr>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::USHR_INT_2ADDR: { - Binop_12x_shift<HUShr>(instruction, Primitive::kPrimInt, dex_pc); + Binop_12x_shift<HUShr>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::USHR_LONG_2ADDR: { - Binop_12x_shift<HUShr>(instruction, Primitive::kPrimLong, dex_pc); + Binop_12x_shift<HUShr>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::DIV_FLOAT_2ADDR: { - Binop_12x<HDiv>(instruction, Primitive::kPrimFloat, dex_pc); + Binop_12x<HDiv>(instruction, DataType::Type::kFloat32, dex_pc); break; } case Instruction::DIV_DOUBLE_2ADDR: { - Binop_12x<HDiv>(instruction, Primitive::kPrimDouble, dex_pc); + Binop_12x<HDiv>(instruction, DataType::Type::kFloat64, dex_pc); break; } case Instruction::AND_INT_2ADDR: { - Binop_12x<HAnd>(instruction, Primitive::kPrimInt, dex_pc); + Binop_12x<HAnd>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::AND_LONG_2ADDR: { - Binop_12x<HAnd>(instruction, Primitive::kPrimLong, dex_pc); + Binop_12x<HAnd>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::OR_INT_2ADDR: { - Binop_12x<HOr>(instruction, Primitive::kPrimInt, dex_pc); + Binop_12x<HOr>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::OR_LONG_2ADDR: { - Binop_12x<HOr>(instruction, Primitive::kPrimLong, dex_pc); + Binop_12x<HOr>(instruction, DataType::Type::kInt64, dex_pc); break; } case Instruction::XOR_INT_2ADDR: { - Binop_12x<HXor>(instruction, Primitive::kPrimInt, dex_pc); + Binop_12x<HXor>(instruction, DataType::Type::kInt32, dex_pc); break; } case Instruction::XOR_LONG_2ADDR: { - Binop_12x<HXor>(instruction, Primitive::kPrimLong, dex_pc); + Binop_12x<HXor>(instruction, DataType::Type::kInt64, dex_pc); break; } @@ -2532,14 +2679,14 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::DIV_INT_LIT16: case Instruction::DIV_INT_LIT8: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(), - dex_pc, Primitive::kPrimInt, true, true); + dex_pc, DataType::Type::kInt32, true, true); break; } case Instruction::REM_INT_LIT16: case Instruction::REM_INT_LIT8: { BuildCheckedDivRem(instruction.VRegA(), instruction.VRegB(), instruction.VRegC(), - dex_pc, Primitive::kPrimInt, true, false); + dex_pc, DataType::Type::kInt32, true, false); break; } @@ -2570,10 +2717,10 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::NEW_ARRAY: { dex::TypeIndex type_index(instruction.VRegC_22c()); - HInstruction* length = LoadLocal(instruction.VRegB_22c(), Primitive::kPrimInt); + HInstruction* length = LoadLocal(instruction.VRegB_22c(), DataType::Type::kInt32); HLoadClass* cls = BuildLoadClass(type_index, dex_pc); - HNewArray* new_array = new (arena_) HNewArray(cls, length, dex_pc); + HNewArray* new_array = new (allocator_) HNewArray(cls, length, dex_pc); AppendInstruction(new_array); UpdateLocal(instruction.VRegA_22c(), current_block_->GetLastInstruction()); BuildConstructorFenceForAllocation(new_array); @@ -2624,27 +2771,27 @@ bool 
HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } case Instruction::CMP_LONG: { - Binop_23x_cmp(instruction, Primitive::kPrimLong, ComparisonBias::kNoBias, dex_pc); + Binop_23x_cmp(instruction, DataType::Type::kInt64, ComparisonBias::kNoBias, dex_pc); break; } case Instruction::CMPG_FLOAT: { - Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kGtBias, dex_pc); + Binop_23x_cmp(instruction, DataType::Type::kFloat32, ComparisonBias::kGtBias, dex_pc); break; } case Instruction::CMPG_DOUBLE: { - Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kGtBias, dex_pc); + Binop_23x_cmp(instruction, DataType::Type::kFloat64, ComparisonBias::kGtBias, dex_pc); break; } case Instruction::CMPL_FLOAT: { - Binop_23x_cmp(instruction, Primitive::kPrimFloat, ComparisonBias::kLtBias, dex_pc); + Binop_23x_cmp(instruction, DataType::Type::kFloat32, ComparisonBias::kLtBias, dex_pc); break; } case Instruction::CMPL_DOUBLE: { - Binop_23x_cmp(instruction, Primitive::kPrimDouble, ComparisonBias::kLtBias, dex_pc); + Binop_23x_cmp(instruction, DataType::Type::kFloat64, ComparisonBias::kLtBias, dex_pc); break; } @@ -2665,7 +2812,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::IGET_CHAR_QUICK: case Instruction::IGET_SHORT: case Instruction::IGET_SHORT_QUICK: { - if (!BuildInstanceFieldAccess(instruction, dex_pc, false, quicken_index)) { + if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ false, quicken_index)) { return false; } break; @@ -2685,7 +2832,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::IPUT_CHAR_QUICK: case Instruction::IPUT_SHORT: case Instruction::IPUT_SHORT_QUICK: { - if (!BuildInstanceFieldAccess(instruction, dex_pc, true, quicken_index)) { + if (!BuildInstanceFieldAccess(instruction, dex_pc, /* is_put */ true, quicken_index)) { return false; } break; @@ -2698,9 +2845,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::SGET_BYTE: case Instruction::SGET_CHAR: case Instruction::SGET_SHORT: { - if (!BuildStaticFieldAccess(instruction, dex_pc, false)) { - return false; - } + BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ false); break; } @@ -2711,9 +2856,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, case Instruction::SPUT_BYTE: case Instruction::SPUT_CHAR: case Instruction::SPUT_SHORT: { - if (!BuildStaticFieldAccess(instruction, dex_pc, true)) { - return false; - } + BuildStaticFieldAccess(instruction, dex_pc, /* is_put */ true); break; } @@ -2727,33 +2870,31 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, break; \ } - ARRAY_XX(, Primitive::kPrimInt); - ARRAY_XX(_WIDE, Primitive::kPrimLong); - ARRAY_XX(_OBJECT, Primitive::kPrimNot); - ARRAY_XX(_BOOLEAN, Primitive::kPrimBoolean); - ARRAY_XX(_BYTE, Primitive::kPrimByte); - ARRAY_XX(_CHAR, Primitive::kPrimChar); - ARRAY_XX(_SHORT, Primitive::kPrimShort); + ARRAY_XX(, DataType::Type::kInt32); + ARRAY_XX(_WIDE, DataType::Type::kInt64); + ARRAY_XX(_OBJECT, DataType::Type::kReference); + ARRAY_XX(_BOOLEAN, DataType::Type::kBool); + ARRAY_XX(_BYTE, DataType::Type::kInt8); + ARRAY_XX(_CHAR, DataType::Type::kUint16); + ARRAY_XX(_SHORT, DataType::Type::kInt16); case Instruction::ARRAY_LENGTH: { HInstruction* object = LoadNullCheckedLocal(instruction.VRegB_12x(), dex_pc); - AppendInstruction(new (arena_) HArrayLength(object, dex_pc)); + AppendInstruction(new 
(allocator_) HArrayLength(object, dex_pc)); UpdateLocal(instruction.VRegA_12x(), current_block_->GetLastInstruction()); break; } case Instruction::CONST_STRING: { dex::StringIndex string_index(instruction.VRegB_21c()); - AppendInstruction( - new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc)); + BuildLoadString(string_index, dex_pc); UpdateLocal(instruction.VRegA_21c(), current_block_->GetLastInstruction()); break; } case Instruction::CONST_STRING_JUMBO: { dex::StringIndex string_index(instruction.VRegB_31c()); - AppendInstruction( - new (arena_) HLoadString(graph_->GetCurrentMethod(), string_index, *dex_file_, dex_pc)); + BuildLoadString(string_index, dex_pc); UpdateLocal(instruction.VRegA_31c(), current_block_->GetLastInstruction()); break; } @@ -2766,15 +2907,15 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } case Instruction::MOVE_EXCEPTION: { - AppendInstruction(new (arena_) HLoadException(dex_pc)); + AppendInstruction(new (allocator_) HLoadException(dex_pc)); UpdateLocal(instruction.VRegA_11x(), current_block_->GetLastInstruction()); - AppendInstruction(new (arena_) HClearException(dex_pc)); + AppendInstruction(new (allocator_) HClearException(dex_pc)); break; } case Instruction::THROW: { - HInstruction* exception = LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot); - AppendInstruction(new (arena_) HThrow(exception, dex_pc)); + HInstruction* exception = LoadLocal(instruction.VRegA_11x(), DataType::Type::kReference); + AppendInstruction(new (allocator_) HThrow(exception, dex_pc)); // We finished building this block. Set the current block to null to avoid // adding dead instructions to it. current_block_ = nullptr; @@ -2797,16 +2938,16 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, } case Instruction::MONITOR_ENTER: { - AppendInstruction(new (arena_) HMonitorOperation( - LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot), + AppendInstruction(new (allocator_) HMonitorOperation( + LoadLocal(instruction.VRegA_11x(), DataType::Type::kReference), HMonitorOperation::OperationKind::kEnter, dex_pc)); break; } case Instruction::MONITOR_EXIT: { - AppendInstruction(new (arena_) HMonitorOperation( - LoadLocal(instruction.VRegA_11x(), Primitive::kPrimNot), + AppendInstruction(new (allocator_) HMonitorOperation( + LoadLocal(instruction.VRegA_11x(), DataType::Type::kReference), HMonitorOperation::OperationKind::kExit, dex_pc)); break; @@ -2823,7 +2964,8 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex()) << " because of unhandled instruction " << instruction.Name(); - MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction); + MaybeRecordStat(compilation_stats_, + MethodCompilationStat::kNotCompiledUnhandledInstruction); return false; } return true; @@ -2832,7 +2974,7 @@ bool HInstructionBuilder::ProcessDexInstruction(const Instruction& instruction, ObjPtr<mirror::Class> HInstructionBuilder::LookupResolvedType( dex::TypeIndex type_index, const DexCompilationUnit& compilation_unit) const { - return ClassLinker::LookupResolvedType( + return compilation_unit.GetClassLinker()->LookupResolvedType( type_index, compilation_unit.GetDexCache().Get(), compilation_unit.GetClassLoader().Get()); } diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h index 5a83df3813..4428c53277 100644 --- a/compiler/optimizing/instruction_builder.h 
+++ b/compiler/optimizing/instruction_builder.h @@ -17,23 +17,34 @@ #ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_INSTRUCTION_BUILDER_H_ -#include "base/arena_containers.h" -#include "base/arena_object.h" -#include "block_builder.h" -#include "dex_file_types.h" -#include "driver/compiler_driver.h" -#include "driver/compiler_driver-inl.h" -#include "driver/dex_compilation_unit.h" -#include "mirror/dex_cache.h" +#include "base/array_ref.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "data_type.h" +#include "dex/code_item_accessors.h" +#include "dex/dex_file.h" +#include "dex/dex_file_types.h" +#include "handle.h" #include "nodes.h" -#include "optimizing_compiler_stats.h" #include "quicken_info.h" -#include "ssa_builder.h" namespace art { +class ArenaBitVector; +class ArtField; +class ArtMethod; class CodeGenerator; +class CompilerDriver; +class DexCompilationUnit; +class HBasicBlockBuilder; class Instruction; +class OptimizingCompilerStats; +class SsaBuilder; +class VariableSizedHandleScope; + +namespace mirror { +class Class; +} // namespace mirror class HInstructionBuilder : public ValueObject { public: @@ -41,64 +52,40 @@ class HInstructionBuilder : public ValueObject { HBasicBlockBuilder* block_builder, SsaBuilder* ssa_builder, const DexFile* dex_file, - const DexFile::CodeItem& code_item, - Primitive::Type return_type, - DexCompilationUnit* dex_compilation_unit, - const DexCompilationUnit* const outer_compilation_unit, - CompilerDriver* driver, + const CodeItemDebugInfoAccessor& accessor, + DataType::Type return_type, + const DexCompilationUnit* dex_compilation_unit, + const DexCompilationUnit* outer_compilation_unit, + CompilerDriver* compiler_driver, CodeGenerator* code_generator, - const uint8_t* interpreter_metadata, + ArrayRef<const uint8_t> interpreter_metadata, OptimizingCompilerStats* compiler_stats, - Handle<mirror::DexCache> dex_cache, - VariableSizedHandleScope* handles) - : arena_(graph->GetArena()), - graph_(graph), - handles_(handles), - dex_file_(dex_file), - code_item_(code_item), - return_type_(return_type), - block_builder_(block_builder), - ssa_builder_(ssa_builder), - locals_for_(arena_->Adapter(kArenaAllocGraphBuilder)), - current_block_(nullptr), - current_locals_(nullptr), - latest_result_(nullptr), - current_this_parameter_(nullptr), - compiler_driver_(driver), - code_generator_(code_generator), - dex_compilation_unit_(dex_compilation_unit), - outer_compilation_unit_(outer_compilation_unit), - quicken_info_(interpreter_metadata), - compilation_stats_(compiler_stats), - dex_cache_(dex_cache), - loop_headers_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) { - loop_headers_.reserve(kDefaultNumberOfLoops); - } + VariableSizedHandleScope* handles, + ScopedArenaAllocator* local_allocator); bool Build(); + void BuildIntrinsic(ArtMethod* method); private: - void MaybeRecordStat(MethodCompilationStat compilation_stat); - void InitializeBlockLocals(); void PropagateLocalsToCatchBlocks(); void SetLoopHeaderPhiInputs(); bool ProcessDexInstruction(const Instruction& instruction, uint32_t dex_pc, size_t quicken_index); - void FindNativeDebugInfoLocations(ArenaBitVector* locations); + ArenaBitVector* FindNativeDebugInfoLocations(); bool CanDecodeQuickenedInfo() const; uint16_t LookupQuickenedInfo(uint32_t quicken_index); HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const; - ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block); + ScopedArenaVector<HInstruction*>* 
GetLocalsFor(HBasicBlock* block); // Out of line version of GetLocalsFor(), which has a fast path that is // beneficial to get inlined by callers. - ArenaVector<HInstruction*>* GetLocalsForWithAllocation( - HBasicBlock* block, ArenaVector<HInstruction*>* locals, const size_t vregs); + ScopedArenaVector<HInstruction*>* GetLocalsForWithAllocation( + HBasicBlock* block, ScopedArenaVector<HInstruction*>* locals, const size_t vregs); HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local); - HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const; + HInstruction* LoadLocal(uint32_t register_index, DataType::Type type) const; HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc); void UpdateLocal(uint32_t register_index, HInstruction* instruction); @@ -114,24 +101,24 @@ class HInstructionBuilder : public ValueObject { REQUIRES_SHARED(Locks::mutator_lock_); template<typename T> - void Unop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); + void Unop_12x(const Instruction& instruction, DataType::Type type, uint32_t dex_pc); template<typename T> - void Binop_23x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); + void Binop_23x(const Instruction& instruction, DataType::Type type, uint32_t dex_pc); template<typename T> - void Binop_23x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); + void Binop_23x_shift(const Instruction& instruction, DataType::Type type, uint32_t dex_pc); void Binop_23x_cmp(const Instruction& instruction, - Primitive::Type type, + DataType::Type type, ComparisonBias bias, uint32_t dex_pc); template<typename T> - void Binop_12x(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); + void Binop_12x(const Instruction& instruction, DataType::Type type, uint32_t dex_pc); template<typename T> - void Binop_12x_shift(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); + void Binop_12x_shift(const Instruction& instruction, DataType::Type type, uint32_t dex_pc); template<typename T> void Binop_22b(const Instruction& instruction, bool reverse, uint32_t dex_pc); @@ -143,19 +130,19 @@ class HInstructionBuilder : public ValueObject { template<typename T> void If_22t(const Instruction& instruction, uint32_t dex_pc); void Conversion_12x(const Instruction& instruction, - Primitive::Type input_type, - Primitive::Type result_type, + DataType::Type input_type, + DataType::Type result_type, uint32_t dex_pc); void BuildCheckedDivRem(uint16_t out_reg, uint16_t first_reg, int64_t second_reg_or_constant, uint32_t dex_pc, - Primitive::Type type, + DataType::Type type, bool second_is_lit, bool is_div); - void BuildReturn(const Instruction& instruction, Primitive::Type type, uint32_t dex_pc); + void BuildReturn(const Instruction& instruction, DataType::Type type, uint32_t dex_pc); // Builds an instance field access node and returns whether the instruction is supported. bool BuildInstanceFieldAccess(const Instruction& instruction, @@ -166,14 +153,14 @@ class HInstructionBuilder : public ValueObject { void BuildUnresolvedStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put, - Primitive::Type field_type); - // Builds a static field access node and returns whether the instruction is supported. - bool BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put); + DataType::Type field_type); + // Builds a static field access node. 
+ void BuildStaticFieldAccess(const Instruction& instruction, uint32_t dex_pc, bool is_put); void BuildArrayAccess(const Instruction& instruction, uint32_t dex_pc, bool is_get, - Primitive::Type anticipated_type); + DataType::Type anticipated_type); // Builds an invocation node and returns whether the instruction is supported. bool BuildInvoke(const Instruction& instruction, @@ -212,7 +199,7 @@ class HInstructionBuilder : public ValueObject { void BuildFillArrayData(HInstruction* object, const T* data, uint32_t element_count, - Primitive::Type anticipated_type, + DataType::Type anticipated_type, uint32_t dex_pc); // Fills the given object with data as specified in the fill-array-data @@ -232,9 +219,10 @@ class HInstructionBuilder : public ValueObject { // Builds an instruction sequence for a switch statement. void BuildSwitch(const Instruction& instruction, uint32_t dex_pc); - // Builds a `HLoadClass` loading the given `type_index`. If `outer` is true, - // this method will use the outer class's dex file to lookup the type at - // `type_index`. + // Builds a `HLoadString` loading the given `string_index`. + void BuildLoadString(dex::StringIndex string_index, uint32_t dex_pc); + + // Builds a `HLoadClass` loading the given `type_index`. HLoadClass* BuildLoadClass(dex::TypeIndex type_index, uint32_t dex_pc); HLoadClass* BuildLoadClass(dex::TypeIndex type_index, @@ -245,10 +233,10 @@ class HInstructionBuilder : public ValueObject { REQUIRES_SHARED(Locks::mutator_lock_); // Returns the outer-most compiling method's class. - mirror::Class* GetOutermostCompilingClass() const; + ObjPtr<mirror::Class> GetOutermostCompilingClass() const; // Returns the class whose method is being compiled. - mirror::Class* GetCompilingClass() const; + ObjPtr<mirror::Class> GetCompilingClass() const; // Returns whether `type_index` points to the outer-most compiling method's class. bool IsOutermostCompilingClass(dex::TypeIndex type_index) const; @@ -314,29 +302,19 @@ class HInstructionBuilder : public ValueObject { ObjPtr<mirror::Class> LookupReferrerClass() const REQUIRES_SHARED(Locks::mutator_lock_); - ArenaAllocator* const arena_; + ArenaAllocator* const allocator_; HGraph* const graph_; - VariableSizedHandleScope* handles_; + VariableSizedHandleScope* const handles_; // The dex file where the method being compiled is, and the bytecode data. const DexFile* const dex_file_; - const DexFile::CodeItem& code_item_; + const CodeItemDebugInfoAccessor code_item_accessor_; // null for intrinsic graph. // The return type of the method being compiled. - const Primitive::Type return_type_; + const DataType::Type return_type_; - HBasicBlockBuilder* block_builder_; - SsaBuilder* ssa_builder_; - - ArenaVector<ArenaVector<HInstruction*>> locals_for_; - HBasicBlock* current_block_; - ArenaVector<HInstruction*>* current_locals_; - HInstruction* latest_result_; - // Current "this" parameter. - // Valid only after InitializeParameters() finishes. - // * Null for static methods. - // * Non-null for instance methods. - HParameterValue* current_this_parameter_; + HBasicBlockBuilder* const block_builder_; + SsaBuilder* const ssa_builder_; CompilerDriver* const compiler_driver_; @@ -344,7 +322,7 @@ class HInstructionBuilder : public ValueObject { // The compilation unit of the current method being compiled. Note that // it can be an inlined method. - DexCompilationUnit* const dex_compilation_unit_; + const DexCompilationUnit* const dex_compilation_unit_; // The compilation unit of the outermost method being compiled. 
That is the // method being compiled (and not inlined), and potentially inlining other @@ -354,10 +332,20 @@ class HInstructionBuilder : public ValueObject { // Original values kept after instruction quickening. QuickenInfoTable quicken_info_; - OptimizingCompilerStats* compilation_stats_; - Handle<mirror::DexCache> dex_cache_; + OptimizingCompilerStats* const compilation_stats_; + + ScopedArenaAllocator* const local_allocator_; + ScopedArenaVector<ScopedArenaVector<HInstruction*>> locals_for_; + HBasicBlock* current_block_; + ScopedArenaVector<HInstruction*>* current_locals_; + HInstruction* latest_result_; + // Current "this" parameter. + // Valid only after InitializeParameters() finishes. + // * Null for static methods. + // * Non-null for instance methods. + HParameterValue* current_this_parameter_; - ArenaVector<HBasicBlock*> loop_headers_; + ScopedArenaVector<HBasicBlock*> loop_headers_; static constexpr int kDefaultNumberOfLoops = 2; diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index f2a8cc0333..a42a85dc1d 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -18,14 +18,19 @@ #include "art_method-inl.h" #include "class_linker-inl.h" +#include "data_type-inl.h" #include "escape.h" #include "intrinsics.h" #include "mirror/class-inl.h" -#include "sharpening.h" #include "scoped_thread_state_change-inl.h" +#include "sharpening.h" namespace art { +// Whether to run an exhaustive test of individual HInstructions cloning when each instruction +// is replaced with its copy if it is clonable. +static constexpr bool kTestInstructionClonerExhaustively = false; + class InstructionSimplifierVisitor : public HGraphDelegateVisitor { public: InstructionSimplifierVisitor(HGraph* graph, @@ -43,13 +48,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void RecordSimplification() { simplification_occurred_ = true; simplifications_at_current_position_++; - MaybeRecordStat(kInstructionSimplifications); - } - - void MaybeRecordStat(MethodCompilationStat stat) { - if (stats_ != nullptr) { - stats_->RecordStat(stat); - } + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplifications); } bool ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl); @@ -65,6 +64,7 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { bool TryDeMorganNegationFactoring(HBinaryOperation* op); bool TryHandleAssociativeAndCommutativeOperation(HBinaryOperation* instruction); bool TrySubtractionChainSimplification(HBinaryOperation* instruction); + bool TryCombineVecMultiplyAccumulate(HVecMul* mul); void VisitShift(HBinaryOperation* shift); @@ -104,13 +104,14 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitInvoke(HInvoke* invoke) OVERRIDE; void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE; + void VisitVecMul(HVecMul* instruction) OVERRIDE; bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; - void SimplifyRotate(HInvoke* invoke, bool is_left, Primitive::Type type); + void SimplifyRotate(HInvoke* invoke, bool is_left, DataType::Type type); void SimplifySystemArrayCopy(HInvoke* invoke); void SimplifyStringEquals(HInvoke* invoke); - void SimplifyCompare(HInvoke* invoke, bool is_signum, Primitive::Type type); + void SimplifyCompare(HInvoke* invoke, bool is_signum, DataType::Type type); void SimplifyIsNaN(HInvoke* invoke); void 
SimplifyFP2Int(HInvoke* invoke); void SimplifyStringCharAt(HInvoke* invoke); @@ -133,6 +134,11 @@ class InstructionSimplifierVisitor : public HGraphDelegateVisitor { }; void InstructionSimplifier::Run() { + if (kTestInstructionClonerExhaustively) { + CloneAndReplaceInstructionVisitor visitor(graph_); + visitor.VisitReversePostOrder(); + } + InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_); visitor.Run(); } @@ -182,14 +188,14 @@ bool InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop(HBinaryOperation // Note that we cannot optimize `(-a) + (-b)` to `-(a + b)` for floating-point. // When `a` is `-0.0` and `b` is `0.0`, the former expression yields `0.0`, // while the later yields `-0.0`. - if (!Primitive::IsIntegralType(binop->GetType())) { + if (!DataType::IsIntegralType(binop->GetType())) { return false; } binop->ReplaceInput(left_neg->GetInput(), 0); binop->ReplaceInput(right_neg->GetInput(), 1); left_neg->GetBlock()->RemoveInstruction(left_neg); right_neg->GetBlock()->RemoveInstruction(right_neg); - HNeg* neg = new (GetGraph()->GetArena()) HNeg(binop->GetType(), binop); + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(binop->GetType(), binop); binop->GetBlock()->InsertInstructionBefore(neg, binop->GetNext()); binop->ReplaceWithExceptInReplacementAtIndex(neg, 0); RecordSimplification(); @@ -198,7 +204,7 @@ bool InstructionSimplifierVisitor::TryMoveNegOnInputsAfterBinop(HBinaryOperation bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation* op) { DCHECK(op->IsAnd() || op->IsOr()) << op->DebugName(); - Primitive::Type type = op->GetType(); + DataType::Type type = op->GetType(); HInstruction* left = op->GetLeft(); HInstruction* right = op->GetRight(); @@ -228,15 +234,15 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation // Replace the `HAnd` or `HOr`. 
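[Note on TryMoveNegOnInputsAfterBinop, a few hunks up: the rewrite of (-a) + (-b) into -(a + b) stays restricted to integral types because of signed zero. The counterexample cited in the source comment can be checked directly with a standalone program; this is a demo, not ART code.]

    #include <cassert>
    #include <cmath>

    int main() {
      double a = -0.0;
      double b = 0.0;
      double former = (-a) + (-b);  // (+0.0) + (-0.0) == +0.0
      double latter = -(a + b);     // -((-0.0) + (+0.0)) == -0.0
      assert(former == latter);       // equal as values,
      assert(!std::signbit(former));  // but the sign of zero differs,
      assert(std::signbit(latter));   // so the rewrite is unsound for FP.
      return 0;
    }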
HBinaryOperation* hbin; if (op->IsAnd()) { - hbin = new (GetGraph()->GetArena()) HOr(type, src_left, src_right, dex_pc); + hbin = new (GetGraph()->GetAllocator()) HOr(type, src_left, src_right, dex_pc); } else { - hbin = new (GetGraph()->GetArena()) HAnd(type, src_left, src_right, dex_pc); + hbin = new (GetGraph()->GetAllocator()) HAnd(type, src_left, src_right, dex_pc); } HInstruction* hnot; if (left->IsBooleanNot()) { - hnot = new (GetGraph()->GetArena()) HBooleanNot(hbin, dex_pc); + hnot = new (GetGraph()->GetAllocator()) HBooleanNot(hbin, dex_pc); } else { - hnot = new (GetGraph()->GetArena()) HNot(type, hbin, dex_pc); + hnot = new (GetGraph()->GetAllocator()) HNot(type, hbin, dex_pc); } op->GetBlock()->InsertInstructionBefore(hbin, op); @@ -249,12 +255,93 @@ bool InstructionSimplifierVisitor::TryDeMorganNegationFactoring(HBinaryOperation return false; } +bool InstructionSimplifierVisitor::TryCombineVecMultiplyAccumulate(HVecMul* mul) { + DataType::Type type = mul->GetPackedType(); + InstructionSet isa = codegen_->GetInstructionSet(); + switch (isa) { + case InstructionSet::kArm64: + if (!(type == DataType::Type::kUint8 || + type == DataType::Type::kInt8 || + type == DataType::Type::kUint16 || + type == DataType::Type::kInt16 || + type == DataType::Type::kInt32)) { + return false; + } + break; + case InstructionSet::kMips: + case InstructionSet::kMips64: + if (!(type == DataType::Type::kUint8 || + type == DataType::Type::kInt8 || + type == DataType::Type::kUint16 || + type == DataType::Type::kInt16 || + type == DataType::Type::kInt32 || + type == DataType::Type::kInt64)) { + return false; + } + break; + default: + return false; + } + + ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); + + if (mul->HasOnlyOneNonEnvironmentUse()) { + HInstruction* use = mul->GetUses().front().GetUser(); + if (use->IsVecAdd() || use->IsVecSub()) { + // Replace code looking like + // VECMUL tmp, x, y + // VECADD/SUB dst, acc, tmp + // with + // VECMULACC dst, acc, x, y + // Note that we do not want to (unconditionally) perform the merge when the + // multiplication has multiple uses and it can be merged in all of them. + // Multiple uses could happen on the same control-flow path, and we would + // then increase the amount of work. In the future we could try to evaluate + // whether all uses are on different control-flow paths (using dominance and + // reverse-dominance information) and only perform the merge when they are. + HInstruction* accumulator = nullptr; + HVecBinaryOperation* binop = use->AsVecBinaryOperation(); + HInstruction* binop_left = binop->GetLeft(); + HInstruction* binop_right = binop->GetRight(); + // This is always true since the `HVecMul` has only one use (which is checked above). + DCHECK_NE(binop_left, binop_right); + if (binop_right == mul) { + accumulator = binop_left; + } else if (use->IsVecAdd()) { + DCHECK_EQ(binop_left, mul); + accumulator = binop_right; + } + + HInstruction::InstructionKind kind = + use->IsVecAdd() ? 
HInstruction::kAdd : HInstruction::kSub; + if (accumulator != nullptr) { + HVecMultiplyAccumulate* mulacc = + new (allocator) HVecMultiplyAccumulate(allocator, + kind, + accumulator, + mul->GetLeft(), + mul->GetRight(), + binop->GetPackedType(), + binop->GetVectorLength(), + binop->GetDexPc()); + + binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); + DCHECK(!mul->HasUses()); + mul->GetBlock()->RemoveInstruction(mul); + return true; + } + } + } + + return false; +} + void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { DCHECK(instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()); HInstruction* shift_amount = instruction->GetRight(); HInstruction* value = instruction->GetLeft(); - int64_t implicit_mask = (value->GetType() == Primitive::kPrimLong) + int64_t implicit_mask = (value->GetType() == DataType::Type::kInt64) ? kMaxLongShiftDistance : kMaxIntShiftDistance; @@ -277,7 +364,7 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { // SHL dst, value, cst & implicit_mask // (as defined by shift semantics). This ensures other // optimizations do not need to special case for such situations. - DCHECK_EQ(shift_amount->GetType(), Primitive::kPrimInt); + DCHECK_EQ(shift_amount->GetType(), DataType::Type::kInt32); instruction->ReplaceInput(GetGraph()->GetIntConstant(masked_cst), /* index */ 1); RecordSimplification(); return; @@ -285,18 +372,36 @@ void InstructionSimplifierVisitor::VisitShift(HBinaryOperation* instruction) { } // Shift operations implicitly mask the shift amount according to the type width. Get rid of - // unnecessary explicit masking operations on the shift amount. + // unnecessary And/Or/Xor/Add/Sub/TypeConversion operations on the shift amount that do not + // affect the relevant bits. // Replace code looking like - // AND masked_shift, shift, <superset of implicit mask> - // SHL dst, value, masked_shift + // AND adjusted_shift, shift, <superset of implicit mask> + // [OR/XOR/ADD/SUB adjusted_shift, shift, <value not overlapping with implicit mask>] + // [<conversion-from-integral-non-64-bit-type> adjusted_shift, shift] + // SHL dst, value, adjusted_shift // with // SHL dst, value, shift - if (shift_amount->IsAnd()) { - HAnd* and_insn = shift_amount->AsAnd(); - HConstant* mask = and_insn->GetConstantRight(); - if ((mask != nullptr) && ((Int64FromConstant(mask) & implicit_mask) == implicit_mask)) { - instruction->ReplaceInput(and_insn->GetLeastConstantLeft(), 1); + if (shift_amount->IsAnd() || + shift_amount->IsOr() || + shift_amount->IsXor() || + shift_amount->IsAdd() || + shift_amount->IsSub()) { + int64_t required_result = shift_amount->IsAnd() ? implicit_mask : 0; + HBinaryOperation* bin_op = shift_amount->AsBinaryOperation(); + HConstant* mask = bin_op->GetConstantRight(); + if (mask != nullptr && (Int64FromConstant(mask) & implicit_mask) == required_result) { + instruction->ReplaceInput(bin_op->GetLeastConstantLeft(), 1); RecordSimplification(); + return; + } + } else if (shift_amount->IsTypeConversion()) { + DCHECK_NE(shift_amount->GetType(), DataType::Type::kBool); // We never convert to bool. + DataType::Type source_type = shift_amount->InputAt(0)->GetType(); + // Non-integral and 64-bit source types require an explicit type conversion. 
+ if (DataType::IsIntegralType(source_type) && !DataType::Is64BitType(source_type)) { + instruction->ReplaceInput(shift_amount->AsTypeConversion()->GetInput(), 1); + RecordSimplification(); + return; } } } @@ -311,7 +416,8 @@ bool InstructionSimplifierVisitor::ReplaceRotateWithRor(HBinaryOperation* op, HUShr* ushr, HShl* shl) { DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()) << op->DebugName(); - HRor* ror = new (GetGraph()->GetArena()) HRor(ushr->GetType(), ushr->GetLeft(), ushr->GetRight()); + HRor* ror = + new (GetGraph()->GetAllocator()) HRor(ushr->GetType(), ushr->GetLeft(), ushr->GetRight()); op->GetBlock()->ReplaceAndRemoveInstructionWith(op, ror); if (!ushr->HasUses()) { ushr->GetBlock()->RemoveInstruction(ushr); @@ -338,7 +444,7 @@ bool InstructionSimplifierVisitor::TryReplaceWithRotate(HBinaryOperation* op) { if ((left->IsUShr() && right->IsShl()) || (left->IsShl() && right->IsUShr())) { HUShr* ushr = left->IsUShr() ? left->AsUShr() : right->AsUShr(); HShl* shl = left->IsShl() ? left->AsShl() : right->AsShl(); - DCHECK(Primitive::IsIntOrLongType(ushr->GetType())); + DCHECK(DataType::IsIntOrLongType(ushr->GetType())); if (ushr->GetType() == shl->GetType() && ushr->GetLeft() == shl->GetLeft()) { if (ushr->GetRight()->IsConstant() && shl->GetRight()->IsConstant()) { @@ -371,7 +477,7 @@ bool InstructionSimplifierVisitor::TryReplaceWithRotateConstantPattern(HBinaryOp HUShr* ushr, HShl* shl) { DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); - size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte; + size_t reg_bits = DataType::Size(ushr->GetType()) * kBitsPerByte; size_t rdist = Int64FromConstant(ushr->GetRight()->AsConstant()); size_t ldist = Int64FromConstant(shl->GetRight()->AsConstant()); if (((ldist + rdist) & (reg_bits - 1)) == 0) { @@ -432,7 +538,7 @@ bool InstructionSimplifierVisitor::TryReplaceWithRotateRegisterSubPattern(HBinar HShl* shl) { DCHECK(op->IsAdd() || op->IsXor() || op->IsOr()); DCHECK(ushr->GetRight()->IsSub() || shl->GetRight()->IsSub()); - size_t reg_bits = Primitive::ComponentSize(ushr->GetType()) * kBitsPerByte; + size_t reg_bits = DataType::Size(ushr->GetType()) * kBitsPerByte; HInstruction* shl_shift = shl->GetRight(); HInstruction* ushr_shift = ushr->GetRight(); if ((shl_shift->IsSub() && IsSubRegBitsMinusOther(shl_shift->AsSub(), reg_bits, ushr_shift)) || @@ -517,7 +623,7 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { if (object->IsNullConstant()) { check_cast->GetBlock()->RemoveInstruction(check_cast); - MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast); return; } @@ -527,7 +633,7 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { if (outcome) { check_cast->GetBlock()->RemoveInstruction(check_cast); - MaybeRecordStat(MethodCompilationStat::kRemovedCheckedCast); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedCheckedCast); if (!load_class->HasUses()) { // We cannot rely on DCE to remove the class because the `HLoadClass` thinks it can throw. 
// However, here we know that it cannot because the checkcast was successfull, hence @@ -557,7 +663,7 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HGraph* graph = GetGraph(); if (object->IsNullConstant()) { - MaybeRecordStat(kRemovedInstanceOf); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf); instruction->ReplaceWith(graph->GetIntConstant(0)); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); @@ -568,10 +674,10 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { // the return value check with the `outcome` check, b/27651442 . bool outcome = false; if (TypeCheckHasKnownOutcome(load_class, object, &outcome)) { - MaybeRecordStat(kRemovedInstanceOf); + MaybeRecordStat(stats_, MethodCompilationStat::kRemovedInstanceOf); if (outcome && can_be_null) { // Type test will succeed, we just need a null test. - HNotEqual* test = new (graph->GetArena()) HNotEqual(graph->GetNullConstant(), object); + HNotEqual* test = new (graph->GetAllocator()) HNotEqual(graph->GetNullConstant(), object); instruction->GetBlock()->InsertInstructionBefore(test, instruction); instruction->ReplaceWith(test); } else { @@ -590,43 +696,43 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { } void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { - if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) + if ((instruction->GetValue()->GetType() == DataType::Type::kReference) && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } void InstructionSimplifierVisitor::VisitStaticFieldSet(HStaticFieldSet* instruction) { - if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) + if ((instruction->GetValue()->GetType() == DataType::Type::kReference) && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } -static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* arena, HInstruction* cond) { +static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* allocator, HInstruction* cond) { HInstruction *lhs = cond->InputAt(0); HInstruction *rhs = cond->InputAt(1); switch (cond->GetKind()) { case HInstruction::kEqual: - return new (arena) HEqual(rhs, lhs); + return new (allocator) HEqual(rhs, lhs); case HInstruction::kNotEqual: - return new (arena) HNotEqual(rhs, lhs); + return new (allocator) HNotEqual(rhs, lhs); case HInstruction::kLessThan: - return new (arena) HGreaterThan(rhs, lhs); + return new (allocator) HGreaterThan(rhs, lhs); case HInstruction::kLessThanOrEqual: - return new (arena) HGreaterThanOrEqual(rhs, lhs); + return new (allocator) HGreaterThanOrEqual(rhs, lhs); case HInstruction::kGreaterThan: - return new (arena) HLessThan(rhs, lhs); + return new (allocator) HLessThan(rhs, lhs); case HInstruction::kGreaterThanOrEqual: - return new (arena) HLessThanOrEqual(rhs, lhs); + return new (allocator) HLessThanOrEqual(rhs, lhs); case HInstruction::kBelow: - return new (arena) HAbove(rhs, lhs); + return new (allocator) HAbove(rhs, lhs); case HInstruction::kBelowOrEqual: - return new (arena) HAboveOrEqual(rhs, lhs); + return new (allocator) HAboveOrEqual(rhs, lhs); case HInstruction::kAbove: - return new (arena) HBelow(rhs, lhs); + return new (allocator) HBelow(rhs, lhs); case HInstruction::kAboveOrEqual: - return new (arena) HBelowOrEqual(rhs, lhs); + return new (allocator) HBelowOrEqual(rhs, lhs); default: LOG(FATAL) << "Unknown 
ConditionType " << cond->GetKind(); } @@ -634,7 +740,7 @@ static HCondition* GetOppositeConditionSwapOps(ArenaAllocator* arena, HInstructi } static bool CmpHasBoolType(HInstruction* input, HInstruction* cmp) { - if (input->GetType() == Primitive::kPrimBoolean) { + if (input->GetType() == DataType::Type::kBool) { return true; // input has direct boolean type } else if (cmp->GetUses().HasExactlyOneElement()) { // Comparison also has boolean type if both its input and the instruction @@ -727,7 +833,7 @@ void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) { } else if (input->IsCondition() && // Don't change FP compares. The definition of compares involving // NaNs forces the compares to be done as written by the user. - !Primitive::IsFloatingPointType(input->InputAt(0)->GetType())) { + !DataType::IsFloatingPointType(input->InputAt(0)->GetType())) { // Replace condition with its opposite. replace_with = GetGraph()->InsertOppositeCondition(input->AsCondition(), bool_not); } @@ -739,6 +845,60 @@ void InstructionSimplifierVisitor::VisitBooleanNot(HBooleanNot* bool_not) { } } +// Constructs a new ABS(x) node in the HIR. +static HInstruction* NewIntegralAbs(ArenaAllocator* allocator, + HInstruction* x, + HInstruction* cursor) { + DataType::Type type = x->GetType(); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + // Construct a fake intrinsic with as much context as is needed to allocate one. + // The intrinsic will always be lowered into code later anyway. + // TODO: b/65164101 : moving towards a real HAbs node makes more sense. + HInvokeStaticOrDirect::DispatchInfo dispatch_info = { + HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress, + HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod, + 0u + }; + HInvokeStaticOrDirect* invoke = new (allocator) HInvokeStaticOrDirect( + allocator, + 1, + type, + x->GetDexPc(), + /*method_idx*/ -1, + /*resolved_method*/ nullptr, + dispatch_info, + kStatic, + MethodReference(nullptr, dex::kDexNoIndex), + HInvokeStaticOrDirect::ClinitCheckRequirement::kNone); + invoke->SetArgumentAt(0, x); + invoke->SetIntrinsic(type == DataType::Type::kInt32 ? Intrinsics::kMathAbsInt + : Intrinsics::kMathAbsLong, + kNoEnvironmentOrCache, + kNoSideEffects, + kNoThrow); + cursor->GetBlock()->InsertInstructionBefore(invoke, cursor); + return invoke; +} + +// Returns true if operands a and b consists of widening type conversions +// (either explicit or implicit) to the given to_type. +static bool AreLowerPrecisionArgs(DataType::Type to_type, HInstruction* a, HInstruction* b) { + if (a->IsTypeConversion() && a->GetType() == to_type) { + a = a->InputAt(0); + } + if (b->IsTypeConversion() && b->GetType() == to_type) { + b = b->InputAt(0); + } + DataType::Type type1 = a->GetType(); + DataType::Type type2 = b->GetType(); + return (type1 == DataType::Type::kUint8 && type2 == DataType::Type::kUint8) || + (type1 == DataType::Type::kInt8 && type2 == DataType::Type::kInt8) || + (type1 == DataType::Type::kInt16 && type2 == DataType::Type::kInt16) || + (type1 == DataType::Type::kUint16 && type2 == DataType::Type::kUint16) || + (type1 == DataType::Type::kInt32 && type2 == DataType::Type::kInt32 && + to_type == DataType::Type::kInt64); +} + void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { HInstruction* replace_with = nullptr; HInstruction* condition = select->GetCondition(); @@ -775,6 +935,48 @@ void InstructionSimplifierVisitor::VisitSelect(HSelect* select) { // Replace (cond ? false : true) with (!cond). 
replace_with = GetGraph()->InsertOppositeCondition(condition, select); } + } else if (condition->IsCondition()) { + IfCondition cmp = condition->AsCondition()->GetCondition(); + HInstruction* a = condition->InputAt(0); + HInstruction* b = condition->InputAt(1); + DataType::Type t_type = true_value->GetType(); + DataType::Type f_type = false_value->GetType(); + // Here we have a <cmp> b ? true_value : false_value. + // Test if both values are same-typed int or long. + if (t_type == f_type && + (t_type == DataType::Type::kInt32 || t_type == DataType::Type::kInt64)) { + // Try to replace typical integral ABS constructs. + if (true_value->IsNeg()) { + HInstruction* negated = true_value->InputAt(0); + if ((cmp == kCondLT || cmp == kCondLE) && + (a == negated && a == false_value && IsInt64Value(b, 0))) { + // Found a < 0 ? -a : a which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), false_value, select); + } + } else if (false_value->IsNeg()) { + HInstruction* negated = false_value->InputAt(0); + if ((cmp == kCondGT || cmp == kCondGE) && + (a == true_value && a == negated && IsInt64Value(b, 0))) { + // Found a > 0 ? a : -a which can be replaced by ABS(a). + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + } + } else if (true_value->IsSub() && false_value->IsSub()) { + HInstruction* true_sub1 = true_value->InputAt(0); + HInstruction* true_sub2 = true_value->InputAt(1); + HInstruction* false_sub1 = false_value->InputAt(0); + HInstruction* false_sub2 = false_value->InputAt(1); + if ((((cmp == kCondGT || cmp == kCondGE) && + (a == true_sub1 && b == true_sub2 && a == false_sub2 && b == false_sub1)) || + ((cmp == kCondLT || cmp == kCondLE) && + (a == true_sub2 && b == true_sub1 && a == false_sub1 && b == false_sub2))) && + AreLowerPrecisionArgs(t_type, a, b)) { + // Found a > b ? a - b : b - a or + // a < b ? b - a : a - b + // which can be replaced by ABS(a - b) for lower precision operands a, b. + replace_with = NewIntegralAbs(GetGraph()->GetAllocator(), true_value, select); + } + } + } } if (replace_with != nullptr) { @@ -808,7 +1010,9 @@ void InstructionSimplifierVisitor::VisitArrayLength(HArrayLength* instruction) { void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { HInstruction* value = instruction->GetValue(); - if (value->GetType() != Primitive::kPrimNot) return; + if (value->GetType() != DataType::Type::kReference) { + return; + } if (CanEnsureNotNullAt(value, instruction)) { instruction->ClearValueCanBeNull(); @@ -848,40 +1052,92 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { } } -static bool IsTypeConversionImplicit(Primitive::Type input_type, Primitive::Type result_type) { - // Invariant: We should never generate a conversion to a Boolean value. - DCHECK_NE(Primitive::kPrimBoolean, result_type); - - // Besides conversion to the same type, widening integral conversions are implicit, - // excluding conversions to long and the byte->char conversion where we need to - // clear the high 16 bits of the 32-bit sign-extended representation of byte. 
- return result_type == input_type || - (result_type == Primitive::kPrimInt && (input_type == Primitive::kPrimBoolean || - input_type == Primitive::kPrimByte || - input_type == Primitive::kPrimShort || - input_type == Primitive::kPrimChar)) || - (result_type == Primitive::kPrimChar && input_type == Primitive::kPrimBoolean) || - (result_type == Primitive::kPrimShort && (input_type == Primitive::kPrimBoolean || - input_type == Primitive::kPrimByte)) || - (result_type == Primitive::kPrimByte && input_type == Primitive::kPrimBoolean); -} - -static bool IsTypeConversionLossless(Primitive::Type input_type, Primitive::Type result_type) { +static bool IsTypeConversionLossless(DataType::Type input_type, DataType::Type result_type) { + // Make sure all implicit conversions have been simplified and no new ones have been introduced. + DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type)) + << input_type << "," << result_type; // The conversion to a larger type is loss-less with the exception of two cases, - // - conversion to char, the only unsigned type, where we may lose some bits, and + // - conversion to the unsigned type Uint16, where we may lose some bits, and // - conversion from float to long, the only FP to integral conversion with smaller FP type. // For integral to FP conversions this holds because the FP mantissa is large enough. - DCHECK_NE(input_type, result_type); - return Primitive::ComponentSize(result_type) > Primitive::ComponentSize(input_type) && - result_type != Primitive::kPrimChar && - !(result_type == Primitive::kPrimLong && input_type == Primitive::kPrimFloat); + // Note: The size check excludes Uint8 as the result type. + return DataType::Size(result_type) > DataType::Size(input_type) && + result_type != DataType::Type::kUint16 && + !(result_type == DataType::Type::kInt64 && input_type == DataType::Type::kFloat32); +} + +static inline bool TryReplaceFieldOrArrayGetType(HInstruction* maybe_get, DataType::Type new_type) { + if (maybe_get->IsInstanceFieldGet()) { + maybe_get->AsInstanceFieldGet()->SetType(new_type); + return true; + } else if (maybe_get->IsStaticFieldGet()) { + maybe_get->AsStaticFieldGet()->SetType(new_type); + return true; + } else if (maybe_get->IsArrayGet() && !maybe_get->AsArrayGet()->IsStringCharAt()) { + maybe_get->AsArrayGet()->SetType(new_type); + return true; + } else { + return false; + } +} + +// The type conversion is only used for storing into a field/element of the +// same/narrower size. +static bool IsTypeConversionForStoringIntoNoWiderFieldOnly(HTypeConversion* type_conversion) { + if (type_conversion->HasEnvironmentUses()) { + return false; + } + DataType::Type input_type = type_conversion->GetInputType(); + DataType::Type result_type = type_conversion->GetResultType(); + if (!DataType::IsIntegralType(input_type) || + !DataType::IsIntegralType(result_type) || + input_type == DataType::Type::kInt64 || + result_type == DataType::Type::kInt64) { + // Type conversion is needed if non-integer types are involved, or 64-bit + // types are involved, which may use different number of registers. + return false; + } + if (DataType::Size(input_type) >= DataType::Size(result_type)) { + // Type conversion is not necessary when storing to a field/element of the + // same/smaller size. + } else { + // We do not handle this case here. + return false; + } + + // Check if the converted value is only used for storing into heap. 
+ for (const HUseListNode<HInstruction*>& use : type_conversion->GetUses()) { + HInstruction* instruction = use.GetUser(); + if (instruction->IsInstanceFieldSet() && + instruction->AsInstanceFieldSet()->GetFieldType() == result_type) { + DCHECK_EQ(instruction->AsInstanceFieldSet()->GetValue(), type_conversion); + continue; + } + if (instruction->IsStaticFieldSet() && + instruction->AsStaticFieldSet()->GetFieldType() == result_type) { + DCHECK_EQ(instruction->AsStaticFieldSet()->GetValue(), type_conversion); + continue; + } + if (instruction->IsArraySet() && + instruction->AsArraySet()->GetComponentType() == result_type && + // not index use. + instruction->AsArraySet()->GetIndex() != type_conversion) { + DCHECK_EQ(instruction->AsArraySet()->GetValue(), type_conversion); + continue; + } + // The use is not as a store value, or the field/element type is not the + // same as the result_type, keep the type conversion. + return false; + } + // Codegen automatically handles the type conversion during the store. + return true; } void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruction) { HInstruction* input = instruction->GetInput(); - Primitive::Type input_type = input->GetType(); - Primitive::Type result_type = instruction->GetResultType(); - if (IsTypeConversionImplicit(input_type, result_type)) { + DataType::Type input_type = input->GetType(); + DataType::Type result_type = instruction->GetResultType(); + if (DataType::IsTypeConversionImplicit(input_type, result_type)) { // Remove the implicit conversion; this includes conversion to the same type. instruction->ReplaceWith(input); instruction->GetBlock()->RemoveInstruction(instruction); @@ -892,7 +1148,7 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct if (input->IsTypeConversion()) { HTypeConversion* input_conversion = input->AsTypeConversion(); HInstruction* original_input = input_conversion->GetInput(); - Primitive::Type original_type = original_input->GetType(); + DataType::Type original_type = original_input->GetType(); // When the first conversion is lossless, a direct conversion from the original type // to the final type yields the same result, even for a lossy second conversion, for @@ -903,14 +1159,14 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct // doesn't need, i.e. the final type is no wider than the intermediate. If so, direct // conversion yields the same result, for example long->int->short or int->char->short. bool integral_conversions_with_non_widening_second = - Primitive::IsIntegralType(input_type) && - Primitive::IsIntegralType(original_type) && - Primitive::IsIntegralType(result_type) && - Primitive::ComponentSize(result_type) <= Primitive::ComponentSize(input_type); + DataType::IsIntegralType(input_type) && + DataType::IsIntegralType(original_type) && + DataType::IsIntegralType(result_type) && + DataType::Size(result_type) <= DataType::Size(input_type); if (is_first_conversion_lossless || integral_conversions_with_non_widening_second) { // If the merged conversion is implicit, do the simplification unconditionally. 
- if (IsTypeConversionImplicit(original_type, result_type)) { + if (DataType::IsTypeConversionImplicit(original_type, result_type)) { instruction->ReplaceWith(original_input); instruction->GetBlock()->RemoveInstruction(instruction); if (!input_conversion->HasUses()) { @@ -928,18 +1184,18 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct return; } } - } else if (input->IsAnd() && Primitive::IsIntegralType(result_type)) { - DCHECK(Primitive::IsIntegralType(input_type)); + } else if (input->IsAnd() && DataType::IsIntegralType(result_type)) { + DCHECK(DataType::IsIntegralType(input_type)); HAnd* input_and = input->AsAnd(); HConstant* constant = input_and->GetConstantRight(); if (constant != nullptr) { int64_t value = Int64FromConstant(constant); DCHECK_NE(value, -1); // "& -1" would have been optimized away in VisitAnd(). size_t trailing_ones = CTZ(~static_cast<uint64_t>(value)); - if (trailing_ones >= kBitsPerByte * Primitive::ComponentSize(result_type)) { + if (trailing_ones >= kBitsPerByte * DataType::Size(result_type)) { // The `HAnd` is useless, for example in `(byte) (x & 0xff)`, get rid of it. HInstruction* original_input = input_and->GetLeastConstantLeft(); - if (IsTypeConversionImplicit(original_input->GetType(), result_type)) { + if (DataType::IsTypeConversionImplicit(original_input->GetType(), result_type)) { instruction->ReplaceWith(original_input); instruction->GetBlock()->RemoveInstruction(instruction); RecordSimplification(); @@ -952,13 +1208,32 @@ void InstructionSimplifierVisitor::VisitTypeConversion(HTypeConversion* instruct } } } + } else if (input->HasOnlyOneNonEnvironmentUse() && + ((input_type == DataType::Type::kInt8 && result_type == DataType::Type::kUint8) || + (input_type == DataType::Type::kUint8 && result_type == DataType::Type::kInt8) || + (input_type == DataType::Type::kInt16 && result_type == DataType::Type::kUint16) || + (input_type == DataType::Type::kUint16 && result_type == DataType::Type::kInt16))) { + // Try to modify the type of the load to `result_type` and remove the explicit type conversion. + if (TryReplaceFieldOrArrayGetType(input, result_type)) { + instruction->ReplaceWith(input); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; + } + } + + if (IsTypeConversionForStoringIntoNoWiderFieldOnly(instruction)) { + instruction->ReplaceWith(input); + instruction->GetBlock()->RemoveInstruction(instruction); + RecordSimplification(); + return; } } void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - bool integral_type = Primitive::IsIntegralType(instruction->GetType()); + bool integral_type = DataType::IsIntegralType(instruction->GetType()); if ((input_cst != nullptr) && input_cst->IsArithmeticZero()) { // Replace code looking like // ADD dst, src, 0 @@ -998,7 +1273,8 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { // particular, we do not want the live range of `b` to be extended if we are // not sure the initial 'NEG' instruction can be removed. HInstruction* other = left_is_neg ? 
right : left; - HSub* sub = new(GetGraph()->GetArena()) HSub(instruction->GetType(), other, neg->GetInput()); + HSub* sub = + new(GetGraph()->GetAllocator()) HSub(instruction->GetType(), other, neg->GetInput()); instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, sub); RecordSimplification(); neg->GetBlock()->RemoveInstruction(neg); @@ -1041,12 +1317,16 @@ void InstructionSimplifierVisitor::VisitAdd(HAdd* instruction) { } void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { + DCHECK(DataType::IsIntegralType(instruction->GetType())); HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); if (input_cst != nullptr) { int64_t value = Int64FromConstant(input_cst); - if (value == -1) { + if (value == -1 || + // Similar cases under zero extension. + (DataType::IsUnsignedType(input_other->GetType()) && + ((DataType::MaxValueOfIntegralType(input_other->GetType()) & ~value) == 0))) { // Replace code looking like // AND dst, src, 0xFFF...FF // with @@ -1056,11 +1336,42 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { RecordSimplification(); return; } + if (input_other->IsTypeConversion() && + input_other->GetType() == DataType::Type::kInt64 && + DataType::IsIntegralType(input_other->InputAt(0)->GetType()) && + IsInt<32>(value) && + input_other->HasOnlyOneNonEnvironmentUse()) { + // The AND can be reordered before the TypeConversion. Replace + // LongConstant cst, <32-bit-constant-sign-extended-to-64-bits> + // TypeConversion<Int64> tmp, src + // AND dst, tmp, cst + // with + // IntConstant cst, <32-bit-constant> + // AND tmp, src, cst + // TypeConversion<Int64> dst, tmp + // This helps 32-bit targets and does not hurt 64-bit targets. + // This also simplifies detection of other patterns, such as Uint8 loads. + HInstruction* new_and_input = input_other->InputAt(0); + // Implicit conversion Int64->Int64 would have been removed previously. + DCHECK_NE(new_and_input->GetType(), DataType::Type::kInt64); + HConstant* new_const = GetGraph()->GetConstant(DataType::Type::kInt32, value); + HAnd* new_and = + new (GetGraph()->GetAllocator()) HAnd(DataType::Type::kInt32, new_and_input, new_const); + instruction->GetBlock()->InsertInstructionBefore(new_and, instruction); + HTypeConversion* new_conversion = + new (GetGraph()->GetAllocator()) HTypeConversion(DataType::Type::kInt64, new_and); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, new_conversion); + input_other->GetBlock()->RemoveInstruction(input_other); + RecordSimplification(); + // Try to process the new And now, do not wait for the next round of simplifications. + instruction = new_and; + input_other = new_and_input; + } // Eliminate And from UShr+And if the And-mask contains all the bits that // can be non-zero after UShr. Transform Shr+And to UShr if the And-mask // precisely clears the shifted-in sign bits. if ((input_other->IsUShr() || input_other->IsShr()) && input_other->InputAt(1)->IsConstant()) { - size_t reg_bits = (instruction->GetResultType() == Primitive::kPrimLong) ? 64 : 32; + size_t reg_bits = (instruction->GetResultType() == DataType::Type::kInt64) ? 
64 : 32; size_t shift = Int64FromConstant(input_other->InputAt(1)->AsConstant()) & (reg_bits - 1); size_t num_tail_bits_set = CTZ(value + 1); if ((num_tail_bits_set >= reg_bits - shift) && input_other->IsUShr()) { @@ -1073,16 +1384,38 @@ void InstructionSimplifierVisitor::VisitAnd(HAnd* instruction) { input_other->HasOnlyOneNonEnvironmentUse()) { DCHECK(input_other->IsShr()); // For UShr, we would have taken the branch above. // Replace SHR+AND with USHR, for example "(x >> 24) & 0xff" -> "x >>> 24". - HUShr* ushr = new (GetGraph()->GetArena()) HUShr(instruction->GetType(), - input_other->InputAt(0), - input_other->InputAt(1), - input_other->GetDexPc()); + HUShr* ushr = new (GetGraph()->GetAllocator()) HUShr(instruction->GetType(), + input_other->InputAt(0), + input_other->InputAt(1), + input_other->GetDexPc()); instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, ushr); input_other->GetBlock()->RemoveInstruction(input_other); RecordSimplification(); return; } } + if ((value == 0xff || value == 0xffff) && instruction->GetType() != DataType::Type::kInt64) { + // Transform AND to a type conversion to Uint8/Uint16. If `input_other` is a field + // or array Get with only a single use, short-circuit the subsequent simplification + // of the Get+TypeConversion and change the Get's type to `new_type` instead. + DataType::Type new_type = (value == 0xff) ? DataType::Type::kUint8 : DataType::Type::kUint16; + DataType::Type find_type = (value == 0xff) ? DataType::Type::kInt8 : DataType::Type::kInt16; + if (input_other->GetType() == find_type && + input_other->HasOnlyOneNonEnvironmentUse() && + TryReplaceFieldOrArrayGetType(input_other, new_type)) { + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } else if (DataType::IsTypeConversionImplicit(input_other->GetType(), new_type)) { + instruction->ReplaceWith(input_other); + instruction->GetBlock()->RemoveInstruction(instruction); + } else { + HTypeConversion* type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion( + new_type, input_other, instruction->GetDexPc()); + instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, type_conversion); + } + RecordSimplification(); + return; + } } // We assume that GVN has run before, so we only perform a pointer comparison. @@ -1204,7 +1537,8 @@ void InstructionSimplifierVisitor::VisitCondition(HCondition* condition) { // on the right hand side. if (condition->GetLeft()->IsConstant() && !condition->GetRight()->IsConstant()) { HBasicBlock* block = condition->GetBlock(); - HCondition* replacement = GetOppositeConditionSwapOps(block->GetGraph()->GetArena(), condition); + HCondition* replacement = + GetOppositeConditionSwapOps(block->GetGraph()->GetAllocator(), condition); // If it is a fp we must set the opposite bias. 
if (replacement != nullptr) { if (condition->IsLtBias()) { @@ -1281,7 +1615,7 @@ static constexpr bool CanDivideByReciprocalMultiplyDouble(int64_t divisor) { void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); if ((input_cst != nullptr) && input_cst->IsOne()) { // Replace code looking like @@ -1300,24 +1634,24 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { // with // NEG dst, src instruction->GetBlock()->ReplaceAndRemoveInstructionWith( - instruction, new (GetGraph()->GetArena()) HNeg(type, input_other)); + instruction, new (GetGraph()->GetAllocator()) HNeg(type, input_other)); RecordSimplification(); return; } - if ((input_cst != nullptr) && Primitive::IsFloatingPointType(type)) { + if ((input_cst != nullptr) && DataType::IsFloatingPointType(type)) { // Try replacing code looking like // DIV dst, src, constant // with // MUL dst, src, 1 / constant HConstant* reciprocal = nullptr; - if (type == Primitive::Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { double value = input_cst->AsDoubleConstant()->GetValue(); if (CanDivideByReciprocalMultiplyDouble(bit_cast<int64_t, double>(value))) { reciprocal = GetGraph()->GetDoubleConstant(1.0 / value); } } else { - DCHECK_EQ(type, Primitive::kPrimFloat); + DCHECK_EQ(type, DataType::Type::kFloat32); float value = input_cst->AsFloatConstant()->GetValue(); if (CanDivideByReciprocalMultiplyFloat(bit_cast<int32_t, float>(value))) { reciprocal = GetGraph()->GetFloatConstant(1.0f / value); @@ -1326,7 +1660,7 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { if (reciprocal != nullptr) { instruction->GetBlock()->ReplaceAndRemoveInstructionWith( - instruction, new (GetGraph()->GetArena()) HMul(type, input_other, reciprocal)); + instruction, new (GetGraph()->GetAllocator()) HMul(type, input_other, reciprocal)); RecordSimplification(); return; } @@ -1336,9 +1670,9 @@ void InstructionSimplifierVisitor::VisitDiv(HDiv* instruction) { void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); HBasicBlock* block = instruction->GetBlock(); - ArenaAllocator* allocator = GetGraph()->GetArena(); + ArenaAllocator* allocator = GetGraph()->GetAllocator(); if (input_cst == nullptr) { return; @@ -1356,7 +1690,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { } if (input_cst->IsMinusOne() && - (Primitive::IsFloatingPointType(type) || Primitive::IsIntOrLongType(type))) { + (DataType::IsFloatingPointType(type) || DataType::IsIntOrLongType(type))) { // Replace code looking like // MUL dst, src, -1 // with @@ -1367,7 +1701,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { return; } - if (Primitive::IsFloatingPointType(type) && + if (DataType::IsFloatingPointType(type) && ((input_cst->IsFloatConstant() && input_cst->AsFloatConstant()->GetValue() == 2.0f) || (input_cst->IsDoubleConstant() && input_cst->AsDoubleConstant()->GetValue() == 2.0))) { // Replace code looking like @@ -1381,7 +1715,7 @@ void InstructionSimplifierVisitor::VisitMul(HMul* instruction) { return; } - if (Primitive::IsIntOrLongType(type)) { + if 
(DataType::IsIntOrLongType(type)) { int64_t factor = Int64FromConstant(input_cst); // Even though constant propagation also takes care of the zero case, other // optimizations can lead to having a zero multiplication. @@ -1464,7 +1798,7 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) { } if (input->IsSub() && input->HasOnlyOneNonEnvironmentUse() && - !Primitive::IsFloatingPointType(input->GetType())) { + !DataType::IsFloatingPointType(input->GetType())) { // Replace code looking like // SUB tmp, a, b // NEG dst, tmp @@ -1477,8 +1811,8 @@ void InstructionSimplifierVisitor::VisitNeg(HNeg* instruction) { // removed. // We do not perform optimization for fp because we could lose the sign of zero. HSub* sub = input->AsSub(); - HSub* new_sub = - new (GetGraph()->GetArena()) HSub(instruction->GetType(), sub->GetRight(), sub->GetLeft()); + HSub* new_sub = new (GetGraph()->GetAllocator()) HSub( + instruction->GetType(), sub->GetRight(), sub->GetLeft()); instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, new_sub); if (!sub->HasUses()) { sub->GetBlock()->RemoveInstruction(sub); @@ -1560,8 +1894,8 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { HConstant* input_cst = instruction->GetConstantRight(); HInstruction* input_other = instruction->GetLeastConstantLeft(); - Primitive::Type type = instruction->GetType(); - if (Primitive::IsFloatingPointType(type)) { + DataType::Type type = instruction->GetType(); + if (DataType::IsFloatingPointType(type)) { return; } @@ -1580,7 +1914,7 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { } HBasicBlock* block = instruction->GetBlock(); - ArenaAllocator* allocator = GetGraph()->GetArena(); + ArenaAllocator* allocator = GetGraph()->GetAllocator(); HInstruction* left = instruction->GetLeft(); HInstruction* right = instruction->GetRight(); @@ -1612,7 +1946,7 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { // SUB dst, a, tmp // with // ADD dst, a, b - HAdd* add = new(GetGraph()->GetArena()) HAdd(type, left, right->AsNeg()->GetInput()); + HAdd* add = new(GetGraph()->GetAllocator()) HAdd(type, left, right->AsNeg()->GetInput()); instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, add); RecordSimplification(); right->GetBlock()->RemoveInstruction(right); @@ -1628,9 +1962,9 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { // NEG dst, tmp // The second version is not intrinsically better, but enables more // transformations. - HAdd* add = new(GetGraph()->GetArena()) HAdd(type, left->AsNeg()->GetInput(), right); + HAdd* add = new(GetGraph()->GetAllocator()) HAdd(type, left->AsNeg()->GetInput(), right); instruction->GetBlock()->InsertInstructionBefore(add, instruction); - HNeg* neg = new (GetGraph()->GetArena()) HNeg(instruction->GetType(), add); + HNeg* neg = new (GetGraph()->GetAllocator()) HNeg(instruction->GetType(), add); instruction->GetBlock()->InsertInstructionBefore(neg, instruction); instruction->ReplaceWith(neg); instruction->GetBlock()->RemoveInstruction(instruction); @@ -1652,7 +1986,7 @@ void InstructionSimplifierVisitor::VisitSub(HSub* instruction) { // SUB instruction is not needed in this case, we may use // one of inputs of ADD instead. // It is applicable to integral types only. 
- DCHECK(Primitive::IsIntegralType(type)); + DCHECK(DataType::IsIntegralType(type)); if (left->InputAt(1) == right) { instruction->ReplaceWith(left->InputAt(0)); RecordSimplification(); @@ -1687,12 +2021,12 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { } if ((input_cst != nullptr) && input_cst->IsOne() - && input_other->GetType() == Primitive::kPrimBoolean) { + && input_other->GetType() == DataType::Type::kBool) { // Replace code looking like // XOR dst, src, 1 // with // BOOLEAN_NOT dst, src - HBooleanNot* boolean_not = new (GetGraph()->GetArena()) HBooleanNot(input_other); + HBooleanNot* boolean_not = new (GetGraph()->GetAllocator()) HBooleanNot(input_other); instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, boolean_not); RecordSimplification(); return; @@ -1703,7 +2037,7 @@ void InstructionSimplifierVisitor::VisitXor(HXor* instruction) { // XOR dst, src, 0xFFF...FF // with // NOT dst, src - HNot* bitwise_not = new (GetGraph()->GetArena()) HNot(instruction->GetType(), input_other); + HNot* bitwise_not = new (GetGraph()->GetAllocator()) HNot(instruction->GetType(), input_other); instruction->GetBlock()->ReplaceAndRemoveInstructionWith(instruction, bitwise_not); RecordSimplification(); return; @@ -1758,13 +2092,29 @@ void InstructionSimplifierVisitor::SimplifyStringEquals(HInvoke* instruction) { ReferenceTypeInfo argument_rti = argument->GetReferenceTypeInfo(); if (argument_rti.IsValid() && argument_rti.IsStringClass()) { optimizations.SetArgumentIsString(); + } else if (kUseReadBarrier) { + DCHECK(instruction->GetResolvedMethod() != nullptr); + DCHECK(instruction->GetResolvedMethod()->GetDeclaringClass()->IsStringClass() || + // Object.equals() can be devirtualized to String.equals(). + instruction->GetResolvedMethod()->GetDeclaringClass()->IsObjectClass()); + Runtime* runtime = Runtime::Current(); + // For AOT, we always assume that the boot image shall contain the String.class and + // we do not need a read barrier for boot image classes as they are non-moveable. + // For JIT, check if we actually have a boot image; if we do, the String.class + // should also be non-moveable. + if (runtime->IsAotCompiler() || runtime->GetHeap()->HasBootImageSpace()) { + DCHECK(runtime->IsAotCompiler() || + !runtime->GetHeap()->IsMovableObject( + instruction->GetResolvedMethod()->GetDeclaringClass())); + optimizations.SetNoReadBarrierForStringClass(); + } } } } void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, bool is_left, - Primitive::Type type) { + DataType::Type type) { DCHECK(invoke->IsInvokeStaticOrDirect()); DCHECK_EQ(invoke->GetInvokeType(), InvokeType::kStatic); HInstruction* value = invoke->InputAt(0); @@ -1774,10 +2124,10 @@ void InstructionSimplifierVisitor::SimplifyRotate(HInvoke* invoke, // Unconditionally set the type of the negated distance to `int`, // as shift and rotate operations expect a 32-bit (or narrower) // value for their distance input. - distance = new (GetGraph()->GetArena()) HNeg(Primitive::kPrimInt, distance); + distance = new (GetGraph()->GetAllocator()) HNeg(DataType::Type::kInt32, distance); invoke->GetBlock()->InsertInstructionBefore(distance, invoke); } - HRor* ror = new (GetGraph()->GetArena()) HRor(type, value, distance); + HRor* ror = new (GetGraph()->GetAllocator()) HRor(type, value, distance); invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, ror); // Remove ClinitCheck and LoadClass, if possible. 
HInstruction* clinit = invoke->GetInputs().back(); @@ -1827,8 +2177,8 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) { ScopedObjectAccess soa(Thread::Current()); - Primitive::Type source_component_type = Primitive::kPrimVoid; - Primitive::Type destination_component_type = Primitive::kPrimVoid; + DataType::Type source_component_type = DataType::Type::kVoid; + DataType::Type destination_component_type = DataType::Type::kVoid; ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo(); if (destination_rti.IsValid()) { if (destination_rti.IsObjectArray()) { @@ -1838,8 +2188,8 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) optimizations.SetDestinationIsTypedObjectArray(); } if (destination_rti.IsPrimitiveArrayClass()) { - destination_component_type = - destination_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType(); + destination_component_type = DataTypeFromPrimitive( + destination_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType()); optimizations.SetDestinationIsPrimitiveArray(); } else if (destination_rti.IsNonPrimitiveArrayClass()) { optimizations.SetDestinationIsNonPrimitiveArray(); @@ -1852,13 +2202,14 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) } if (source_rti.IsPrimitiveArrayClass()) { optimizations.SetSourceIsPrimitiveArray(); - source_component_type = source_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType(); + source_component_type = DataTypeFromPrimitive( + source_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType()); } else if (source_rti.IsNonPrimitiveArrayClass()) { optimizations.SetSourceIsNonPrimitiveArray(); } } // For primitive arrays, use their optimized ArtMethod implementations. 
- if ((source_component_type != Primitive::kPrimVoid) && + if ((source_component_type != DataType::Type::kVoid) && (source_component_type == destination_component_type)) { ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); PointerSize image_size = class_linker->GetImagePointerSize(); @@ -1866,28 +2217,28 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) mirror::Class* system = invoke->GetResolvedMethod()->GetDeclaringClass(); ArtMethod* method = nullptr; switch (source_component_type) { - case Primitive::kPrimBoolean: + case DataType::Type::kBool: method = system->FindClassMethod("arraycopy", "([ZI[ZII)V", image_size); break; - case Primitive::kPrimByte: + case DataType::Type::kInt8: method = system->FindClassMethod("arraycopy", "([BI[BII)V", image_size); break; - case Primitive::kPrimChar: + case DataType::Type::kUint16: method = system->FindClassMethod("arraycopy", "([CI[CII)V", image_size); break; - case Primitive::kPrimShort: + case DataType::Type::kInt16: method = system->FindClassMethod("arraycopy", "([SI[SII)V", image_size); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: method = system->FindClassMethod("arraycopy", "([II[III)V", image_size); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: method = system->FindClassMethod("arraycopy", "([FI[FII)V", image_size); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: method = system->FindClassMethod("arraycopy", "([JI[JII)V", image_size); break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: method = system->FindClassMethod("arraycopy", "([DI[DII)V", image_size); break; default: @@ -1908,19 +2259,19 @@ void InstructionSimplifierVisitor::SimplifySystemArrayCopy(HInvoke* instruction) void InstructionSimplifierVisitor::SimplifyCompare(HInvoke* invoke, bool is_signum, - Primitive::Type type) { + DataType::Type type) { DCHECK(invoke->IsInvokeStaticOrDirect()); uint32_t dex_pc = invoke->GetDexPc(); HInstruction* left = invoke->InputAt(0); HInstruction* right; if (!is_signum) { right = invoke->InputAt(1); - } else if (type == Primitive::kPrimLong) { + } else if (type == DataType::Type::kInt64) { right = GetGraph()->GetLongConstant(0); } else { right = GetGraph()->GetIntConstant(0); } - HCompare* compare = new (GetGraph()->GetArena()) + HCompare* compare = new (GetGraph()->GetAllocator()) HCompare(type, left, right, ComparisonBias::kNoBias, dex_pc); invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, compare); } @@ -1930,7 +2281,7 @@ void InstructionSimplifierVisitor::SimplifyIsNaN(HInvoke* invoke) { uint32_t dex_pc = invoke->GetDexPc(); // IsNaN(x) is the same as x != x. HInstruction* x = invoke->InputAt(0); - HCondition* condition = new (GetGraph()->GetArena()) HNotEqual(x, x, dex_pc); + HCondition* condition = new (GetGraph()->GetAllocator()) HNotEqual(x, x, dex_pc); condition->SetBias(ComparisonBias::kLtBias); invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, condition); } @@ -1939,17 +2290,17 @@ void InstructionSimplifierVisitor::SimplifyFP2Int(HInvoke* invoke) { DCHECK(invoke->IsInvokeStaticOrDirect()); uint32_t dex_pc = invoke->GetDexPc(); HInstruction* x = invoke->InputAt(0); - Primitive::Type type = x->GetType(); + DataType::Type type = x->GetType(); // Set proper bit pattern for NaN and replace intrinsic with raw version. 
HInstruction* nan; - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { nan = GetGraph()->GetLongConstant(0x7ff8000000000000L); invoke->SetIntrinsic(Intrinsics::kDoubleDoubleToRawLongBits, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow); } else { - DCHECK_EQ(type, Primitive::kPrimFloat); + DCHECK_EQ(type, DataType::Type::kFloat32); nan = GetGraph()->GetIntConstant(0x7fc00000); invoke->SetIntrinsic(Intrinsics::kFloatFloatToRawIntBits, kNeedsEnvironmentOrCache, @@ -1957,11 +2308,11 @@ void InstructionSimplifierVisitor::SimplifyFP2Int(HInvoke* invoke) { kNoThrow); } // Test IsNaN(x), which is the same as x != x. - HCondition* condition = new (GetGraph()->GetArena()) HNotEqual(x, x, dex_pc); + HCondition* condition = new (GetGraph()->GetAllocator()) HNotEqual(x, x, dex_pc); condition->SetBias(ComparisonBias::kLtBias); invoke->GetBlock()->InsertInstructionBefore(condition, invoke->GetNext()); // Select between the two. - HInstruction* select = new (GetGraph()->GetArena()) HSelect(condition, nan, invoke, dex_pc); + HInstruction* select = new (GetGraph()->GetAllocator()) HSelect(condition, nan, invoke, dex_pc); invoke->GetBlock()->InsertInstructionBefore(select, condition->GetNext()); invoke->ReplaceWithExceptInReplacementAtIndex(select, 0); // false at index 0 } @@ -1970,16 +2321,20 @@ void InstructionSimplifierVisitor::SimplifyStringCharAt(HInvoke* invoke) { HInstruction* str = invoke->InputAt(0); HInstruction* index = invoke->InputAt(1); uint32_t dex_pc = invoke->GetDexPc(); - ArenaAllocator* arena = GetGraph()->GetArena(); + ArenaAllocator* allocator = GetGraph()->GetAllocator(); // We treat String as an array to allow DCE and BCE to seamlessly work on strings, // so create the HArrayLength, HBoundsCheck and HArrayGet. - HArrayLength* length = new (arena) HArrayLength(str, dex_pc, /* is_string_length */ true); + HArrayLength* length = new (allocator) HArrayLength(str, dex_pc, /* is_string_length */ true); invoke->GetBlock()->InsertInstructionBefore(length, invoke); - HBoundsCheck* bounds_check = new (arena) HBoundsCheck( - index, length, dex_pc, invoke->GetDexMethodIndex()); + HBoundsCheck* bounds_check = new (allocator) HBoundsCheck( + index, length, dex_pc, /* is_string_char_at */ true); invoke->GetBlock()->InsertInstructionBefore(bounds_check, invoke); - HArrayGet* array_get = new (arena) HArrayGet( - str, bounds_check, Primitive::kPrimChar, dex_pc, /* is_string_char_at */ true); + HArrayGet* array_get = new (allocator) HArrayGet(str, + bounds_check, + DataType::Type::kUint16, + SideEffects::None(), // Strings are immutable. + dex_pc, + /* is_string_char_at */ true); invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, array_get); bounds_check->CopyEnvironmentFrom(invoke->GetEnvironment()); GetGraph()->SetHasBoundsChecks(true); @@ -1991,13 +2346,13 @@ void InstructionSimplifierVisitor::SimplifyStringIsEmptyOrLength(HInvoke* invoke // We treat String as an array to allow DCE and BCE to seamlessly work on strings, // so create the HArrayLength. HArrayLength* length = - new (GetGraph()->GetArena()) HArrayLength(str, dex_pc, /* is_string_length */ true); + new (GetGraph()->GetAllocator()) HArrayLength(str, dex_pc, /* is_string_length */ true); HInstruction* replacement; if (invoke->GetIntrinsic() == Intrinsics::kStringIsEmpty) { // For String.isEmpty(), create the `HEqual` representing the `length == 0`. 
invoke->GetBlock()->InsertInstructionBefore(length, invoke); HIntConstant* zero = GetGraph()->GetIntConstant(0); - HEqual* equal = new (GetGraph()->GetArena()) HEqual(length, zero, dex_pc); + HEqual* equal = new (GetGraph()->GetAllocator()) HEqual(length, zero, dex_pc); replacement = equal; } else { DCHECK_EQ(invoke->GetIntrinsic(), Intrinsics::kStringLength); @@ -2067,9 +2422,11 @@ void InstructionSimplifierVisitor::SimplifyAllocationIntrinsic(HInvoke* invoke) } } -void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind) { +void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, + MemBarrierKind barrier_kind) { uint32_t dex_pc = invoke->GetDexPc(); - HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc); + HMemoryBarrier* mem_barrier = + new (GetGraph()->GetAllocator()) HMemoryBarrier(barrier_kind, dex_pc); invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, mem_barrier); } @@ -2082,28 +2439,28 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { SimplifySystemArrayCopy(instruction); break; case Intrinsics::kIntegerRotateRight: - SimplifyRotate(instruction, /* is_left */ false, Primitive::kPrimInt); + SimplifyRotate(instruction, /* is_left */ false, DataType::Type::kInt32); break; case Intrinsics::kLongRotateRight: - SimplifyRotate(instruction, /* is_left */ false, Primitive::kPrimLong); + SimplifyRotate(instruction, /* is_left */ false, DataType::Type::kInt64); break; case Intrinsics::kIntegerRotateLeft: - SimplifyRotate(instruction, /* is_left */ true, Primitive::kPrimInt); + SimplifyRotate(instruction, /* is_left */ true, DataType::Type::kInt32); break; case Intrinsics::kLongRotateLeft: - SimplifyRotate(instruction, /* is_left */ true, Primitive::kPrimLong); + SimplifyRotate(instruction, /* is_left */ true, DataType::Type::kInt64); break; case Intrinsics::kIntegerCompare: - SimplifyCompare(instruction, /* is_signum */ false, Primitive::kPrimInt); + SimplifyCompare(instruction, /* is_signum */ false, DataType::Type::kInt32); break; case Intrinsics::kLongCompare: - SimplifyCompare(instruction, /* is_signum */ false, Primitive::kPrimLong); + SimplifyCompare(instruction, /* is_signum */ false, DataType::Type::kInt64); break; case Intrinsics::kIntegerSignum: - SimplifyCompare(instruction, /* is_signum */ true, Primitive::kPrimInt); + SimplifyCompare(instruction, /* is_signum */ true, DataType::Type::kInt32); break; case Intrinsics::kLongSignum: - SimplifyCompare(instruction, /* is_signum */ true, Primitive::kPrimLong); + SimplifyCompare(instruction, /* is_signum */ true, DataType::Type::kInt64); break; case Intrinsics::kFloatIsNaN: case Intrinsics::kDoubleIsNaN: @@ -2141,6 +2498,21 @@ void InstructionSimplifierVisitor::VisitInvoke(HInvoke* instruction) { case Intrinsics::kUnsafeFullFence: SimplifyMemBarrier(instruction, MemBarrierKind::kAnyAny); break; + case Intrinsics::kVarHandleFullFence: + SimplifyMemBarrier(instruction, MemBarrierKind::kAnyAny); + break; + case Intrinsics::kVarHandleAcquireFence: + SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny); + break; + case Intrinsics::kVarHandleReleaseFence: + SimplifyMemBarrier(instruction, MemBarrierKind::kAnyStore); + break; + case Intrinsics::kVarHandleLoadLoadFence: + SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny); + break; + case Intrinsics::kVarHandleStoreStoreFence: + SimplifyMemBarrier(instruction, MemBarrierKind::kStoreStore); + break; default: break; } @@ -2171,7 +2543,7 @@ bool 
InstructionSimplifierVisitor::TryHandleAssociativeAndCommutativeOperation( HBinaryOperation* instruction) { DCHECK(instruction->IsCommutative()); - if (!Primitive::IsIntegralType(instruction->GetType())) { + if (!DataType::IsIntegralType(instruction->GetType())) { return false; } @@ -2221,12 +2593,12 @@ static HBinaryOperation* AsAddOrSub(HInstruction* binop) { } // Helper function that performs addition statically, considering the result type. -static int64_t ComputeAddition(Primitive::Type type, int64_t x, int64_t y) { +static int64_t ComputeAddition(DataType::Type type, int64_t x, int64_t y) { // Use the Compute() method for consistency with TryStaticEvaluation(). - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { return HAdd::Compute<int32_t>(x, y); } else { - DCHECK_EQ(type, Primitive::kPrimLong); + DCHECK_EQ(type, DataType::Type::kInt64); return HAdd::Compute<int64_t>(x, y); } } @@ -2248,8 +2620,8 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( HBinaryOperation* instruction) { DCHECK(instruction->IsAdd() || instruction->IsSub()) << instruction->DebugName(); - Primitive::Type type = instruction->GetType(); - if (!Primitive::IsIntegralType(type)) { + DataType::Type type = instruction->GetType(); + if (!DataType::IsIntegralType(type)) { return false; } @@ -2293,13 +2665,13 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( int64_t const3_val = ComputeAddition(type, const1_val, const2_val); HBasicBlock* block = instruction->GetBlock(); HConstant* const3 = block->GetGraph()->GetConstant(type, const3_val); - ArenaAllocator* arena = instruction->GetArena(); + ArenaAllocator* allocator = instruction->GetAllocator(); HInstruction* z; if (is_x_negated) { - z = new (arena) HSub(type, const3, x, instruction->GetDexPc()); + z = new (allocator) HSub(type, const3, x, instruction->GetDexPc()); } else { - z = new (arena) HAdd(type, x, const3, instruction->GetDexPc()); + z = new (allocator) HAdd(type, x, const3, instruction->GetDexPc()); } block->ReplaceAndRemoveInstructionWith(instruction, z); @@ -2307,4 +2679,10 @@ bool InstructionSimplifierVisitor::TrySubtractionChainSimplification( return true; } +void InstructionSimplifierVisitor::VisitVecMul(HVecMul* instruction) { + if (TryCombineVecMultiplyAccumulate(instruction)) { + RecordSimplification(); + } +} + } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm.cc b/compiler/optimizing/instruction_simplifier_arm.cc index fe22595258..92081e30b1 100644 --- a/compiler/optimizing/instruction_simplifier_arm.cc +++ b/compiler/optimizing/instruction_simplifier_arm.cc @@ -14,9 +14,10 @@ * limitations under the License. 
*/ +#include "instruction_simplifier_arm.h" + #include "code_generator.h" #include "common_arm.h" -#include "instruction_simplifier_arm.h" #include "instruction_simplifier_shared.h" #include "mirror/array-inl.h" #include "mirror/string.h" @@ -29,18 +30,65 @@ using helpers::HasShifterOperand; namespace arm { -using helpers::ShifterOperandSupportsExtension; +class InstructionSimplifierArmVisitor : public HGraphVisitor { + public: + InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), stats_(stats) {} + + private: + void RecordSimplification() { + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); + } + + bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); + bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge); + bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); + } + bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true); + } + + /** + * This simplifier uses a special-purpose BB visitor. + * (1) No need to visit Phi nodes. + * (2) Since statements can be removed in a "forward" fashion, + * the visitor should test if each statement is still there. + */ + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + // TODO: fragile iteration, provide more robust iterators? + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInBlock()) { + instruction->Accept(this); + } + } + } + + void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitArrayGet(HArrayGet* instruction) OVERRIDE; + void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitOr(HOr* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; + + OptimizingCompilerStats* stats_; +}; bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge) { - DCHECK(HasShifterOperand(use, kArm)); + DCHECK(HasShifterOperand(use, InstructionSet::kArm)); DCHECK(use->IsBinaryOperation()); DCHECK(CanFitInShifterOperand(bitfield_op)); DCHECK(!bitfield_op->HasEnvironmentUses()); - Primitive::Type type = use->GetType(); - if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) { + DataType::Type type = use->GetType(); + if (type != DataType::Type::kInt32 && type != DataType::Type::kInt64) { return false; } @@ -71,28 +119,28 @@ bool InstructionSimplifierArmVisitor::TryMergeIntoShifterOperand(HInstruction* u int shift_amount = 0; HDataProcWithShifterOp::GetOpInfoFromInstruction(bitfield_op, &op_kind, &shift_amount); - shift_amount &= use->GetType() == Primitive::kPrimInt + shift_amount &= use->GetType() == DataType::Type::kInt32 ? 
kMaxIntShiftDistance : kMaxLongShiftDistance; if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { - if (!ShifterOperandSupportsExtension(use)) { + if (!use->IsAdd() && (!use->IsSub() || use->GetType() != DataType::Type::kInt64)) { return false; } // Shift by 1 is a special case that results in the same number and type of instructions // as this simplification, but potentially shorter code. - } else if (type == Primitive::kPrimLong && shift_amount == 1) { + } else if (type == DataType::Type::kInt64 && shift_amount == 1) { return false; } if (do_merge) { HDataProcWithShifterOp* alu_with_op = - new (GetGraph()->GetArena()) HDataProcWithShifterOp(use, - other_input, - bitfield_op->InputAt(0), - op_kind, - shift_amount, - use->GetDexPc()); + new (GetGraph()->GetAllocator()) HDataProcWithShifterOp(use, + other_input, + bitfield_op->InputAt(0), + op_kind, + shift_amount, + use->GetDexPc()); use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op); if (bitfield_op->GetUses().empty()) { bitfield_op->GetBlock()->RemoveInstruction(bitfield_op); @@ -116,7 +164,7 @@ bool InstructionSimplifierArmVisitor::TryMergeIntoUsersShifterOperand(HInstructi // Check whether we can merge the instruction in all its users' shifter operand. for (const HUseListNode<HInstruction*>& use : uses) { HInstruction* user = use.GetUser(); - if (!HasShifterOperand(user, kArm)) { + if (!HasShifterOperand(user, InstructionSet::kArm)) { return false; } if (!CanMergeIntoShifterOperand(user, bitfield_op)) { @@ -144,7 +192,7 @@ void InstructionSimplifierArmVisitor::VisitAnd(HAnd* instruction) { void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction); - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); // TODO: Implement reading (length + compression) for String compression feature from // negative offset (count_offset - data_offset). Thumb2Assembler (now removed) did @@ -154,9 +202,9 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { return; } - if (type == Primitive::kPrimLong - || type == Primitive::kPrimFloat - || type == Primitive::kPrimDouble) { + if (type == DataType::Type::kInt64 + || type == DataType::Type::kFloat32 + || type == DataType::Type::kFloat64) { // T32 doesn't support ShiftedRegOffset mem address mode for these types // to enable optimization. return; @@ -171,13 +219,13 @@ void InstructionSimplifierArmVisitor::VisitArrayGet(HArrayGet* instruction) { } void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) { - size_t access_size = Primitive::ComponentSize(instruction->GetComponentType()); + size_t access_size = DataType::Size(instruction->GetComponentType()); size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value(); - Primitive::Type type = instruction->GetComponentType(); + DataType::Type type = instruction->GetComponentType(); - if (type == Primitive::kPrimLong - || type == Primitive::kPrimFloat - || type == Primitive::kPrimDouble) { + if (type == DataType::Type::kInt64 + || type == DataType::Type::kFloat32 + || type == DataType::Type::kFloat64) { // T32 doesn't support ShiftedRegOffset mem address mode for these types // to enable optimization. 
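// Illustrative sketch (not part of the patch) of the shifter-operand merge performed by
// TryMergeIntoShifterOperand above; the value names x, y, r and the constant shift are
// invented for the example:
//
//   t = Shl(x, 2)     // bitfield_op: a shift/extension with a single non-environment use
//   r = Add(y, t)     // use: an ALU op for which HasShifterOperand() holds
//     ==>
//   r = HDataProcWithShifterOp(Add, y, x, /* op_kind */ LSL, /* shift_amount */ 2)
//
// which the backend can emit as one instruction, e.g. "add r0, r1, r2, lsl #2", instead of
// a separate shift followed by an add.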
return; @@ -192,7 +240,7 @@ void InstructionSimplifierArmVisitor::VisitArraySet(HArraySet* instruction) { } void InstructionSimplifierArmVisitor::VisitMul(HMul* instruction) { - if (TryCombineMultiplyAccumulate(instruction, kArm)) { + if (TryCombineMultiplyAccumulate(instruction, InstructionSet::kArm)) { RecordSimplification(); } } @@ -216,15 +264,15 @@ void InstructionSimplifierArmVisitor::VisitShr(HShr* instruction) { } void InstructionSimplifierArmVisitor::VisitTypeConversion(HTypeConversion* instruction) { - Primitive::Type result_type = instruction->GetResultType(); - Primitive::Type input_type = instruction->GetInputType(); + DataType::Type result_type = instruction->GetResultType(); + DataType::Type input_type = instruction->GetInputType(); if (input_type == result_type) { // We let the arch-independent code handle this. return; } - if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { TryMergeIntoUsersShifterOperand(instruction); } } @@ -235,5 +283,10 @@ void InstructionSimplifierArmVisitor::VisitUShr(HUShr* instruction) { } } +void InstructionSimplifierArm::Run() { + InstructionSimplifierArmVisitor visitor(graph_, stats_); + visitor.VisitReversePostOrder(); +} + } // namespace arm } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h index e2ed257777..2f6572931f 100644 --- a/compiler/optimizing/instruction_simplifier_arm.h +++ b/compiler/optimizing/instruction_simplifier_arm.h @@ -23,58 +23,6 @@ namespace art { namespace arm { -class InstructionSimplifierArmVisitor : public HGraphVisitor { - public: - InstructionSimplifierArmVisitor(HGraph* graph, OptimizingCompilerStats* stats) - : HGraphVisitor(graph), stats_(stats) {} - - private: - void RecordSimplification() { - if (stats_ != nullptr) { - stats_->RecordStat(kInstructionSimplificationsArch); - } - } - - bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); - bool TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge); - bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); - } - bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { - DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true); - } - - /** - * This simplifier uses a special-purpose BB visitor. - * (1) No need to visit Phi nodes. - * (2) Since statements can be removed in a "forward" fashion, - * the visitor should test if each statement is still there. - */ - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { - // TODO: fragile iteration, provide more robust iterators? 
- for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instruction = it.Current(); - if (instruction->IsInBlock()) { - instruction->Accept(this); - } - } - } - - void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitArrayGet(HArrayGet* instruction) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; - - OptimizingCompilerStats* stats_; -}; - - class InstructionSimplifierArm : public HOptimization { public: InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats) @@ -82,10 +30,7 @@ class InstructionSimplifierArm : public HOptimization { static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm"; - void Run() OVERRIDE { - InstructionSimplifierArmVisitor visitor(graph_, stats_); - visitor.VisitReversePostOrder(); - } + void Run() OVERRIDE; }; } // namespace arm diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc index 311be1fb49..1c44e5ac49 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.cc +++ b/compiler/optimizing/instruction_simplifier_arm64.cc @@ -30,16 +30,71 @@ namespace arm64 { using helpers::ShifterOperandSupportsExtension; +class InstructionSimplifierArm64Visitor : public HGraphVisitor { + public: + InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats) + : HGraphVisitor(graph), stats_(stats) {} + + private: + void RecordSimplification() { + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); + } + + bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); + bool TryMergeIntoShifterOperand(HInstruction* use, + HInstruction* bitfield_op, + bool do_merge); + bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); + } + bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { + DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); + return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true); + } + + /** + * This simplifier uses a special-purpose BB visitor. + * (1) No need to visit Phi nodes. + * (2) Since statements can be removed in a "forward" fashion, + * the visitor should test if each statement is still there. + */ + void VisitBasicBlock(HBasicBlock* block) OVERRIDE { + // TODO: fragile iteration, provide more robust iterators? + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* instruction = it.Current(); + if (instruction->IsInBlock()) { + instruction->Accept(this); + } + } + } + + // HInstruction visitors, sorted alphabetically. 
+ void VisitAnd(HAnd* instruction) OVERRIDE; + void VisitArrayGet(HArrayGet* instruction) OVERRIDE; + void VisitArraySet(HArraySet* instruction) OVERRIDE; + void VisitMul(HMul* instruction) OVERRIDE; + void VisitOr(HOr* instruction) OVERRIDE; + void VisitShl(HShl* instruction) OVERRIDE; + void VisitShr(HShr* instruction) OVERRIDE; + void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; + void VisitUShr(HUShr* instruction) OVERRIDE; + void VisitXor(HXor* instruction) OVERRIDE; + void VisitVecLoad(HVecLoad* instruction) OVERRIDE; + void VisitVecStore(HVecStore* instruction) OVERRIDE; + + OptimizingCompilerStats* stats_; +}; + bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op, bool do_merge) { - DCHECK(HasShifterOperand(use, kArm64)); + DCHECK(HasShifterOperand(use, InstructionSet::kArm64)); DCHECK(use->IsBinaryOperation() || use->IsNeg()); DCHECK(CanFitInShifterOperand(bitfield_op)); DCHECK(!bitfield_op->HasEnvironmentUses()); - Primitive::Type type = use->GetType(); - if (type != Primitive::kPrimInt && type != Primitive::kPrimLong) { + DataType::Type type = use->GetType(); + if (type != DataType::Type::kInt32 && type != DataType::Type::kInt64) { return false; } @@ -84,12 +139,12 @@ bool InstructionSimplifierArm64Visitor::TryMergeIntoShifterOperand(HInstruction* if (do_merge) { HDataProcWithShifterOp* alu_with_op = - new (GetGraph()->GetArena()) HDataProcWithShifterOp(use, - other_input, - bitfield_op->InputAt(0), - op_kind, - shift_amount, - use->GetDexPc()); + new (GetGraph()->GetAllocator()) HDataProcWithShifterOp(use, + other_input, + bitfield_op->InputAt(0), + op_kind, + shift_amount, + use->GetDexPc()); use->GetBlock()->ReplaceAndRemoveInstructionWith(use, alu_with_op); if (bitfield_op->GetUses().empty()) { bitfield_op->GetBlock()->RemoveInstruction(bitfield_op); @@ -113,7 +168,7 @@ bool InstructionSimplifierArm64Visitor::TryMergeIntoUsersShifterOperand(HInstruc // Check whether we can merge the instruction in all its users' shifter operand. 
for (const HUseListNode<HInstruction*>& use : uses) { HInstruction* user = use.GetUser(); - if (!HasShifterOperand(user, kArm64)) { + if (!HasShifterOperand(user, InstructionSet::kArm64)) { return false; } if (!CanMergeIntoShifterOperand(user, bitfield_op)) { @@ -150,7 +205,7 @@ void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) { } void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { - size_t access_size = Primitive::ComponentSize(instruction->GetComponentType()); + size_t access_size = DataType::Size(instruction->GetComponentType()); size_t data_offset = mirror::Array::DataOffset(access_size).Uint32Value(); if (TryExtractArrayAccessAddress(instruction, instruction->GetArray(), @@ -161,7 +216,7 @@ void InstructionSimplifierArm64Visitor::VisitArraySet(HArraySet* instruction) { } void InstructionSimplifierArm64Visitor::VisitMul(HMul* instruction) { - if (TryCombineMultiplyAccumulate(instruction, kArm64)) { + if (TryCombineMultiplyAccumulate(instruction, InstructionSet::kArm64)) { RecordSimplification(); } } @@ -185,15 +240,15 @@ void InstructionSimplifierArm64Visitor::VisitShr(HShr* instruction) { } void InstructionSimplifierArm64Visitor::VisitTypeConversion(HTypeConversion* instruction) { - Primitive::Type result_type = instruction->GetResultType(); - Primitive::Type input_type = instruction->GetInputType(); + DataType::Type result_type = instruction->GetResultType(); + DataType::Type input_type = instruction->GetInputType(); if (input_type == result_type) { // We let the arch-independent code handle this. return; } - if (Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type)) { + if (DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type)) { TryMergeIntoUsersShifterOperand(instruction); } } @@ -210,12 +265,6 @@ void InstructionSimplifierArm64Visitor::VisitXor(HXor* instruction) { } } -void InstructionSimplifierArm64Visitor::VisitVecMul(HVecMul* instruction) { - if (TryCombineVecMultiplyAccumulate(instruction, kArm64)) { - RecordSimplification(); - } -} - void InstructionSimplifierArm64Visitor::VisitVecLoad(HVecLoad* instruction) { if (!instruction->IsStringCharAt() && TryExtractVecArrayAccessAddress(instruction, instruction->GetIndex())) { @@ -229,5 +278,10 @@ void InstructionSimplifierArm64Visitor::VisitVecStore(HVecStore* instruction) { } } +void InstructionSimplifierArm64::Run() { + InstructionSimplifierArm64Visitor visitor(graph_, stats_); + visitor.VisitReversePostOrder(); +} + } // namespace arm64 } // namespace art diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h index 8596f6ad40..d180a8dc46 100644 --- a/compiler/optimizing/instruction_simplifier_arm64.h +++ b/compiler/optimizing/instruction_simplifier_arm64.h @@ -23,65 +23,6 @@ namespace art { namespace arm64 { -class InstructionSimplifierArm64Visitor : public HGraphVisitor { - public: - InstructionSimplifierArm64Visitor(HGraph* graph, OptimizingCompilerStats* stats) - : HGraphVisitor(graph), stats_(stats) {} - - private: - void RecordSimplification() { - if (stats_ != nullptr) { - stats_->RecordStat(kInstructionSimplificationsArch); - } - } - - bool TryMergeIntoUsersShifterOperand(HInstruction* instruction); - bool TryMergeIntoShifterOperand(HInstruction* use, - HInstruction* bitfield_op, - bool do_merge); - bool CanMergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ false); - 
} - bool MergeIntoShifterOperand(HInstruction* use, HInstruction* bitfield_op) { - DCHECK(CanMergeIntoShifterOperand(use, bitfield_op)); - return TryMergeIntoShifterOperand(use, bitfield_op, /* do_merge */ true); - } - - /** - * This simplifier uses a special-purpose BB visitor. - * (1) No need to visit Phi nodes. - * (2) Since statements can be removed in a "forward" fashion, - * the visitor should test if each statement is still there. - */ - void VisitBasicBlock(HBasicBlock* block) OVERRIDE { - // TODO: fragile iteration, provide more robust iterators? - for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { - HInstruction* instruction = it.Current(); - if (instruction->IsInBlock()) { - instruction->Accept(this); - } - } - } - - // HInstruction visitors, sorted alphabetically. - void VisitAnd(HAnd* instruction) OVERRIDE; - void VisitArrayGet(HArrayGet* instruction) OVERRIDE; - void VisitArraySet(HArraySet* instruction) OVERRIDE; - void VisitMul(HMul* instruction) OVERRIDE; - void VisitOr(HOr* instruction) OVERRIDE; - void VisitShl(HShl* instruction) OVERRIDE; - void VisitShr(HShr* instruction) OVERRIDE; - void VisitTypeConversion(HTypeConversion* instruction) OVERRIDE; - void VisitUShr(HUShr* instruction) OVERRIDE; - void VisitXor(HXor* instruction) OVERRIDE; - void VisitVecMul(HVecMul* instruction) OVERRIDE; - void VisitVecLoad(HVecLoad* instruction) OVERRIDE; - void VisitVecStore(HVecStore* instruction) OVERRIDE; - - OptimizingCompilerStats* stats_; -}; - - class InstructionSimplifierArm64 : public HOptimization { public: InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats) @@ -89,10 +30,7 @@ class InstructionSimplifierArm64 : public HOptimization { static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64"; - void Run() OVERRIDE { - InstructionSimplifierArm64Visitor visitor(graph_, stats_); - visitor.VisitReversePostOrder(); - } + void Run() OVERRIDE; }; } // namespace arm64 diff --git a/compiler/optimizing/instruction_simplifier_mips.cc b/compiler/optimizing/instruction_simplifier_mips.cc new file mode 100644 index 0000000000..fa97401a0c --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_mips.cc @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "instruction_simplifier_mips.h" + +#include "arch/mips/instruction_set_features_mips.h" +#include "mirror/array-inl.h" + +namespace art { +namespace mips { + +class InstructionSimplifierMipsVisitor : public HGraphVisitor { + public: + InstructionSimplifierMipsVisitor(HGraph* graph, + CodeGenerator* codegen, + OptimizingCompilerStats* stats) + : HGraphVisitor(graph), + stats_(stats), + codegen_(down_cast<CodeGeneratorMIPS*>(codegen)) {} + + private: + void RecordSimplification() { + MaybeRecordStat(stats_, MethodCompilationStat::kInstructionSimplificationsArch); + } + + bool TryExtractArrayAccessIndex(HInstruction* access, + HInstruction* index, + DataType::Type packed_type); + void VisitArrayGet(HArrayGet* instruction) OVERRIDE; + void VisitArraySet(HArraySet* instruction) OVERRIDE; + + OptimizingCompilerStats* stats_; + CodeGeneratorMIPS* codegen_; +}; + +bool InstructionSimplifierMipsVisitor::TryExtractArrayAccessIndex(HInstruction* access, + HInstruction* index, + DataType::Type packed_type) { + if (codegen_->GetInstructionSetFeatures().IsR6() || + codegen_->GetInstructionSetFeatures().HasMsa()) { + return false; + } + if (index->IsConstant() || + (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) { + // If index is constant the whole address calculation often can be done by load/store + // instructions themselves. + // TODO: Treat the case with non-embeddable constants. + return false; + } + + if (packed_type != DataType::Type::kInt16 && packed_type != DataType::Type::kUint16 && + packed_type != DataType::Type::kInt32 && packed_type != DataType::Type::kInt64 && + packed_type != DataType::Type::kFloat32 && packed_type != DataType::Type::kFloat64) { + return false; + } + + if (access->IsArrayGet() && access->AsArrayGet()->IsStringCharAt()) { + return false; + } + + HGraph* graph = access->GetBlock()->GetGraph(); + ArenaAllocator* allocator = graph->GetAllocator(); + size_t component_shift = DataType::SizeShift(packed_type); + + bool is_extracting_beneficial = false; + // It is beneficial to extract index intermediate address only if there are at least 2 users. 
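// Illustrative sketch (not part of the patch) of the rewrite TryExtractArrayAccessIndex
// aims for when the heuristic below fires; variable names are invented:
//
//   ... = a[i];  b[i] = ...;      // two accesses sharing the same element-size shift
//     ==>
//   addr_idx = HIntermediateArrayAddressIndex(i, shift /* DataType::SizeShift(type) */)
//   ArrayGet(a, addr_idx);  ArraySet(b, addr_idx, ...)
//
// Exposing the scaled index as its own node lets later passes share it between accesses
// with the same shift, which only pays off when at least two compatible users exist --
// hence the use-counting loop that follows.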
+ for (const HUseListNode<HInstruction*>& use : index->GetUses()) { + HInstruction* user = use.GetUser(); + if (user->IsArrayGet() && user != access && !user->AsArrayGet()->IsStringCharAt()) { + HArrayGet* another_access = user->AsArrayGet(); + DataType::Type another_packed_type = another_access->GetType(); + size_t another_component_shift = DataType::SizeShift(another_packed_type); + if (another_component_shift == component_shift) { + is_extracting_beneficial = true; + break; + } + } else if (user->IsArraySet() && user != access) { + HArraySet* another_access = user->AsArraySet(); + DataType::Type another_packed_type = another_access->GetType(); + size_t another_component_shift = DataType::SizeShift(another_packed_type); + if (another_component_shift == component_shift) { + is_extracting_beneficial = true; + break; + } + } else if (user->IsIntermediateArrayAddressIndex()) { + HIntermediateArrayAddressIndex* another_access = user->AsIntermediateArrayAddressIndex(); + size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue(); + if (another_component_shift == component_shift) { + is_extracting_beneficial = true; + break; + } + } + } + + if (!is_extracting_beneficial) { + return false; + } + + HIntConstant* shift = graph->GetIntConstant(component_shift); + HIntermediateArrayAddressIndex* address = + new (allocator) HIntermediateArrayAddressIndex(index, shift, kNoDexPc); + access->GetBlock()->InsertInstructionBefore(address, access); + access->ReplaceInput(address, 1); + return true; +} + +void InstructionSimplifierMipsVisitor::VisitArrayGet(HArrayGet* instruction) { + DataType::Type packed_type = instruction->GetType(); + if (TryExtractArrayAccessIndex(instruction, instruction->GetIndex(), packed_type)) { + RecordSimplification(); + } +} + +void InstructionSimplifierMipsVisitor::VisitArraySet(HArraySet* instruction) { + DataType::Type packed_type = instruction->GetComponentType(); + if (TryExtractArrayAccessIndex(instruction, instruction->GetIndex(), packed_type)) { + RecordSimplification(); + } +} + +void InstructionSimplifierMips::Run() { + InstructionSimplifierMipsVisitor visitor(graph_, codegen_, stats_); + visitor.VisitReversePostOrder(); +} + +} // namespace mips +} // namespace art diff --git a/compiler/optimizing/instruction_simplifier_mips.h b/compiler/optimizing/instruction_simplifier_mips.h new file mode 100644 index 0000000000..6cb8affe85 --- /dev/null +++ b/compiler/optimizing/instruction_simplifier_mips.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_MIPS_H_ +#define ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_MIPS_H_ + +#include "nodes.h" +#include "optimization.h" +#include "code_generator_mips.h" + +namespace art { + +class CodeGenerator; + +namespace mips { + +class InstructionSimplifierMips : public HOptimization { + public: + InstructionSimplifierMips(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) + : HOptimization(graph, kInstructionSimplifierMipsPassName, stats), + codegen_(down_cast<CodeGeneratorMIPS*>(codegen)) {} + + static constexpr const char* kInstructionSimplifierMipsPassName = "instruction_simplifier_mips"; + + void Run() OVERRIDE; + + private: + CodeGeneratorMIPS* codegen_; +}; + +} // namespace mips +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INSTRUCTION_SIMPLIFIER_MIPS_H_ diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc index d1bc4dadeb..ccdcb3532d 100644 --- a/compiler/optimizing/instruction_simplifier_shared.cc +++ b/compiler/optimizing/instruction_simplifier_shared.cc @@ -25,7 +25,7 @@ namespace { bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, HBinaryOperation* input_binop, HInstruction* input_other) { - DCHECK(Primitive::IsIntOrLongType(mul->GetType())); + DCHECK(DataType::IsIntOrLongType(mul->GetType())); DCHECK(input_binop->IsAdd() || input_binop->IsSub()); DCHECK_NE(input_binop, input_other); if (!input_binop->HasOnlyOneNonEnvironmentUse()) { @@ -75,8 +75,8 @@ bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, return false; } - ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); - HMultiplyAccumulate* mulacc = new(arena) HMultiplyAccumulate( + ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); + HMultiplyAccumulate* mulacc = new (allocator) HMultiplyAccumulate( mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc()); mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc); @@ -88,16 +88,16 @@ bool TrySimpleMultiplyAccumulatePatterns(HMul* mul, } // namespace bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) { - Primitive::Type type = mul->GetType(); + DataType::Type type = mul->GetType(); switch (isa) { - case kArm: - case kThumb2: - if (type != Primitive::kPrimInt) { + case InstructionSet::kArm: + case InstructionSet::kThumb2: + if (type != DataType::Type::kInt32) { return false; } break; - case kArm64: - if (!Primitive::IsIntOrLongType(type)) { + case InstructionSet::kArm64: + if (!DataType::IsIntOrLongType(type)) { return false; } break; @@ -105,7 +105,7 @@ bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) { return false; } - ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); + ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator(); if (mul->HasOnlyOneNonEnvironmentUse()) { HInstruction* use = mul->GetUses().front().GetUser(); @@ -137,24 +137,24 @@ bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) { if (accumulator != nullptr) { HMultiplyAccumulate* mulacc = - new (arena) HMultiplyAccumulate(type, - binop->GetKind(), - accumulator, - mul->GetLeft(), - mul->GetRight()); + new (allocator) HMultiplyAccumulate(type, + binop->GetKind(), + accumulator, + mul->GetLeft(), + mul->GetRight()); binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); DCHECK(!mul->HasUses()); mul->GetBlock()->RemoveInstruction(mul); return true; } - } else if (use->IsNeg() && isa != kArm) { + } else if (use->IsNeg() && 
isa != InstructionSet::kArm) { HMultiplyAccumulate* mulacc = - new (arena) HMultiplyAccumulate(type, - HInstruction::kSub, - mul->GetBlock()->GetGraph()->GetConstant(type, 0), - mul->GetLeft(), - mul->GetRight()); + new (allocator) HMultiplyAccumulate(type, + HInstruction::kSub, + mul->GetBlock()->GetGraph()->GetConstant(type, 0), + mul->GetLeft(), + mul->GetRight()); use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc); DCHECK(!mul->HasUses()); @@ -216,7 +216,7 @@ bool TryMergeNegatedInput(HBinaryOperation* op) { // BIC dst, src, mask (respectively ORN, EON) HInstruction* src = hnot->AsNot()->GetInput(); - HBitwiseNegatedRight* neg_op = new (hnot->GetBlock()->GetGraph()->GetArena()) + HBitwiseNegatedRight* neg_op = new (hnot->GetBlock()->GetGraph()->GetAllocator()) HBitwiseNegatedRight(op->GetType(), op->GetKind(), hother, src, op->GetDexPc()); op->GetBlock()->ReplaceAndRemoveInstructionWith(op, neg_op); @@ -240,13 +240,13 @@ bool TryExtractArrayAccessAddress(HInstruction* access, return false; } if (access->IsArraySet() && - access->AsArraySet()->GetValue()->GetType() == Primitive::kPrimNot) { + access->AsArraySet()->GetValue()->GetType() == DataType::Type::kReference) { // The access may require a runtime call or the original array pointer. return false; } if (kEmitCompilerReadBarrier && access->IsArrayGet() && - access->GetType() == Primitive::kPrimNot) { + access->GetType() == DataType::Type::kReference) { // For object arrays, the read barrier instrumentation requires // the original array pointer. // TODO: This can be relaxed for Baker CC. @@ -255,10 +255,10 @@ bool TryExtractArrayAccessAddress(HInstruction* access, // Proceed to extract the base address computation. HGraph* graph = access->GetBlock()->GetGraph(); - ArenaAllocator* arena = graph->GetArena(); + ArenaAllocator* allocator = graph->GetAllocator(); HIntConstant* offset = graph->GetIntConstant(data_offset); - HIntermediateAddress* address = new (arena) HIntermediateAddress(array, offset, kNoDexPc); + HIntermediateAddress* address = new (allocator) HIntermediateAddress(array, offset, kNoDexPc); // TODO: Is it ok to not have this on the intermediate address? // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo()); access->GetBlock()->InsertInstructionBefore(address, access); @@ -281,73 +281,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, return true; } -bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa) { - Primitive::Type type = mul->GetPackedType(); - switch (isa) { - case kArm64: - if (!(type == Primitive::kPrimByte || - type == Primitive::kPrimChar || - type == Primitive::kPrimShort || - type == Primitive::kPrimInt)) { - return false; - } - break; - default: - return false; - } - - ArenaAllocator* arena = mul->GetBlock()->GetGraph()->GetArena(); - - if (mul->HasOnlyOneNonEnvironmentUse()) { - HInstruction* use = mul->GetUses().front().GetUser(); - if (use->IsVecAdd() || use->IsVecSub()) { - // Replace code looking like - // VECMUL tmp, x, y - // VECADD/SUB dst, acc, tmp - // with - // VECMULACC dst, acc, x, y - // Note that we do not want to (unconditionally) perform the merge when the - // multiplication has multiple uses and it can be merged in all of them. - // Multiple uses could happen on the same control-flow path, and we would - // then increase the amount of work. In the future we could try to evaluate - // whether all uses are on different control-flow paths (using dominance and - // reverse-dominance information) and only perform the merge when they are. 
- HInstruction* accumulator = nullptr; - HVecBinaryOperation* binop = use->AsVecBinaryOperation(); - HInstruction* binop_left = binop->GetLeft(); - HInstruction* binop_right = binop->GetRight(); - // This is always true since the `HVecMul` has only one use (which is checked above). - DCHECK_NE(binop_left, binop_right); - if (binop_right == mul) { - accumulator = binop_left; - } else if (use->IsVecAdd()) { - DCHECK_EQ(binop_left, mul); - accumulator = binop_right; - } - - HInstruction::InstructionKind kind = - use->IsVecAdd() ? HInstruction::kAdd : HInstruction::kSub; - if (accumulator != nullptr) { - HVecMultiplyAccumulate* mulacc = - new (arena) HVecMultiplyAccumulate(arena, - kind, - accumulator, - mul->GetLeft(), - mul->GetRight(), - binop->GetPackedType(), - binop->GetVectorLength()); - - binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc); - DCHECK(!mul->HasUses()); - mul->GetBlock()->RemoveInstruction(mul); - return true; - } - } - } - - return false; -} - bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) { if (index->IsConstant()) { // If index is constant the whole address calculation often can be done by LDR/STR themselves. @@ -356,11 +289,11 @@ bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* } HGraph* graph = access->GetBlock()->GetGraph(); - ArenaAllocator* arena = graph->GetArena(); - Primitive::Type packed_type = access->GetPackedType(); + ArenaAllocator* allocator = graph->GetAllocator(); + DataType::Type packed_type = access->GetPackedType(); uint32_t data_offset = mirror::Array::DataOffset( - Primitive::ComponentSize(packed_type)).Uint32Value(); - size_t component_shift = Primitive::ComponentSizeShift(packed_type); + DataType::Size(packed_type)).Uint32Value(); + size_t component_shift = DataType::SizeShift(packed_type); bool is_extracting_beneficial = false; // It is beneficial to extract index intermediate address only if there are at least 2 users. 
@@ -368,10 +301,10 @@ bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* HInstruction* user = use.GetUser(); if (user->IsVecMemoryOperation() && user != access) { HVecMemoryOperation* another_access = user->AsVecMemoryOperation(); - Primitive::Type another_packed_type = another_access->GetPackedType(); + DataType::Type another_packed_type = another_access->GetPackedType(); uint32_t another_data_offset = mirror::Array::DataOffset( - Primitive::ComponentSize(another_packed_type)).Uint32Value(); - size_t another_component_shift = Primitive::ComponentSizeShift(another_packed_type); + DataType::Size(another_packed_type)).Uint32Value(); + size_t another_component_shift = DataType::SizeShift(another_packed_type); if (another_data_offset == data_offset && another_component_shift == component_shift) { is_extracting_beneficial = true; break; @@ -395,7 +328,7 @@ bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* HIntConstant* offset = graph->GetIntConstant(data_offset); HIntConstant* shift = graph->GetIntConstant(component_shift); HIntermediateAddressIndex* address = - new (arena) HIntermediateAddressIndex(index, offset, shift, kNoDexPc); + new (allocator) HIntermediateAddressIndex(index, offset, shift, kNoDexPc); access->GetBlock()->InsertInstructionBefore(address, access); access->ReplaceInput(address, 1); diff --git a/compiler/optimizing/instruction_simplifier_shared.h b/compiler/optimizing/instruction_simplifier_shared.h index 371619fa2e..758fc7663d 100644 --- a/compiler/optimizing/instruction_simplifier_shared.h +++ b/compiler/optimizing/instruction_simplifier_shared.h @@ -26,10 +26,10 @@ namespace helpers { inline bool CanFitInShifterOperand(HInstruction* instruction) { if (instruction->IsTypeConversion()) { HTypeConversion* conversion = instruction->AsTypeConversion(); - Primitive::Type result_type = conversion->GetResultType(); - Primitive::Type input_type = conversion->GetInputType(); + DataType::Type result_type = conversion->GetResultType(); + DataType::Type input_type = conversion->GetInputType(); // We don't expect to see the same type as input and result. - return Primitive::IsIntegralType(result_type) && Primitive::IsIntegralType(input_type) && + return DataType::IsIntegralType(result_type) && DataType::IsIntegralType(input_type) && (result_type != input_type); } else { return (instruction->IsShl() && instruction->AsShl()->InputAt(1)->IsIntConstant()) || @@ -41,7 +41,8 @@ inline bool CanFitInShifterOperand(HInstruction* instruction) { inline bool HasShifterOperand(HInstruction* instr, InstructionSet isa) { // On ARM64 `neg` instructions are an alias of `sub` using the zero register // as the first register input. 
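// In assembly terms (illustrative, not part of the patch), the alias mentioned above is:
//
//   neg x0, x1, lsl #3   ==   sub x0, xzr, x1, lsl #3
//
// so on AArch64 HNeg accepts a shifter operand exactly like HSub, which is why the check
// below includes (isa == InstructionSet::kArm64 && instr->IsNeg()).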
- bool res = instr->IsAdd() || instr->IsAnd() || (isa == kArm64 && instr->IsNeg()) || + bool res = instr->IsAdd() || instr->IsAnd() || + (isa == InstructionSet::kArm64 && instr->IsNeg()) || instr->IsOr() || instr->IsSub() || instr->IsXor(); return res; } @@ -58,7 +59,6 @@ bool TryExtractArrayAccessAddress(HInstruction* access, HInstruction* index, size_t data_offset); -bool TryCombineVecMultiplyAccumulate(HVecMul* mul, InstructionSet isa); bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index); } // namespace art diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index b664d41013..f8dc316e45 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -18,18 +18,28 @@ #include "art_field-inl.h" #include "art_method-inl.h" +#include "base/utils.h" #include "class_linker.h" +#include "dex/invoke_type.h" #include "driver/compiler_driver.h" #include "driver/compiler_options.h" -#include "invoke_type.h" #include "mirror/dex_cache-inl.h" #include "nodes.h" #include "scoped_thread_state_change-inl.h" #include "thread-current-inl.h" -#include "utils.h" namespace art { +// Check that intrinsic enum values fit within space set aside in ArtMethod modifier flags. +#define CHECK_INTRINSICS_ENUM_VALUES(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ + static_assert( \ + static_cast<uint32_t>(Intrinsics::k ## Name) <= (kAccIntrinsicBits >> CTZ(kAccIntrinsicBits)), \ + "Instrinsics enumeration space overflow."); +#include "intrinsics_list.h" + INTRINSICS_LIST(CHECK_INTRINSICS_ENUM_VALUES) +#undef INTRINSICS_LIST +#undef CHECK_INTRINSICS_ENUM_VALUES + // Function that returns whether an intrinsic is static/direct or virtual. static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { switch (i) { @@ -39,7 +49,7 @@ static inline InvokeType GetIntrinsicInvokeType(Intrinsics i) { case Intrinsics::k ## Name: \ return IsStatic; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS } @@ -55,7 +65,7 @@ static inline IntrinsicNeedsEnvironmentOrCache NeedsEnvironmentOrCache(Intrinsic case Intrinsics::k ## Name: \ return NeedsEnvironmentOrCache; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS } @@ -71,7 +81,7 @@ static inline IntrinsicSideEffects GetSideEffects(Intrinsics i) { case Intrinsics::k ## Name: \ return SideEffects; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS } @@ -87,14 +97,15 @@ static inline IntrinsicExceptions GetExceptions(Intrinsics i) { case Intrinsics::k ## Name: \ return Exceptions; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS } return kCanThrow; } -static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { +static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) + REQUIRES_SHARED(Locks::mutator_lock_) { // Whenever the intrinsic is marked as static, report an error if we find an InvokeVirtual. // // Whenever the intrinsic is marked as direct and we find an InvokeVirtual, a devirtualization @@ -109,6 +120,7 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { // InvokeStaticOrDirect. 
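// Summary added for readability (derived only from the switch that follows, not part of
// the patch) -- accepted (intrinsic type, invoke type) combinations:
//
//   intrinsic kStatic  -> only an actual kStatic invoke
//   intrinsic kDirect  -> kDirect, or kVirtual when the resolved method or its declaring
//                         class is final (i.e. the call was devirtualized)
//   intrinsic kVirtual -> kVirtual, kDirect, or kInterface (call may be devirtualized)
//   kSuper / kInterface / kPolymorphic -> never recognized here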
InvokeType intrinsic_type = GetIntrinsicInvokeType(intrinsic); InvokeType invoke_type = invoke->GetInvokeType(); + switch (intrinsic_type) { case kStatic: return (invoke_type == kStatic); @@ -119,18 +131,51 @@ static bool CheckInvokeType(Intrinsics intrinsic, HInvoke* invoke) { } if (invoke_type == kVirtual) { ArtMethod* art_method = invoke->GetResolvedMethod(); - ScopedObjectAccess soa(Thread::Current()); return (art_method->IsFinal() || art_method->GetDeclaringClass()->IsFinal()); } return false; case kVirtual: // Call might be devirtualized. - return (invoke_type == kVirtual || invoke_type == kDirect); + return (invoke_type == kVirtual || invoke_type == kDirect || invoke_type == kInterface); - default: + case kSuper: + case kInterface: + case kPolymorphic: return false; } + LOG(FATAL) << "Unknown intrinsic invoke type: " << intrinsic_type; + UNREACHABLE(); +} + +bool IntrinsicsRecognizer::Recognize(HInvoke* invoke, + ArtMethod* art_method, + /*out*/ bool* wrong_invoke_type) { + if (art_method == nullptr) { + art_method = invoke->GetResolvedMethod(); + } + *wrong_invoke_type = false; + if (art_method == nullptr || !art_method->IsIntrinsic()) { + return false; + } + + // TODO: b/65872996 The intent is that polymorphic signature methods should + // be compiler intrinsics. At present, they are only interpreter intrinsics. + if (art_method->IsPolymorphicSignature()) { + return false; + } + + Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic()); + if (CheckInvokeType(intrinsic, invoke) == false) { + *wrong_invoke_type = true; + return false; + } + + invoke->SetIntrinsic(intrinsic, + NeedsEnvironmentOrCache(intrinsic), + GetSideEffects(intrinsic), + GetExceptions(intrinsic)); + return true; } void IntrinsicsRecognizer::Run() { @@ -140,22 +185,14 @@ void IntrinsicsRecognizer::Run() { inst_it.Advance()) { HInstruction* inst = inst_it.Current(); if (inst->IsInvoke()) { - HInvoke* invoke = inst->AsInvoke(); - ArtMethod* art_method = invoke->GetResolvedMethod(); - if (art_method != nullptr && art_method->IsIntrinsic()) { - Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic()); - if (!CheckInvokeType(intrinsic, invoke)) { - LOG(WARNING) << "Found an intrinsic with unexpected invoke type: " - << static_cast<uint32_t>(intrinsic) << " for " - << art_method->PrettyMethod() - << invoke->DebugName(); - } else { - invoke->SetIntrinsic(intrinsic, - NeedsEnvironmentOrCache(intrinsic), - GetSideEffects(intrinsic), - GetExceptions(intrinsic)); - MaybeRecordStat(MethodCompilationStat::kIntrinsicRecognized); - } + bool wrong_invoke_type = false; + if (Recognize(inst->AsInvoke(), /* art_method */ nullptr, &wrong_invoke_type)) { + MaybeRecordStat(stats_, MethodCompilationStat::kIntrinsicRecognized); + } else if (wrong_invoke_type) { + LOG(WARNING) + << "Found an intrinsic with unexpected invoke type: " + << inst->AsInvoke()->GetResolvedMethod()->PrettyMethod() << " " + << inst->DebugName(); } } } @@ -172,7 +209,7 @@ std::ostream& operator<<(std::ostream& os, const Intrinsics& intrinsic) { os << # Name; \ break; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef STATIC_INTRINSICS_LIST #undef VIRTUAL_INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -208,7 +245,7 @@ void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke, } // The intrinsic will call if it needs to allocate a j.l.Integer. 
- LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetArena()) LocationSummary( + LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary( invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); if (!invoke->InputAt(0)->IsConstant()) { locations->SetInAt(0, Location::RequiresRegister()); diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h index 9da5a7fa3b..62991435c7 100644 --- a/compiler/optimizing/intrinsics.h +++ b/compiler/optimizing/intrinsics.h @@ -37,11 +37,19 @@ static constexpr uint64_t kNanDouble = 0x7ff8000000000000; // Recognize intrinsics from HInvoke nodes. class IntrinsicsRecognizer : public HOptimization { public: - IntrinsicsRecognizer(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kIntrinsicsRecognizerPassName, stats) {} + IntrinsicsRecognizer(HGraph* graph, + OptimizingCompilerStats* stats, + const char* name = kIntrinsicsRecognizerPassName) + : HOptimization(graph, name, stats) {} void Run() OVERRIDE; + // Static helper that recognizes intrinsic call. Returns true on success. + // If it fails due to invoke type mismatch, wrong_invoke_type is set. + // Useful to recognize intrinsics on individual calls outside this full pass. + static bool Recognize(HInvoke* invoke, ArtMethod* method, /*out*/ bool* wrong_invoke_type) + REQUIRES_SHARED(Locks::mutator_lock_); + static constexpr const char* kIntrinsicsRecognizerPassName = "intrinsics_recognition"; private: @@ -63,7 +71,7 @@ class IntrinsicVisitor : public ValueObject { Visit ## Name(invoke); \ return; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -77,7 +85,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) virtual void Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ } #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -100,7 +108,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) // We're moving potentially two or more locations to locations that could overlap, so we need // a parallel move resolver. 
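// Why a parallel move is needed here (illustrative, not part of the patch): the argument
// shuffle may contain cyclic moves such as {r0 -> r1, r1 -> r0}; emitting them naively in
// sequence would clobber r1 before it is read. HParallelMove records all pending moves and
// the parallel move resolver orders them (introducing a temp or swap where necessary) so
// that every source is read before it is overwritten.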
- HParallelMove parallel_move(codegen->GetGraph()->GetArena()); + HParallelMove parallel_move(codegen->GetGraph()->GetAllocator()); for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) { HInstruction* input = invoke->InputAt(i); @@ -203,6 +211,7 @@ class StringEqualsOptimizations : public IntrinsicOptimizations { INTRINSIC_OPTIMIZATION(ArgumentNotNull, 0); INTRINSIC_OPTIMIZATION(ArgumentIsString, 1); + INTRINSIC_OPTIMIZATION(NoReadBarrierForStringClass, 2); private: DISALLOW_COPY_AND_ASSIGN(StringEqualsOptimizations); @@ -256,25 +265,63 @@ void IntrinsicCodeGenerator ## Arch::Visit ## Name(HInvoke* invoke) { \ LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \ << " should have been converted to HIR"; \ } -#define UNREACHABLE_INTRINSICS(Arch) \ -UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \ -UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \ -UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \ -UNREACHABLE_INTRINSIC(Arch, DoubleIsNaN) \ -UNREACHABLE_INTRINSIC(Arch, IntegerRotateLeft) \ -UNREACHABLE_INTRINSIC(Arch, LongRotateLeft) \ -UNREACHABLE_INTRINSIC(Arch, IntegerRotateRight) \ -UNREACHABLE_INTRINSIC(Arch, LongRotateRight) \ -UNREACHABLE_INTRINSIC(Arch, IntegerCompare) \ -UNREACHABLE_INTRINSIC(Arch, LongCompare) \ -UNREACHABLE_INTRINSIC(Arch, IntegerSignum) \ -UNREACHABLE_INTRINSIC(Arch, LongSignum) \ -UNREACHABLE_INTRINSIC(Arch, StringCharAt) \ -UNREACHABLE_INTRINSIC(Arch, StringIsEmpty) \ -UNREACHABLE_INTRINSIC(Arch, StringLength) \ -UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence) \ -UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence) \ -UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence) +#define UNREACHABLE_INTRINSICS(Arch) \ +UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits) \ +UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits) \ +UNREACHABLE_INTRINSIC(Arch, FloatIsNaN) \ +UNREACHABLE_INTRINSIC(Arch, DoubleIsNaN) \ +UNREACHABLE_INTRINSIC(Arch, IntegerRotateLeft) \ +UNREACHABLE_INTRINSIC(Arch, LongRotateLeft) \ +UNREACHABLE_INTRINSIC(Arch, IntegerRotateRight) \ +UNREACHABLE_INTRINSIC(Arch, LongRotateRight) \ +UNREACHABLE_INTRINSIC(Arch, IntegerCompare) \ +UNREACHABLE_INTRINSIC(Arch, LongCompare) \ +UNREACHABLE_INTRINSIC(Arch, IntegerSignum) \ +UNREACHABLE_INTRINSIC(Arch, LongSignum) \ +UNREACHABLE_INTRINSIC(Arch, StringCharAt) \ +UNREACHABLE_INTRINSIC(Arch, StringIsEmpty) \ +UNREACHABLE_INTRINSIC(Arch, StringLength) \ +UNREACHABLE_INTRINSIC(Arch, UnsafeLoadFence) \ +UNREACHABLE_INTRINSIC(Arch, UnsafeStoreFence) \ +UNREACHABLE_INTRINSIC(Arch, UnsafeFullFence) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleFullFence) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleAcquireFence) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleReleaseFence) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleLoadLoadFence) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleStoreStoreFence) \ +UNREACHABLE_INTRINSIC(Arch, MethodHandleInvokeExact) \ +UNREACHABLE_INTRINSIC(Arch, MethodHandleInvoke) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleCompareAndExchange) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleCompareAndExchangeAcquire) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleCompareAndExchangeRelease) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleCompareAndSet) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGet) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAcquire) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndAdd) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndAddAcquire) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndAddRelease) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseAnd) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseAndAcquire) \ 
+UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseAndRelease) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseOr) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseOrAcquire) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseOrRelease) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseXor) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseXorAcquire) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndBitwiseXorRelease) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndSet) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndSetAcquire) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetAndSetRelease) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetOpaque) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleGetVolatile) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleSet) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleSetOpaque) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleSetRelease) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleSetVolatile) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleWeakCompareAndSet) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleWeakCompareAndSetAcquire) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleWeakCompareAndSetPlain) \ +UNREACHABLE_INTRINSIC(Arch, VarHandleWeakCompareAndSetRelease) template <typename IntrinsicLocationsBuilder, typename Codegenerator> bool IsCallFreeIntrinsic(HInvoke* invoke, Codegenerator* codegen) { diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc index 5691dd0d4a..0e6485be9f 100644 --- a/compiler/optimizing/intrinsics_arm64.cc +++ b/compiler/optimizing/intrinsics_arm64.cc @@ -21,6 +21,7 @@ #include "code_generator_arm64.h" #include "common_arm64.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "lock_word.h" #include "mirror/array-inl.h" @@ -69,22 +70,22 @@ MacroAssembler* IntrinsicCodeGeneratorARM64::GetVIXLAssembler() { } ArenaAllocator* IntrinsicCodeGeneratorARM64::GetAllocator() { - return codegen_->GetGraph()->GetArena(); + return codegen_->GetGraph()->GetAllocator(); } #define __ codegen->GetVIXLAssembler()-> static void MoveFromReturnRegister(Location trg, - Primitive::Type type, + DataType::Type type, CodeGeneratorARM64* codegen) { if (!trg.IsValid()) { - DCHECK(type == Primitive::kPrimVoid); + DCHECK(type == DataType::Type::kVoid); return; } - DCHECK_NE(type, Primitive::kPrimVoid); + DCHECK_NE(type, DataType::Type::kVoid); - if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) { + if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) { Register trg_reg = RegisterFrom(trg, type); Register res_reg = RegisterFrom(ARM64ReturnLocation(type), type); __ Mov(trg_reg, res_reg, kDiscardForSameWReg); @@ -172,7 +173,7 @@ class ReadBarrierSystemArrayCopySlowPathARM64 : public SlowPathCodeARM64 { DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); - const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + const int32_t element_size = DataType::Size(DataType::Type::kReference); Register src_curr_addr = XRegisterFrom(locations->GetTemp(0)); Register dst_curr_addr = XRegisterFrom(locations->GetTemp(1)); @@ -235,18 +236,16 @@ bool IntrinsicLocationsBuilderARM64::TryDispatch(HInvoke* invoke) { #define __ masm-> -static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* 
invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); } -static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); } @@ -266,10 +265,10 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, MacroAssembler } void IntrinsicLocationsBuilderARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke); + CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { @@ -280,10 +279,10 @@ void IntrinsicCodeGeneratorARM64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke); + CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -293,27 +292,26 @@ void IntrinsicCodeGeneratorARM64::VisitFloatIntBitsToFloat(HInvoke* invoke) { MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); } -static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } static void GenReverseBytes(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, MacroAssembler* masm) { Location in = locations->InAt(0); Location out = locations->Out(); switch (type) { - case Primitive::kPrimShort: + case DataType::Type::kInt16: __ Rev16(WRegisterFrom(out), WRegisterFrom(in)); __ Sxth(WRegisterFrom(out), WRegisterFrom(out)); break; - case Primitive::kPrimInt: - case Primitive::kPrimLong: + case DataType::Type::kInt32: + case DataType::Type::kInt64: __ Rev(RegisterFrom(out, type), RegisterFrom(in, type)); break; default: @@ -323,42 +321,41 @@ static void GenReverseBytes(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitIntegerReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitIntegerReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, 
GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitLongReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitLongReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitShortReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitShortReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetVIXLAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetVIXLAssembler()); } -static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } static void GenNumberOfLeadingZeros(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, MacroAssembler* masm) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); Location in = locations->InAt(0); Location out = locations->Out(); @@ -367,25 +364,25 @@ static void GenNumberOfLeadingZeros(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); + GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); + GenNumberOfLeadingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler()); } static void GenNumberOfTrailingZeros(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, MacroAssembler* masm) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); Location in = locations->InAt(0); Location out = locations->Out(); @@ -395,25 +392,25 @@ static void GenNumberOfTrailingZeros(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), 
Primitive::kPrimInt, GetVIXLAssembler()); + GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); + GenNumberOfTrailingZeros(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler()); } static void GenReverse(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, MacroAssembler* masm) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); Location in = locations->InAt(0); Location out = locations->Out(); @@ -422,31 +419,31 @@ static void GenReverse(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitIntegerReverse(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitIntegerReverse(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetVIXLAssembler()); + GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitLongReverse(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitLongReverse(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetVIXLAssembler()); + GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetVIXLAssembler()); } -static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* masm) { - DCHECK(Primitive::IsIntOrLongType(type)) << type; - DCHECK_EQ(instr->GetType(), Primitive::kPrimInt); - DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type); +static void GenBitCount(HInvoke* instr, DataType::Type type, MacroAssembler* masm) { + DCHECK(DataType::IsIntOrLongType(type)) << type; + DCHECK_EQ(instr->GetType(), DataType::Type::kInt32); + DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type); UseScratchRegisterScope temps(masm); Register src = InputRegisterAt(instr, 0); Register dst = RegisterFrom(instr->GetLocations()->Out(), type); - FPRegister fpr = (type == Primitive::kPrimLong) ? temps.AcquireD() : temps.AcquireS(); + FPRegister fpr = (type == DataType::Type::kInt64) ? 
temps.AcquireD() : temps.AcquireS(); __ Fmov(fpr, src); __ Cnt(fpr.V8B(), fpr.V8B()); @@ -455,25 +452,86 @@ static void GenBitCount(HInvoke* instr, Primitive::Type type, MacroAssembler* ma } void IntrinsicLocationsBuilderARM64::VisitLongBitCount(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke, Primitive::kPrimLong, GetVIXLAssembler()); + GenBitCount(invoke, DataType::Type::kInt64, GetVIXLAssembler()); } void IntrinsicLocationsBuilderARM64::VisitIntegerBitCount(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke, Primitive::kPrimInt, GetVIXLAssembler()); + GenBitCount(invoke, DataType::Type::kInt32, GetVIXLAssembler()); } -static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void GenHighestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) { + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + + UseScratchRegisterScope temps(masm); + + Register src = InputRegisterAt(invoke, 0); + Register dst = RegisterFrom(invoke->GetLocations()->Out(), type); + Register temp = (type == DataType::Type::kInt64) ? temps.AcquireX() : temps.AcquireW(); + size_t high_bit = (type == DataType::Type::kInt64) ? 63u : 31u; + size_t clz_high_bit = (type == DataType::Type::kInt64) ? 6u : 5u; + + __ Clz(temp, src); + __ Mov(dst, UINT64_C(1) << high_bit); // MOV (bitmask immediate) + __ Bic(dst, dst, Operand(temp, LSL, high_bit - clz_high_bit)); // Clear dst if src was 0. + __ Lsr(dst, dst, temp); +} + +void IntrinsicLocationsBuilderARM64::VisitIntegerHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitIntegerHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler()); +} + +void IntrinsicLocationsBuilderARM64::VisitLongHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitLongHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler()); +} + +static void GenLowestOneBit(HInvoke* invoke, DataType::Type type, MacroAssembler* masm) { + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); + + UseScratchRegisterScope temps(masm); + + Register src = InputRegisterAt(invoke, 0); + Register dst = RegisterFrom(invoke->GetLocations()->Out(), type); + Register temp = (type == DataType::Type::kInt64) ? 
temps.AcquireX() : temps.AcquireW(); + + __ Neg(temp, src); + __ And(dst, temp, src); +} + +void IntrinsicLocationsBuilderARM64::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitIntegerLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, DataType::Type::kInt32, GetVIXLAssembler()); +} + +void IntrinsicLocationsBuilderARM64::VisitLongLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitLongLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, DataType::Type::kInt64, GetVIXLAssembler()); +} + +static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } @@ -489,7 +547,7 @@ static void MathAbsFP(LocationSummary* locations, bool is64bit, MacroAssembler* } void IntrinsicLocationsBuilderARM64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { @@ -497,21 +555,13 @@ void IntrinsicCodeGeneratorARM64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAbsFloat(HInvoke* invoke) { MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetVIXLAssembler()); } -static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); -} - static void GenAbsInteger(LocationSummary* locations, bool is64bit, MacroAssembler* masm) { @@ -526,7 +576,7 @@ static void GenAbsInteger(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { @@ -534,7 +584,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathAbsInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAbsLong(HInvoke* invoke) { @@ -559,17 +609,16 @@ static void GenMinMaxFP(LocationSummary* locations, } } -static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } void IntrinsicLocationsBuilderARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, 
invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { @@ -577,7 +626,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathMinDoubleDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { @@ -585,7 +634,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -593,7 +642,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathMaxDoubleDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -618,7 +667,7 @@ static void GenMinMax(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { @@ -626,7 +675,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathMinIntInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { @@ -634,7 +683,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathMinLongLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { @@ -642,7 +691,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathMaxIntInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { @@ -650,7 +699,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathMaxLongLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathSqrt(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) { @@ -660,7 +709,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathSqrt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathCeil(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) { @@ -670,7 +719,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathCeil(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathFloor(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* invoke) { @@ -680,7 +729,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathFloor(HInvoke* 
invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathRint(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) { @@ -689,10 +738,9 @@ void IntrinsicCodeGeneratorARM64::VisitMathRint(HInvoke* invoke) { __ Frintn(DRegisterFrom(locations->Out()), DRegisterFrom(locations->InAt(0))); } -static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToIntPlusFPTempLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); @@ -736,7 +784,7 @@ static void GenMathRound(HInvoke* invoke, bool is_double, vixl::aarch64::MacroAs } void IntrinsicLocationsBuilderARM64::VisitMathRoundDouble(HInvoke* invoke) { - CreateFPToIntPlusFPTempLocations(arena_, invoke); + CreateFPToIntPlusFPTempLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) { @@ -744,7 +792,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathRoundDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathRoundFloat(HInvoke* invoke) { - CreateFPToIntPlusFPTempLocations(arena_, invoke); + CreateFPToIntPlusFPTempLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) { @@ -752,7 +800,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMemoryPeekByte(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) { @@ -762,7 +810,7 @@ void IntrinsicCodeGeneratorARM64::VisitMemoryPeekByte(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMemoryPeekIntNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) { @@ -772,7 +820,7 @@ void IntrinsicCodeGeneratorARM64::VisitMemoryPeekIntNative(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMemoryPeekLongNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) { @@ -782,7 +830,7 @@ void IntrinsicCodeGeneratorARM64::VisitMemoryPeekLongNative(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMemoryPeekShortNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) { @@ -791,16 +839,15 @@ void IntrinsicCodeGeneratorARM64::VisitMemoryPeekShortNative(HInvoke* invoke) { AbsoluteHeapOperandFrom(invoke->GetLocations()->InAt(0), 0)); } -static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* 
locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); } void IntrinsicLocationsBuilderARM64::VisitMemoryPokeByte(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) { @@ -810,7 +857,7 @@ void IntrinsicCodeGeneratorARM64::VisitMemoryPokeByte(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMemoryPokeIntNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) { @@ -820,7 +867,7 @@ void IntrinsicCodeGeneratorARM64::VisitMemoryPokeIntNative(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMemoryPokeLongNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) { @@ -830,7 +877,7 @@ void IntrinsicCodeGeneratorARM64::VisitMemoryPokeLongNative(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMemoryPokeShortNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) { @@ -840,25 +887,24 @@ void IntrinsicCodeGeneratorARM64::VisitMemoryPokeShortNative(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitThreadCurrentThread(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetOut(Location::RequiresRegister()); } void IntrinsicCodeGeneratorARM64::VisitThreadCurrentThread(HInvoke* invoke) { - codegen_->Load(Primitive::kPrimNot, WRegisterFrom(invoke->GetLocations()->Out()), + codegen_->Load(DataType::Type::kReference, WRegisterFrom(invoke->GetLocations()->Out()), MemOperand(tr, Thread::PeerOffset<kArm64PointerSize>().Int32Value())); } static void GenUnsafeGet(HInvoke* invoke, - Primitive::Type type, + DataType::Type type, bool is_volatile, CodeGeneratorARM64* codegen) { LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == Primitive::kPrimInt) || - (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)); + DCHECK((type == DataType::Type::kInt32) || + (type == DataType::Type::kInt64) || + (type == DataType::Type::kReference)); Location base_loc = locations->InAt(1); Register base = WRegisterFrom(base_loc); // Object pointer. Location offset_loc = locations->InAt(2); @@ -866,7 +912,7 @@ static void GenUnsafeGet(HInvoke* invoke, Location trg_loc = locations->Out(); Register trg = RegisterFrom(trg_loc, type); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case. 
Register temp = WRegisterFrom(locations->GetTemp(0)); codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke, @@ -887,22 +933,23 @@ static void GenUnsafeGet(HInvoke* invoke, codegen->Load(type, trg, mem_op); } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { DCHECK(trg.IsW()); codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc); } } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. // We need a temporary register for the read barrier marking slow @@ -917,47 +964,46 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke } void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, 
/* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -965,35 +1011,35 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutLong(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicLocationsBuilderARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } static void GenUnsafePut(HInvoke* invoke, - Primitive::Type type, + DataType::Type type, bool is_volatile, bool is_ordered, CodeGeneratorARM64* codegen) { @@ -1011,7 +1057,7 @@ static void GenUnsafePut(HInvoke* invoke, // freeing the temporary registers so they can be used in `MarkGCCard`. UseScratchRegisterScope temps(masm); - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { DCHECK(value.IsW()); Register temp = temps.AcquireW(); __ Mov(temp.W(), value.W()); @@ -1026,7 +1072,7 @@ static void GenUnsafePut(HInvoke* invoke, } } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { bool value_can_be_null = true; // TODO: Worth finding out this information? 
codegen->MarkGCCard(base, value, value_can_be_null); } @@ -1034,79 +1080,80 @@ static void GenUnsafePut(HInvoke* invoke, void IntrinsicCodeGeneratorARM64::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke, - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ false, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke, - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ false, /* is_ordered */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke, - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ true, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke, - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ false, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke, - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ false, /* is_ordered */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke, - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ true, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke, - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ false, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke, - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ false, /* is_ordered */ true, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke, - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ true, /* is_ordered */ false, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, HInvoke* invoke, - Primitive::Type type) { + DataType::Type type) { bool can_call = kEmitCompilerReadBarrier && kUseBakerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -1117,17 +1164,17 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, // operations to potentially clobber the output. Likewise when // emitting a (Baker) read barrier, which may call. Location::OutputOverlap overlaps = - ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) + ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call) ? 
Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // Temporary register for (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); } } -static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) { +static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARM64* codegen) { MacroAssembler* masm = codegen->GetVIXLAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1141,7 +1188,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co Register value = RegisterFrom(locations->InAt(4), type); // Value. // This needs to be before the temp registers, as MarkGCCard also uses VIXL temps. - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // Mark card for object assuming new value is stored. bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); @@ -1173,7 +1220,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co __ Add(tmp_ptr, base.X(), Operand(offset)); - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { codegen->GetAssembler()->PoisonHeapReference(expected); if (value.Is(expected)) { // Do not poison `value`, as it is the same register as @@ -1198,7 +1245,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co __ Bind(&exit_loop); __ Cset(out, eq); - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { codegen->GetAssembler()->UnpoisonHeapReference(expected); if (value.Is(expected)) { // Do not unpoison `value`, as it is the same register as @@ -1210,10 +1257,10 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* co } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt32); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASLong(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kInt64); } void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { // The only read barrier implementation supporting the @@ -1222,29 +1269,30 @@ void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) { return; } - CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntIntIntToInt(allocator_, invoke, DataType::Type::kReference); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke, Primitive::kPrimInt, codegen_); + GenCas(invoke, DataType::Type::kInt32, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) { - GenCas(invoke, Primitive::kPrimLong, codegen_); + GenCas(invoke, DataType::Type::kInt64, codegen_); } void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. 
DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke, Primitive::kPrimNot, codegen_); + GenCas(invoke, DataType::Type::kReference, codegen_); } void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - invoke->InputAt(1)->CanBeNull() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, + invoke->InputAt(1)->CanBeNull() + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); @@ -1291,7 +1339,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { SlowPathCodeARM64* slow_path = nullptr; const bool can_slow_path = invoke->InputAt(1)->CanBeNull(); if (can_slow_path) { - slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); codegen_->AddSlowPath(slow_path); __ Cbz(arg, slow_path->GetEntryLabel()); } @@ -1342,7 +1390,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); - const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); // Promote temp2 to an X reg, ready for LDR. @@ -1402,7 +1450,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringCompareTo(HInvoke* invoke) { __ Bind(&different_compression); // Comparison for different compression style. - const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + const size_t c_char_size = DataType::Size(DataType::Type::kInt8); DCHECK_EQ(c_char_size, 1u); temp1 = temp1.W(); temp2 = temp2.W(); @@ -1471,9 +1519,15 @@ static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_lengt } void IntrinsicLocationsBuilderARM64::VisitStringEquals(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + if (kEmitCompilerReadBarrier && + !StringEqualsOptimizations(invoke).GetArgumentIsString() && + !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { + // No support for this odd case (String class is moveable, not in the boot image). + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -1579,12 +1633,13 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { } // Assertions that must hold in order to compare strings 8 bytes at a time. + // Ok to do this because strings are zero-padded to kObjectAlignment. DCHECK_ALIGNED(value_offset, 8); static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded"); if (const_string != nullptr && - const_string_length < (is_compressed ? kShortConstStringEqualsCutoffInBytes - : kShortConstStringEqualsCutoffInBytes / 2u)) { + const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes + : kShortConstStringEqualsCutoffInBytes / 2u)) { // Load and compare the contents. 
Though we know the contents of the short const string // at compile time, materializing constants may be more code than loading from memory. int32_t offset = value_offset; @@ -1592,7 +1647,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 8u); temp = temp.X(); temp1 = temp1.X(); - while (remaining_bytes > 8u) { + while (remaining_bytes > sizeof(uint64_t)) { Register temp2 = XRegisterFrom(locations->GetTemp(0)); __ Ldp(temp, temp1, MemOperand(str.X(), offset)); __ Ldp(temp2, out, MemOperand(arg.X(), offset)); @@ -1628,7 +1683,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { temp1 = temp1.X(); Register temp2 = XRegisterFrom(locations->GetTemp(0)); // Loop to compare strings 8 bytes at a time starting at the front of the string. - // Ok to do this because strings are zero-padded to kObjectAlignment. __ Bind(&loop); __ Ldr(out, MemOperand(str.X(), temp1)); __ Ldr(temp2, MemOperand(arg.X(), temp1)); @@ -1655,7 +1709,6 @@ void IntrinsicCodeGeneratorARM64::VisitStringEquals(HInvoke* invoke) { static void GenerateVisitStringIndexOf(HInvoke* invoke, MacroAssembler* masm, CodeGeneratorARM64* codegen, - ArenaAllocator* allocator, bool start_at_zero) { LocationSummary* locations = invoke->GetLocations(); @@ -1670,16 +1723,16 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) > 0xFFFFU) { // Always needs the slow-path. We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. - slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); codegen->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } - } else if (code_point->GetType() != Primitive::kPrimChar) { + } else if (code_point->GetType() != DataType::Type::kUint16) { Register char_reg = WRegisterFrom(locations->InAt(1)); __ Tst(char_reg, 0xFFFF0000); - slow_path = new (allocator) IntrinsicSlowPathARM64(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); codegen->AddSlowPath(slow_path); __ B(ne, slow_path->GetEntryLabel()); } @@ -1699,53 +1752,48 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderARM64::VisitStringIndexOf(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32)); // Need to send start_index=0. 
locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2))); } void IntrinsicCodeGeneratorARM64::VisitStringIndexOf(HInvoke* invoke) { - GenerateVisitStringIndexOf( - invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ true); } void IntrinsicLocationsBuilderARM64::VisitStringIndexOfAfter(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimInt)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kInt32)); } void IntrinsicCodeGeneratorARM64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateVisitStringIndexOf( - invoke, GetVIXLAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); + GenerateVisitStringIndexOf(invoke, GetVIXLAssembler(), codegen_, /* start_at_zero */ false); } void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) { @@ -1754,7 +1802,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) Register byte_array = WRegisterFrom(locations->InAt(0)); __ Cmp(byte_array, 0); - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); codegen_->AddSlowPath(slow_path); __ B(eq, slow_path->GetEntryLabel()); @@ -1764,14 +1813,13 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromBytes(HInvoke* invoke) } void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromChars(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, 
LocationFrom(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) { @@ -1786,12 +1834,11 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromChars(HInvoke* invoke) } void IntrinsicLocationsBuilderARM64::VisitStringNewStringFromString(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimNot)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kReference)); } void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke) { @@ -1800,7 +1847,8 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke Register string_to_copy = WRegisterFrom(locations->InAt(0)); __ Cmp(string_to_copy, 0); - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); codegen_->AddSlowPath(slow_path); __ B(eq, slow_path->GetEntryLabel()); @@ -1809,29 +1857,27 @@ void IntrinsicCodeGeneratorARM64::VisitStringNewStringFromString(HInvoke* invoke __ Bind(slow_path->GetExitLabel()); } -static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); - DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType())); - DCHECK(Primitive::IsFloatingPointType(invoke->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->GetType())); - LocationSummary* const locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* const locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType())); } -static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); - DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(0)->GetType())); - DCHECK(Primitive::IsFloatingPointType(invoke->InputAt(1)->GetType())); - DCHECK(Primitive::IsFloatingPointType(invoke->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->InputAt(0)->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->InputAt(1)->GetType())); + DCHECK(DataType::IsFloatingPointType(invoke->GetType())); - LocationSummary* const locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* const locations = + new 
(allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0))); @@ -1846,7 +1892,7 @@ static void GenFPToFPCall(HInvoke* invoke, } void IntrinsicLocationsBuilderARM64::VisitMathCos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) { @@ -1854,7 +1900,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathCos(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathSin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) { @@ -1862,7 +1908,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathSin(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathAcos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) { @@ -1870,7 +1916,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathAcos(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathAsin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) { @@ -1878,7 +1924,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathAsin(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathAtan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) { @@ -1886,7 +1932,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathAtan(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathCbrt(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) { @@ -1894,7 +1940,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathCbrt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathCosh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) { @@ -1902,7 +1948,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathCosh(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathExp(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) { @@ -1910,7 +1956,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathExp(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathExpm1(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) { @@ -1918,7 +1964,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathExpm1(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathLog(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) { @@ -1926,7 +1972,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathLog(HInvoke* invoke) { } void 
IntrinsicLocationsBuilderARM64::VisitMathLog10(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) { @@ -1934,7 +1980,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathLog10(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathSinh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) { @@ -1942,7 +1988,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathSinh(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathTan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) { @@ -1950,7 +1996,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathTan(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathTanh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) { @@ -1958,15 +2004,23 @@ void IntrinsicCodeGeneratorARM64::VisitMathTanh(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathAtan2(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARM64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARM64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARM64::VisitMathHypot(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) { @@ -1974,7 +2028,7 @@ void IntrinsicCodeGeneratorARM64::VisitMathHypot(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitMathNextAfter(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) { @@ -1982,9 +2036,8 @@ void IntrinsicCodeGeneratorARM64::VisitMathNextAfter(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -2001,7 +2054,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Check assumption that sizeof(Char) is 2 (used in scaling below). - const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); // Location of data in char array buffer. 
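The hunks above repeatedly replace `new (arena_) LocationSummary(...)` with `new (allocator_) LocationSummary(...)`; both spellings rely on a placement-new overload that carves the object out of an arena owned by the graph, so per-instruction objects are never deleted individually. A minimal standalone sketch of that allocation pattern follows (a toy Arena and a stand-in Summary type, not ART's actual ArenaAllocator or LocationSummary API):

#include <cstddef>
#include <new>
#include <vector>

// Toy arena: every allocation lives until the arena is destroyed (simplified stand-in
// for a bump-style ArenaAllocator).
class Arena {
 public:
  ~Arena() { for (void* block : blocks_) ::operator delete(block); }
  void* Alloc(std::size_t bytes) {
    void* block = ::operator new(bytes);
    blocks_.push_back(block);
    return block;
  }
 private:
  std::vector<void*> blocks_;
};

// Placement-new hook so that `new (arena) T(...)` draws storage from the arena.
inline void* operator new(std::size_t bytes, Arena& arena) { return arena.Alloc(bytes); }
inline void operator delete(void*, Arena&) {}  // Only used if a constructor throws.

// Stand-in for LocationSummary: constructed once per instruction, reclaimed with the arena.
struct Summary {
  explicit Summary(int call_kind) : kind(call_kind) {}
  int kind;
};

int main() {
  Arena allocator;
  Summary* locations = new (allocator) Summary(/*call_kind=*/0);  // Mirrors `new (allocator_) ...`.
  return locations->kind;
}

The point of the idiom is that every object built during one compilation shares the arena's lifetime, which is why these constructors take an allocator instead of using plain new.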
@@ -2080,7 +2133,7 @@ void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ B(&done); if (mirror::kUseStringCompression) { - const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + const size_t c_char_size = DataType::Size(DataType::Type::kInt8); DCHECK_EQ(c_char_size, 1u); __ Bind(&compressed_string_preloop); __ Add(src_ptr, src_ptr, Operand(srcBegin)); @@ -2134,10 +2187,9 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { } } - ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena(); - LocationSummary* locations = new (allocator) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); // arraycopy(char[] src, int src_pos, char[] dst, int dst_pos, int length). locations->SetInAt(0, Location::RequiresRegister()); SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1)); @@ -2164,7 +2216,7 @@ static void CheckSystemArrayCopyPosition(MacroAssembler* masm, if (!length_is_input_length) { // Check that length(input) >= length. __ Ldr(temp, MemOperand(input, length_offset)); - __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt)); + __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32)); __ B(slow_path->GetEntryLabel(), lt); } } else { @@ -2174,7 +2226,7 @@ static void CheckSystemArrayCopyPosition(MacroAssembler* masm, __ B(slow_path->GetEntryLabel(), lt); // Check that (length(input) - pos) >= length. - __ Cmp(temp, OperandFrom(length, Primitive::kPrimInt)); + __ Cmp(temp, OperandFrom(length, DataType::Type::kInt32)); __ B(slow_path->GetEntryLabel(), lt); } } else if (length_is_input_length) { @@ -2189,7 +2241,7 @@ static void CheckSystemArrayCopyPosition(MacroAssembler* masm, __ Ldr(temp, MemOperand(input, length_offset)); __ Subs(temp, temp, pos_reg); // Ccmp if length(input) >= pos, else definitely bail to slow path (N!=V == lt). - __ Ccmp(temp, OperandFrom(length, Primitive::kPrimInt), NFlag, ge); + __ Ccmp(temp, OperandFrom(length, DataType::Type::kInt32), NFlag, ge); __ B(slow_path->GetEntryLabel(), lt); } } @@ -2198,7 +2250,7 @@ static void CheckSystemArrayCopyPosition(MacroAssembler* masm, // source address for System.arraycopy* intrinsics in `src_base`, // `dst_base` and `src_end` respectively. static void GenSystemArrayCopyAddresses(MacroAssembler* masm, - Primitive::Type type, + DataType::Type type, const Register& src, const Location& src_pos, const Register& dst, @@ -2208,10 +2260,10 @@ static void GenSystemArrayCopyAddresses(MacroAssembler* masm, const Register& dst_base, const Register& src_end) { // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics. 
- DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar) + DCHECK(type == DataType::Type::kReference || type == DataType::Type::kUint16) << "Unexpected element type: " << type; - const int32_t element_size = Primitive::ComponentSize(type); - const int32_t element_size_shift = Primitive::ComponentSizeShift(type); + const int32_t element_size = DataType::Size(type); + const int32_t element_size_shift = DataType::SizeShift(type); const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); if (src_pos.IsConstant()) { @@ -2247,7 +2299,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { Location dst_pos = locations->InAt(3); Location length = locations->InAt(4); - SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + SlowPathCodeARM64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); codegen_->AddSlowPath(slow_path); // If source and destination are the same, take the slow path. Overlapping copy regions must be @@ -2298,7 +2351,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { src_stop_addr = src_stop_addr.X(); GenSystemArrayCopyAddresses(masm, - Primitive::kPrimChar, + DataType::Type::kUint16, src, src_pos, dst, @@ -2309,7 +2362,7 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopyChar(HInvoke* invoke) { src_stop_addr); // Iterate over the arrays and do a raw copy of the chars. - const int32_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const int32_t char_size = DataType::Size(DataType::Type::kUint16); UseScratchRegisterScope temps(masm); Register tmp = temps.AcquireW(); vixl::aarch64::Label loop, done; @@ -2373,10 +2426,9 @@ void IntrinsicLocationsBuilderARM64::VisitSystemArrayCopy(HInvoke* invoke) { return; } - ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetArena(); - LocationSummary* locations = new (allocator) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); + ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator(); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length). locations->SetInAt(0, Location::RequiresRegister()); SetSystemArrayCopyLocationRequires(locations, 1, invoke->InputAt(1)); @@ -2425,7 +2477,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { Register temp2 = WRegisterFrom(locations->GetTemp(1)); Location temp2_loc = LocationFrom(temp2); - SlowPathCodeARM64* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke); + SlowPathCodeARM64* intrinsic_slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARM64(invoke); codegen_->AddSlowPath(intrinsic_slow_path); vixl::aarch64::Label conditions_on_positions_validated; @@ -2726,8 +2779,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { Register dst_curr_addr = temp2.X(); Register src_stop_addr = temp3.X(); vixl::aarch64::Label done; - const Primitive::Type type = Primitive::kPrimNot; - const int32_t element_size = Primitive::ComponentSize(type); + const DataType::Type type = DataType::Type::kReference; + const int32_t element_size = DataType::Size(type); if (length.IsRegister()) { // Don't enter the copy loop if the length is null. 
@@ -2802,7 +2855,8 @@ void IntrinsicCodeGeneratorARM64::VisitSystemArrayCopy(HInvoke* invoke) { // Slow path used to copy array when `src` is gray. SlowPathCodeARM64* read_barrier_slow_path = - new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp)); + new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARM64( + invoke, LocationFrom(tmp)); codegen_->AddSlowPath(read_barrier_slow_path); // Given the numeric representation, it's enough to check the low bit of the rb_state. @@ -2882,7 +2936,7 @@ static void GenIsInfinite(LocationSummary* locations, } void IntrinsicLocationsBuilderARM64::VisitFloatIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) { @@ -2890,7 +2944,7 @@ void IntrinsicCodeGeneratorARM64::VisitFloatIsInfinite(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitDoubleIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARM64::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -2902,7 +2956,7 @@ void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) { IntrinsicVisitor::ComputeIntegerValueOfLocations( invoke, codegen_, - calling_convention.GetReturnLocation(Primitive::kPrimNot), + calling_convention.GetReturnLocation(DataType::Type::kReference), Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode())); } @@ -2911,7 +2965,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); MacroAssembler* masm = GetVIXLAssembler(); - Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot); + Register out = RegisterFrom(locations->Out(), DataType::Type::kReference); UseScratchRegisterScope temps(masm); Register temp = temps.AcquireW(); InvokeRuntimeCallingConvention calling_convention; @@ -2941,7 +2995,7 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore); } } else { - Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt); + Register in = RegisterFrom(locations->InAt(0), DataType::Type::kInt32); // Check bounds of our cache. 
__ Add(out.W(), in.W(), -info.low); __ Cmp(out.W(), info.high - info.low + 1); @@ -2952,8 +3006,8 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); MemOperand source = HeapOperand( - temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot)); - codegen_->Load(Primitive::kPrimNot, out, source); + temp, out.X(), LSL, DataType::SizeShift(DataType::Type::kReference)); + codegen_->Load(DataType::Type::kReference, out, source); codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out); __ B(&done); __ Bind(&allocate); @@ -2971,15 +3025,14 @@ void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicLocationsBuilderARM64::VisitThreadInterrupted(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetOut(Location::RequiresRegister()); } void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) { MacroAssembler* masm = GetVIXLAssembler(); - Register out = RegisterFrom(invoke->GetLocations()->Out(), Primitive::kPrimInt); + Register out = RegisterFrom(invoke->GetLocations()->Out(), DataType::Type::kInt32); UseScratchRegisterScope temps(masm); Register temp = temps.AcquireX(); @@ -2992,11 +3045,15 @@ void IntrinsicCodeGeneratorARM64::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +void IntrinsicLocationsBuilderARM64::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorARM64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit) -UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit) UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h index ff59ce9658..033a644f34 100644 --- a/compiler/optimizing/intrinsics_arm64.h +++ b/compiler/optimizing/intrinsics_arm64.h @@ -39,15 +39,15 @@ class CodeGeneratorARM64; class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor { public: - explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena, CodeGeneratorARM64* codegen) - : arena_(arena), codegen_(codegen) {} + explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* allocator, CodeGeneratorARM64* codegen) + : allocator_(allocator), codegen_(codegen) {} // Define visitor methods. #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -57,8 +57,8 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: - ArenaAllocator* arena_; - CodeGeneratorARM64* codegen_; + ArenaAllocator* const allocator_; + CodeGeneratorARM64* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64); }; @@ -72,7 +72,7 @@ class IntrinsicCodeGeneratorARM64 FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -81,7 +81,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) ArenaAllocator* GetAllocator(); - CodeGeneratorARM64* codegen_; + CodeGeneratorARM64* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARM64); }; diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc index 8b4044d69b..97a145664c 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.cc +++ b/compiler/optimizing/intrinsics_arm_vixl.cc @@ -20,6 +20,7 @@ #include "art_method.h" #include "code_generator_arm_vixl.h" #include "common_arm.h" +#include "heap_poisoning.h" #include "lock_word.h" #include "mirror/array-inl.h" #include "mirror/object_array-inl.h" @@ -64,7 +65,7 @@ ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() { } ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() { - return codegen_->GetGraph()->GetArena(); + return codegen_->GetGraph()->GetAllocator(); } // Default slow-path for fallback (calling the managed code to handle the intrinsic) in an @@ -125,16 +126,16 @@ class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL { // Compute base address for the System.arraycopy intrinsic in `base`. static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler, - Primitive::Type type, + DataType::Type type, const vixl32::Register& array, const Location& pos, const vixl32::Register& base) { // This routine is only used by the SystemArrayCopy intrinsic at the - // moment. We can allow Primitive::kPrimNot as `type` to implement + // moment. We can allow DataType::Type::kReference as `type` to implement // the SystemArrayCopyChar intrinsic. - DCHECK_EQ(type, Primitive::kPrimNot); - const int32_t element_size = Primitive::ComponentSize(type); - const uint32_t element_size_shift = Primitive::ComponentSizeShift(type); + DCHECK_EQ(type, DataType::Type::kReference); + const int32_t element_size = DataType::Size(type); + const uint32_t element_size_shift = DataType::SizeShift(type); const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); if (pos.IsConstant()) { @@ -148,16 +149,16 @@ static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler, // Compute end address for the System.arraycopy intrinsic in `end`. static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler, - Primitive::Type type, + DataType::Type type, const Location& copy_length, const vixl32::Register& base, const vixl32::Register& end) { // This routine is only used by the SystemArrayCopy intrinsic at the - // moment. We can allow Primitive::kPrimNot as `type` to implement + // moment. 
We can allow DataType::Type::kReference as `type` to implement // the SystemArrayCopyChar intrinsic. - DCHECK_EQ(type, Primitive::kPrimNot); - const int32_t element_size = Primitive::ComponentSize(type); - const uint32_t element_size_shift = Primitive::ComponentSizeShift(type); + DCHECK_EQ(type, DataType::Type::kReference); + const int32_t element_size = DataType::Size(type); + const uint32_t element_size_shift = DataType::SizeShift(type); if (copy_length.IsConstant()) { int32_t constant = Int32ConstantFrom(copy_length); @@ -187,8 +188,8 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); - Primitive::Type type = Primitive::kPrimNot; - const int32_t element_size = Primitive::ComponentSize(type); + DataType::Type type = DataType::Type::kReference; + const int32_t element_size = DataType::Size(type); vixl32::Register dest = InputRegisterAt(instruction_, 2); Location dest_pos = locations->InAt(3); @@ -245,7 +246,7 @@ class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL { }; IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen) - : arena_(codegen->GetGraph()->GetArena()), + : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen), assembler_(codegen->GetAssembler()), features_(codegen->GetInstructionSetFeatures()) {} @@ -259,18 +260,16 @@ bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) { return res->Intrinsified(); } -static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); } -static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); } @@ -296,10 +295,10 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembl } void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke); + CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { @@ -310,10 +309,10 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) } void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke); + 
CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -323,33 +322,38 @@ void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) { MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } -static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } -static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); +} + +static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } static void GenNumberOfLeadingZeros(HInvoke* invoke, - Primitive::Type type, + DataType::Type type, CodeGeneratorARMVIXL* codegen) { ArmVIXLAssembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); Location in = locations->InAt(0); vixl32::Register out = RegisterFrom(locations->Out()); - DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64)); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { vixl32::Register in_reg_lo = LowRegisterFrom(in); vixl32::Register in_reg_hi = HighRegisterFrom(in); vixl32::Label end; @@ -367,35 +371,31 @@ static void GenNumberOfLeadingZeros(HInvoke* invoke, } void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_); + GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + CreateLongToLongLocationsWithOverlap(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_); + GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_); } static void GenNumberOfTrailingZeros(HInvoke* invoke, - 
Primitive::Type type, + DataType::Type type, CodeGeneratorARMVIXL* codegen) { - DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong)); + DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64)); ArmVIXLAssembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); vixl32::Register out = RegisterFrom(locations->Out()); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0)); vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0)); vixl32::Label end; @@ -417,27 +417,19 @@ static void GenNumberOfTrailingZeros(HInvoke* invoke, } void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_); + GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + CreateLongToLongLocationsWithOverlap(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_); + GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_); } static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { @@ -445,7 +437,7 @@ static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { @@ -453,17 +445,16 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) { MathAbsFP(invoke, GetAssembler()); } -static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -502,7 +493,7 @@ static void GenAbsInteger(LocationSummary* locations, } void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(arena_, invoke); + CreateIntToIntPlusTemp(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { @@ 
-511,7 +502,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) { void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(arena_, invoke); + CreateIntToIntPlusTemp(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) { @@ -578,17 +569,16 @@ static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* c } } -static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); } void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); invoke->GetLocations()->AddTemp(Location::RequiresRegister()); } @@ -597,7 +587,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); invoke->GetLocations()->AddTemp(Location::RequiresRegister()); } @@ -657,7 +647,7 @@ static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* } void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { @@ -665,7 +655,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -711,17 +701,16 @@ static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assemb } } -static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); } void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(arena_, invoke); + CreateLongLongToLongLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { @@ -729,7 +718,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(arena_, invoke); + CreateLongLongToLongLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) { 
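GenNumberOfLeadingZeros and GenNumberOfTrailingZeros in the hunks above now take a DataType::Type, and their kInt64 paths on 32-bit ARM work on the low/high register halves of the value. A portable sketch of the same split-word idea, in plain C++ rather than the VIXL clz/rbit sequences, under the assumption that the high word is consulted first:

#include <cassert>
#include <cstdint>

// Count leading zeros of a 32-bit value (returns 32 for zero input).
static int Clz32(uint32_t x) {
  if (x == 0) return 32;
  int n = 0;
  while ((x & 0x80000000u) == 0) { x <<= 1; ++n; }
  return n;
}

// Split-word leading-zero count for a 64-bit value held as two 32-bit halves:
// use the high word if it is non-zero, otherwise 32 plus the count of the low word.
static int LeadingZeros64(uint64_t value) {
  uint32_t hi = static_cast<uint32_t>(value >> 32);
  uint32_t lo = static_cast<uint32_t>(value);
  return (hi != 0) ? Clz32(hi) : 32 + Clz32(lo);
}

int main() {
  assert(LeadingZeros64(0) == 64);
  assert(LeadingZeros64(1) == 63);
  assert(LeadingZeros64(uint64_t{1} << 32) == 31);
}

The generated ARM code makes roughly the same selection with a compare-and-branch on the high word instead of a C++ conditional; the trailing-zeros variant mirrors it from the low word upward.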
@@ -754,17 +743,16 @@ static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) } } -static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { @@ -772,7 +760,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { @@ -780,7 +768,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) { @@ -790,7 +778,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) { void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) { if (features_.HasARMv8AInstructions()) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } } @@ -802,9 +790,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) { void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { if (features_.HasARMv8AInstructions()) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); @@ -853,7 +840,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { @@ -863,7 +850,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) { @@ -873,7 +860,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) { @@ 
-894,7 +881,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) { @@ -903,16 +890,15 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0)))); } -static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); } void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) { @@ -921,7 +907,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) { @@ -930,7 +916,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) { @@ -944,7 +930,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) { @@ -953,9 +939,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) } void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetOut(Location::RequiresRegister()); } @@ -966,7 +951,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) { } static void GenUnsafeGet(HInvoke* invoke, - Primitive::Type type, + DataType::Type type, bool is_volatile, CodeGeneratorARMVIXL* codegen) { LocationSummary* locations = invoke->GetLocations(); @@ -978,7 +963,7 @@ static void GenUnsafeGet(HInvoke* invoke, Location trg_loc = locations->Out(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { vixl32::Register trg = RegisterFrom(trg_loc); __ Ldr(trg, MemOperand(base, offset)); if (is_volatile) { @@ -987,7 +972,7 @@ static void GenUnsafeGet(HInvoke* invoke, break; } - case 
Primitive::kPrimNot: { + case DataType::Type::kReference: { vixl32::Register trg = RegisterFrom(trg_loc); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { @@ -1014,7 +999,7 @@ static void GenUnsafeGet(HInvoke* invoke, break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { vixl32::Register trg_lo = LowRegisterFrom(trg_loc); vixl32::Register trg_hi = HighRegisterFrom(trg_loc); if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) { @@ -1037,17 +1022,18 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, +static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, - Primitive::Type type) { + DataType::Type type) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -1056,7 +1042,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier. 
locations->AddTemp(Location::RequiresRegister()); @@ -1064,63 +1050,62 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, +static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, const ArmInstructionSetFeatures& features, - Primitive::Type type, + DataType::Type type, bool is_volatile, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. 
locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { // Potentially need temps for ldrexd-strexd loop. if (is_volatile && !features.HasAtomicLdrdAndStrd()) { locations->AddTemp(Location::RequiresRegister()); // Temp_lo. locations->AddTemp(Location::RequiresRegister()); // Temp_hi. } - } else if (type == Primitive::kPrimNot) { + } else if (type == DataType::Type::kReference) { // Temps for card-marking. locations->AddTemp(Location::RequiresRegister()); // Temp. locations->AddTemp(Location::RequiresRegister()); // Card. @@ -1128,38 +1113,44 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke); + CreateIntIntIntIntToVoid( + allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke); + CreateIntIntIntIntToVoid( + allocator_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke); + CreateIntIntIntIntToVoid( + allocator_, features_, DataType::Type::kInt32, /* is_volatile */ true, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke); + CreateIntIntIntIntToVoid( + allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke); + CreateIntIntIntIntToVoid( + allocator_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke); + CreateIntIntIntIntToVoid( + allocator_, features_, DataType::Type::kReference, /* is_volatile */ true, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { CreateIntIntIntIntToVoid( - arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoid( - arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke); + allocator_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoid( - arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke); + allocator_, features_, DataType::Type::kInt64, /* is_volatile */ true, invoke); } static void GenUnsafePut(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, bool is_volatile, bool is_ordered, CodeGeneratorARMVIXL* codegen) { @@ 
-1173,7 +1164,7 @@ static void GenUnsafePut(LocationSummary* locations, __ Dmb(vixl32::ISH); } - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3)); vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3)); value = value_lo; @@ -1196,7 +1187,7 @@ static void GenUnsafePut(LocationSummary* locations, } else { value = RegisterFrom(locations->InAt(3)); vixl32::Register source = value; - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); __ Mov(temp, value); assembler->PoisonHeapReference(temp); @@ -1209,7 +1200,7 @@ static void GenUnsafePut(LocationSummary* locations, __ Dmb(vixl32::ISH); } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); vixl32::Register card = RegisterFrom(locations->GetTemp(1)); bool value_can_be_null = true; // TODO: Worth finding out this information? @@ -1219,79 +1210,80 @@ static void GenUnsafePut(LocationSummary* locations, void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ false, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ false, /* is_ordered */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ true, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ false, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ false, /* is_ordered */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ true, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ false, /* is_ordered */ false, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ false, /* is_ordered */ true, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ true, /* is_ordered */ false, codegen_); } -static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke, - Primitive::Type type) { + DataType::Type type) { bool can_call = kEmitCompilerReadBarrier && kUseBakerReadBarrier && (invoke->GetIntrinsic() == 
Intrinsics::kUnsafeCASObject); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -1302,7 +1294,7 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, // operations to potentially clobber the output. Likewise when // emitting a (Baker) read barrier, which may call. Location::OutputOverlap overlaps = - ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call) + ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call) ? Location::kOutputOverlap : Location::kNoOutputOverlap; locations->SetOut(Location::RequiresRegister(), overlaps); @@ -1314,8 +1306,8 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, locations->AddTemp(Location::RequiresRegister()); // Temp 1. } -static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) { - DCHECK_NE(type, Primitive::kPrimLong); +static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) { + DCHECK_NE(type, DataType::Type::kInt64); ArmVIXLAssembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); @@ -1333,7 +1325,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory. vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory. - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. 
DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); @@ -1365,7 +1357,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* __ Add(tmp_ptr, base, offset); - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { codegen->GetAssembler()->PoisonHeapReference(expected); if (value.Is(expected)) { // Do not poison `value`, as it is the same register as @@ -1412,7 +1404,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* __ mov(cc, out, 0); } - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { codegen->GetAssembler()->UnpoisonHeapReference(expected); if (value.Is(expected)) { // Do not unpoison `value`, as it is the same register as @@ -1424,7 +1416,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kInt32); } void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { // The only read barrier implementation supporting the @@ -1433,26 +1425,27 @@ void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { return; } - CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke, DataType::Type::kReference); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke, Primitive::kPrimInt, codegen_); + GenCas(invoke, DataType::Type::kInt32, codegen_); } void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke, Primitive::kPrimNot, codegen_); + GenCas(invoke, DataType::Type::kReference, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) { // The inputs plus one temp. - LocationSummary* locations = new (arena_) LocationSummary(invoke, - invoke->InputAt(1)->CanBeNull() - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, + invoke->InputAt(1)->CanBeNull() + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); @@ -1465,30 +1458,38 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) { locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } +// Forward declaration. +// +// ART build system imposes a size limit (deviceFrameSizeLimit) on the stack frames generated +// by the compiler for every C++ function, and if this function gets inlined in +// IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo, the limit will be exceeded, resulting in a +// build failure. That is the reason why NO_INLINE attribute is used. 
+static void NO_INLINE GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, + HInvoke* invoke, + vixl32::Label* end, + vixl32::Label* different_compression); + void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { ArmVIXLAssembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); - vixl32::Register str = InputRegisterAt(invoke, 0); - vixl32::Register arg = InputRegisterAt(invoke, 1); - vixl32::Register out = OutputRegister(invoke); + const vixl32::Register str = InputRegisterAt(invoke, 0); + const vixl32::Register arg = InputRegisterAt(invoke, 1); + const vixl32::Register out = OutputRegister(invoke); - vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0)); - vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); - vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); + const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0)); + const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); vixl32::Register temp3; if (mirror::kUseStringCompression) { temp3 = RegisterFrom(locations->GetTemp(3)); } - vixl32::Label loop; - vixl32::Label find_char_diff; vixl32::Label end; vixl32::Label different_compression; // Get offsets of count and value fields within a string object. const int32_t count_offset = mirror::String::CountOffset().Int32Value(); - const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); // Note that the null check must have been done earlier. DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); @@ -1497,7 +1498,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { SlowPathCodeARMVIXL* slow_path = nullptr; const bool can_slow_path = invoke->InputAt(1)->CanBeNull(); if (can_slow_path) { - slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); codegen_->AddSlowPath(slow_path); __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel()); } @@ -1553,6 +1554,38 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { __ add(ne, temp0, temp0, temp0); } + + GenerateStringCompareToLoop(assembler, invoke, &end, &different_compression); + + __ Bind(&end); + + if (can_slow_path) { + __ Bind(slow_path->GetExitLabel()); + } +} + +static void GenerateStringCompareToLoop(ArmVIXLAssembler* assembler, + HInvoke* invoke, + vixl32::Label* end, + vixl32::Label* different_compression) { + LocationSummary* locations = invoke->GetLocations(); + + const vixl32::Register str = InputRegisterAt(invoke, 0); + const vixl32::Register arg = InputRegisterAt(invoke, 1); + const vixl32::Register out = OutputRegister(invoke); + + const vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0)); + const vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + const vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); + vixl32::Register temp3; + if (mirror::kUseStringCompression) { + temp3 = RegisterFrom(locations->GetTemp(3)); + } + + vixl32::Label loop; + vixl32::Label find_char_diff; + + const int32_t value_offset = mirror::String::ValueOffset().Int32Value(); // Store offset of string value in preparation for comparison loop. 
__ Mov(temp1, value_offset); @@ -1561,7 +1594,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { static_assert(IsAligned<8>(kObjectAlignment), "String data must be 8-byte aligned for unrolled CompareTo loop."); - const unsigned char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const unsigned char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); @@ -1584,12 +1617,12 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { // With string compression, we have compared 8 bytes, otherwise 4 chars. __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4)); __ B(hi, &loop, /* far_target */ false); - __ B(&end); + __ B(end); __ Bind(&find_char_diff_2nd_cmp); if (mirror::kUseStringCompression) { __ Subs(temp0, temp0, 4); // 4 bytes previously compared. - __ B(ls, &end, /* far_target */ false); // Was the second comparison fully beyond the end? + __ B(ls, end, /* far_target */ false); // Was the second comparison fully beyond the end? } else { // Without string compression, we can start treating temp0 as signed // and rely on the signed comparison below. @@ -1617,7 +1650,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { // the remaining string data, so just return length diff (out). // The comparison is unsigned for string compression, otherwise signed. __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4))); - __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false); + __ B((mirror::kUseStringCompression ? ls : le), end, /* far_target */ false); // Extract the characters and calculate the difference. if (mirror::kUseStringCompression) { @@ -1644,11 +1677,11 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { temps.Release(temp_reg); if (mirror::kUseStringCompression) { - __ B(&end); - __ Bind(&different_compression); + __ B(end); + __ Bind(different_compression); // Comparison for different compression style. - const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + const size_t c_char_size = DataType::Size(DataType::Type::kInt8); DCHECK_EQ(c_char_size, 1u); // We want to free up the temp3, currently holding `str.count`, for comparison. @@ -1687,7 +1720,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { __ B(ne, &different_compression_diff, /* far_target */ false); __ Subs(temp0, temp0, 2); __ B(hi, &different_compression_loop, /* far_target */ false); - __ B(&end); + __ B(end); // Calculate the difference. __ Bind(&different_compression_diff); @@ -1705,27 +1738,60 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) { __ it(cc); __ rsb(cc, out, out, 0); } +} - __ Bind(&end); +// The cut off for unrolling the loop in String.equals() intrinsic for const strings. +// The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression +// and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4 +// instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE). +// Allow up to 12 instructions (32 bytes) for the unrolled loop. 
+constexpr size_t kShortConstStringEqualsCutoffInBytes = 16; - if (can_slow_path) { - __ Bind(slow_path->GetExitLabel()); +static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) { + if (candidate->IsLoadString()) { + HLoadString* load_string = candidate->AsLoadString(); + const DexFile& dex_file = load_string->GetDexFile(); + return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length); } + return nullptr; } void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + if (kEmitCompilerReadBarrier && + !StringEqualsOptimizations(invoke).GetArgumentIsString() && + !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { + // No support for this odd case (String class is moveable, not in the boot image). + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); + // Temporary registers to store lengths of strings and for calculations. // Using instruction cbz requires a low register, so explicitly set a temp to be R0. locations->AddTemp(LocationFrom(r0)); - locations->AddTemp(Location::RequiresRegister()); - locations->AddTemp(Location::RequiresRegister()); + // For the generic implementation and for long const strings we need an extra temporary. + // We do not need it for short const strings, up to 4 bytes, see code generation below. + uint32_t const_string_length = 0u; + const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); + if (const_string == nullptr) { + const_string = GetConstString(invoke->InputAt(1), &const_string_length); + } + bool is_compressed = + mirror::kUseStringCompression && + const_string != nullptr && + mirror::String::DexFileStringAllASCII(const_string, const_string_length); + if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) { + locations->AddTemp(Location::RequiresRegister()); + } + + // TODO: If the String.equals() is used only for an immediately following HIf, we can + // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks. + // Then we shall need an extra temporary register instead of the output register. locations->SetOut(Location::RequiresRegister()); } @@ -1738,8 +1804,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { vixl32::Register out = OutputRegister(invoke); vixl32::Register temp = RegisterFrom(locations->GetTemp(0)); - vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); - vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2)); vixl32::Label loop; vixl32::Label end; @@ -1771,52 +1835,109 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { // Receiver must be a string object, so its class field is equal to all strings' class fields. // If the argument is a string object, its class field must be equal to receiver's class field. __ Ldr(temp, MemOperand(str, class_offset)); - __ Ldr(temp1, MemOperand(arg, class_offset)); - __ Cmp(temp, temp1); + __ Ldr(out, MemOperand(arg, class_offset)); + __ Cmp(temp, out); __ B(ne, &return_false, /* far_target */ false); } - // Load `count` fields of this and argument strings. 
- __ Ldr(temp, MemOperand(str, count_offset)); - __ Ldr(temp1, MemOperand(arg, count_offset)); - // Check if `count` fields are equal, return false if they're not. - // Also compares the compression style, if differs return false. - __ Cmp(temp, temp1); - __ B(ne, &return_false, /* far_target */ false); - // Return true if both strings are empty. Even with string compression `count == 0` means empty. - static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, - "Expecting 0=compressed, 1=uncompressed"); - __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false); + // Check if one of the inputs is a const string. Do not special-case both strings + // being const, such cases should be handled by constant folding if needed. + uint32_t const_string_length = 0u; + const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length); + if (const_string == nullptr) { + const_string = GetConstString(invoke->InputAt(1), &const_string_length); + if (const_string != nullptr) { + std::swap(str, arg); // Make sure the const string is in `str`. + } + } + bool is_compressed = + mirror::kUseStringCompression && + const_string != nullptr && + mirror::String::DexFileStringAllASCII(const_string, const_string_length); + + if (const_string != nullptr) { + // Load `count` field of the argument string and check if it matches the const string. + // Also compares the compression style, if differs return false. + __ Ldr(temp, MemOperand(arg, count_offset)); + __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed))); + __ B(ne, &return_false, /* far_target */ false); + } else { + // Load `count` fields of this and argument strings. + __ Ldr(temp, MemOperand(str, count_offset)); + __ Ldr(out, MemOperand(arg, count_offset)); + // Check if `count` fields are equal, return false if they're not. + // Also compares the compression style, if differs return false. + __ Cmp(temp, out); + __ B(ne, &return_false, /* far_target */ false); + } // Assertions that must hold in order to compare strings 4 bytes at a time. + // Ok to do this because strings are zero-padded to kObjectAlignment. DCHECK_ALIGNED(value_offset, 4); static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare."); - if (mirror::kUseStringCompression) { - // For string compression, calculate the number of bytes to compare (not chars). - // This could in theory exceed INT32_MAX, so treat temp as unsigned. - __ Lsrs(temp, temp, 1u); // Extract length and check compression flag. - ExactAssemblyScope aas(assembler->GetVIXLAssembler(), - 2 * kMaxInstructionSizeInBytes, - CodeBufferCheckScope::kMaximumSize); - __ it(cs); // If uncompressed, - __ add(cs, temp, temp, temp); // double the byte count. - } + if (const_string != nullptr && + const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes + : kShortConstStringEqualsCutoffInBytes / 2u)) { + // Load and compare the contents. Though we know the contents of the short const string + // at compile time, materializing constants may be more code than loading from memory. + int32_t offset = value_offset; + size_t remaining_bytes = + RoundUp(is_compressed ? 
const_string_length : const_string_length * 2u, 4u); + while (remaining_bytes > sizeof(uint32_t)) { + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler()); + vixl32::Register temp2 = scratch_scope.Acquire(); + __ Ldrd(temp, temp1, MemOperand(str, offset)); + __ Ldrd(temp2, out, MemOperand(arg, offset)); + __ Cmp(temp, temp2); + __ B(ne, &return_false, /* far_label */ false); + __ Cmp(temp1, out); + __ B(ne, &return_false, /* far_label */ false); + offset += 2u * sizeof(uint32_t); + remaining_bytes -= 2u * sizeof(uint32_t); + } + if (remaining_bytes != 0u) { + __ Ldr(temp, MemOperand(str, offset)); + __ Ldr(out, MemOperand(arg, offset)); + __ Cmp(temp, out); + __ B(ne, &return_false, /* far_label */ false); + } + } else { + // Return true if both strings are empty. Even with string compression `count == 0` means empty. + static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u, + "Expecting 0=compressed, 1=uncompressed"); + __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false); - // Store offset of string value in preparation for comparison loop. - __ Mov(temp1, value_offset); + if (mirror::kUseStringCompression) { + // For string compression, calculate the number of bytes to compare (not chars). + // This could in theory exceed INT32_MAX, so treat temp as unsigned. + __ Lsrs(temp, temp, 1u); // Extract length and check compression flag. + ExactAssemblyScope aas(assembler->GetVIXLAssembler(), + 2 * kMaxInstructionSizeInBytes, + CodeBufferCheckScope::kMaximumSize); + __ it(cs); // If uncompressed, + __ add(cs, temp, temp, temp); // double the byte count. + } - // Loop to compare strings 4 bytes at a time starting at the front of the string. - // Ok to do this because strings are zero-padded to kObjectAlignment. - __ Bind(&loop); - __ Ldr(out, MemOperand(str, temp1)); - __ Ldr(temp2, MemOperand(arg, temp1)); - __ Add(temp1, temp1, Operand::From(sizeof(uint32_t))); - __ Cmp(out, temp2); - __ B(ne, &return_false, /* far_target */ false); - // With string compression, we have compared 4 bytes, otherwise 2 chars. - __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); - __ B(hi, &loop, /* far_target */ false); + vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1)); + UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler()); + vixl32::Register temp2 = scratch_scope.Acquire(); + + // Store offset of string value in preparation for comparison loop. + __ Mov(temp1, value_offset); + + // Loop to compare strings 4 bytes at a time starting at the front of the string. + __ Bind(&loop); + __ Ldr(out, MemOperand(str, temp1)); + __ Ldr(temp2, MemOperand(arg, temp1)); + __ Add(temp1, temp1, Operand::From(sizeof(uint32_t))); + __ Cmp(out, temp2); + __ B(ne, &return_false, /* far_target */ false); + // With string compression, we have compared 4 bytes, otherwise 2 chars. + __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2); + __ B(hi, &loop, /* far_target */ false); + } // Return true and exit the function. // If loop does not result in returning false, we return true. 
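As a rough illustration of the arithmetic behind the const-string fast path above: the argument's `count` field is checked with a single CMP against a packed value, and the unrolled LDR/LDRD sequence walks a rounded-up byte count. Below is a minimal stand-alone sketch of both computations, assuming only the encoding indicated by the static_assert (UTF-16 length shifted left by one, compression flag in bit 0, 0 = compressed); the helper names are illustrative, not ART APIs.

#include <cstdint>
#include <cstdio>

// Assumed `count` encoding: bit 0 holds the compression flag (0 = compressed,
// 1 = uncompressed), the upper 31 bits hold the UTF-16 length.
constexpr uint32_t FlaggedCount(uint32_t utf16_length, bool is_compressed) {
  return (utf16_length << 1) | (is_compressed ? 0u : 1u);
}

// Bytes the unrolled comparison covers: one byte per character for a compressed
// string, two otherwise, rounded up to a multiple of 4 since string data is
// zero-padded to the object alignment.
constexpr uint32_t ComparisonBytes(uint32_t utf16_length, bool is_compressed) {
  const uint32_t raw = is_compressed ? utf16_length : utf16_length * 2u;
  return (raw + 3u) & ~3u;  // RoundUp(raw, 4)
}

int main() {
  // 7 ASCII characters -> compressed; 7 bytes round up to 8, handled by a single
  // LDRD/LDRD/CMP/CMP group of the unrolled comparison (8 bytes per iteration).
  std::printf("count=0x%x, bytes=%u\n", FlaggedCount(7, true), ComparisonBytes(7, true));
  // 9 uncompressed characters need 18 bytes, which exceeds the 16-byte
  // kShortConstStringEqualsCutoffInBytes, so the generic loop is emitted instead.
  std::printf("count=0x%x, bytes=%u\n", FlaggedCount(9, false), ComparisonBytes(9, false));
  return 0;
}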
@@ -1836,7 +1957,6 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) { static void GenerateVisitStringIndexOf(HInvoke* invoke, ArmVIXLAssembler* assembler, CodeGeneratorARMVIXL* codegen, - ArenaAllocator* allocator, bool start_at_zero) { LocationSummary* locations = invoke->GetLocations(); @@ -1852,17 +1972,17 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, std::numeric_limits<uint16_t>::max()) { // Always needs the slow-path. We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. - slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); codegen->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } - } else if (code_point->GetType() != Primitive::kPrimChar) { + } else if (code_point->GetType() != DataType::Type::kUint16) { vixl32::Register char_reg = InputRegisterAt(invoke, 1); // 0xffff is not modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`. __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1); - slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); codegen->AddSlowPath(slow_path); __ B(hs, slow_path->GetEntryLabel()); } @@ -1883,9 +2003,8 @@ static void GenerateVisitStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. InvokeRuntimeCallingConventionARMVIXL calling_convention; @@ -1898,14 +2017,12 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) { - GenerateVisitStringIndexOf( - invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); } void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's // best to align the inputs accordingly. 
InvokeRuntimeCallingConventionARMVIXL calling_convention; @@ -1916,14 +2033,12 @@ void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) } void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateVisitStringIndexOf( - invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); + GenerateVisitStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); } void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); @@ -1936,7 +2051,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invok ArmVIXLAssembler* assembler = GetAssembler(); vixl32::Register byte_array = InputRegisterAt(invoke, 0); __ Cmp(byte_array, 0); - SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + SlowPathCodeARMVIXL* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); codegen_->AddSlowPath(slow_path); __ B(eq, slow_path->GetEntryLabel()); @@ -1946,9 +2062,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invok } void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1))); @@ -1968,9 +2083,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invok } void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0))); locations->SetOut(LocationFrom(r0)); @@ -1980,7 +2094,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invo ArmVIXLAssembler* assembler = GetAssembler(); vixl32::Register string_to_copy = InputRegisterAt(invoke, 0); __ Cmp(string_to_copy, 0); - SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke); + SlowPathCodeARMVIXL* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); codegen_->AddSlowPath(slow_path); __ B(eq, slow_path->GetEntryLabel()); @@ -2115,7 +2230,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { Location temp3_loc = locations->GetTemp(2); vixl32::Register temp3 = RegisterFrom(temp3_loc); - SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) 
IntrinsicSlowPathARMVIXL(invoke); + SlowPathCodeARMVIXL* intrinsic_slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathARMVIXL(invoke); codegen_->AddSlowPath(intrinsic_slow_path); vixl32::Label conditions_on_positions_validated; @@ -2365,8 +2481,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // Null constant length: not need to emit the loop code at all. } else { vixl32::Label done; - const Primitive::Type type = Primitive::kPrimNot; - const int32_t element_size = Primitive::ComponentSize(type); + const DataType::Type type = DataType::Type::kReference; + const int32_t element_size = DataType::Size(type); if (length.IsRegister()) { // Don't enter the copy loop if the length is null. @@ -2421,7 +2537,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { // Note that the base destination address is computed in `temp2` // by the slow path code. SlowPathCodeARMVIXL* read_barrier_slow_path = - new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke); + new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke); codegen_->AddSlowPath(read_barrier_slow_path); // Given the numeric representation, it's enough to check the low bit of the @@ -2480,7 +2596,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) { __ Bind(intrinsic_slow_path->GetExitLabel()); } -static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { // If the graph is debuggable, all callee-saved floating-point registers are blocked by // the code generator. Furthermore, the register allocator creates fixed live intervals // for all caller-saved registers because we are doing a function call. As a result, if @@ -2491,12 +2607,11 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { } DCHECK_EQ(invoke->GetNumberOfArguments(), 1U); - DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble); - DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); + DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64); + DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64); - LocationSummary* const locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* const locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); const InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -2506,7 +2621,7 @@ static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1))); } -static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { // If the graph is debuggable, all callee-saved floating-point registers are blocked by // the code generator. Furthermore, the register allocator creates fixed live intervals // for all caller-saved registers because we are doing a function call. 
As a result, if @@ -2517,13 +2632,12 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) } DCHECK_EQ(invoke->GetNumberOfArguments(), 2U); - DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble); - DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble); - DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble); + DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64); + DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64); + DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64); - LocationSummary* const locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* const locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); const InvokeRuntimeCallingConventionARMVIXL calling_convention; locations->SetInAt(0, Location::RequiresFpuRegister()); @@ -2578,7 +2692,7 @@ static void GenFPFPToFPCall(HInvoke* invoke, } void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) { @@ -2586,7 +2700,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) { @@ -2594,7 +2708,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) { @@ -2602,7 +2716,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) { @@ -2610,7 +2724,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) { @@ -2618,7 +2732,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) { @@ -2626,7 +2740,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) { @@ -2634,7 +2748,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) { @@ -2642,7 
+2756,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) { @@ -2650,7 +2764,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) { @@ -2658,7 +2772,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) { @@ -2666,7 +2780,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) { @@ -2674,7 +2788,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) { @@ -2682,7 +2796,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) { @@ -2690,15 +2804,23 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderARMVIXL::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) { @@ -2706,7 +2828,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) { @@ -2714,7 +2836,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void 
IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) { @@ -2723,11 +2845,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + CreateLongToLongLocationsWithOverlap(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) { @@ -2744,7 +2862,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { @@ -2753,11 +2871,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); - locations->SetInAt(0, Location::RequiresRegister()); - locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); + CreateLongToLongLocationsWithOverlap(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { @@ -2774,7 +2888,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) { @@ -2782,12 +2896,12 @@ void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) { __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0)); } -static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) { - DCHECK(Primitive::IsIntOrLongType(type)) << type; - DCHECK_EQ(instr->GetType(), Primitive::kPrimInt); - DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type); +static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) { + DCHECK(DataType::IsIntOrLongType(type)) << type; + DCHECK_EQ(instr->GetType(), DataType::Type::kInt32); + DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type); - bool is_long = type == Primitive::kPrimLong; + bool is_long = type == DataType::Type::kInt64; LocationSummary* locations = instr->GetLocations(); Location in = locations->InAt(0); vixl32::Register src_0 = is_long ? 
LowRegisterFrom(in) : RegisterFrom(in); @@ -2811,12 +2925,12 @@ static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* } void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister()); } void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke, Primitive::kPrimInt, GetAssembler()); + GenBitCount(invoke, DataType::Type::kInt32, GetAssembler()); } void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) { @@ -2824,13 +2938,143 @@ void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke, Primitive::kPrimLong, GetAssembler()); + GenBitCount(invoke, DataType::Type::kInt64, GetAssembler()); +} + +static void GenHighestOneBit(HInvoke* invoke, + DataType::Type type, + CodeGeneratorARMVIXL* codegen) { + DCHECK(DataType::IsIntOrLongType(type)); + + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + if (type == DataType::Type::kInt64) { + LocationSummary* locations = invoke->GetLocations(); + Location in = locations->InAt(0); + Location out = locations->Out(); + + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Register out_reg_lo = LowRegisterFrom(out); + vixl32::Register out_reg_hi = HighRegisterFrom(out); + + __ Mov(temp, 0x80000000); // Modified immediate. + __ Clz(out_reg_lo, in_reg_lo); + __ Clz(out_reg_hi, in_reg_hi); + __ Lsr(out_reg_lo, temp, out_reg_lo); + __ Lsrs(out_reg_hi, temp, out_reg_hi); + + // Discard result for lowest 32 bits if highest 32 bits are not zero. + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. If output is in a high register, then we generate + // 4 more bytes of code to avoid a branch. + Operand mov_src(0); + if (!out_reg_lo.IsLow()) { + __ Mov(LeaveFlags, temp, 0); + mov_src = Operand(temp); + } + ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ it(ne); + __ mov(ne, out_reg_lo, mov_src); + } else { + vixl32::Register out = OutputRegister(invoke); + vixl32::Register in = InputRegisterAt(invoke, 0); + + __ Mov(temp, 0x80000000); // Modified immediate. 
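// A scalar model of what the next two instructions compute, assuming CLZ(0) == 32 and that
// an LSR by a register-specified amount of 32 yields 0 on ARM (so a zero input falls out
// correctly without a branch):
//   uint32_t HighestOneBit(uint32_t in) {        // e.g. 0x12345678 -> 0x10000000, 0 -> 0
//     uint32_t clz = (in == 0) ? 32u : static_cast<uint32_t>(__builtin_clz(in));
//     return (clz >= 32u) ? 0u : (0x80000000u >> clz);
//   }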
+ __ Clz(out, in); + __ Lsr(out, temp, out); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) { + CreateLongToLongLocationsWithOverlap(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) { + GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_); +} + +static void GenLowestOneBit(HInvoke* invoke, + DataType::Type type, + CodeGeneratorARMVIXL* codegen) { + DCHECK(DataType::IsIntOrLongType(type)); + + ArmVIXLAssembler* assembler = codegen->GetAssembler(); + UseScratchRegisterScope temps(assembler->GetVIXLAssembler()); + const vixl32::Register temp = temps.Acquire(); + + if (type == DataType::Type::kInt64) { + LocationSummary* locations = invoke->GetLocations(); + Location in = locations->InAt(0); + Location out = locations->Out(); + + vixl32::Register in_reg_lo = LowRegisterFrom(in); + vixl32::Register in_reg_hi = HighRegisterFrom(in); + vixl32::Register out_reg_lo = LowRegisterFrom(out); + vixl32::Register out_reg_hi = HighRegisterFrom(out); + + __ Rsb(out_reg_hi, in_reg_hi, 0); + __ Rsb(out_reg_lo, in_reg_lo, 0); + __ And(out_reg_hi, out_reg_hi, in_reg_hi); + // The result of this operation is 0 iff in_reg_lo is 0 + __ Ands(out_reg_lo, out_reg_lo, in_reg_lo); + + // Discard result for highest 32 bits if lowest 32 bits are not zero. + // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8, + // we check that the output is in a low register, so that a 16-bit MOV + // encoding can be used. If output is in a high register, then we generate + // 4 more bytes of code to avoid a branch. 
+ Operand mov_src(0); + if (!out_reg_lo.IsLow()) { + __ Mov(LeaveFlags, temp, 0); + mov_src = Operand(temp); + } + ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(), + 2 * vixl32::k16BitT32InstructionSizeInBytes, + CodeBufferCheckScope::kExactSize); + __ it(ne); + __ mov(ne, out_reg_hi, mov_src); + } else { + vixl32::Register out = OutputRegister(invoke); + vixl32::Register in = InputRegisterAt(invoke, 0); + + __ Rsb(temp, in, 0); + __ And(out, temp, in); + } +} + +void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) { + CreateIntToIntLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_); +} + +void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) { + CreateLongToLongLocationsWithOverlap(allocator_, invoke); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) { + GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_); } void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -2848,7 +3092,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) LocationSummary* locations = invoke->GetLocations(); // Check assumption that sizeof(Char) is 2 (used in scaling below). - const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); // Location of data in char array buffer. @@ -2936,7 +3180,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) if (mirror::kUseStringCompression) { __ B(final_label); - const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + const size_t c_char_size = DataType::Size(DataType::Type::kInt8); DCHECK_EQ(c_char_size, 1u); // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time. 
__ Bind(&compressed_string_preloop); @@ -2956,7 +3200,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) } void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { @@ -2974,7 +3218,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -3001,7 +3245,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) { void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) { if (features_.HasARMv8AInstructions()) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } } @@ -3013,7 +3257,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) { void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) { if (features_.HasARMv8AInstructions()) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } } @@ -3077,7 +3321,7 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value(); uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache)); __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address)); - codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out); + codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out); assembler->MaybeUnpoisonHeapReference(out); __ B(&done); __ Bind(&allocate); @@ -3095,9 +3339,8 @@ void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetOut(Location::RequiresRegister()); } @@ -3120,14 +3363,18 @@ void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) { } } +void IntrinsicLocationsBuilderARMVIXL::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorARMVIXL::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe? UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure. 
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar) UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit) -UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit) UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf); UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter); diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h index 023cba1349..9c02d0a4ad 100644 --- a/compiler/optimizing/intrinsics_arm_vixl.h +++ b/compiler/optimizing/intrinsics_arm_vixl.h @@ -36,7 +36,7 @@ class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -46,9 +46,9 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: - ArenaAllocator* arena_; - CodeGenerator* codegen_; - ArmVIXLAssembler* assembler_; + ArenaAllocator* const allocator_; + CodeGenerator* const codegen_; + ArmVIXLAssembler* const assembler_; const ArmInstructionSetFeatures& features_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARMVIXL); @@ -63,7 +63,7 @@ class IntrinsicCodeGeneratorARMVIXL FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -71,7 +71,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) ArenaAllocator* GetAllocator(); ArmVIXLAssembler* GetAssembler(); - CodeGeneratorARMVIXL* codegen_; + CodeGeneratorARMVIXL* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARMVIXL); }; diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc index 4cea6dfdfb..b7936b9c8e 100644 --- a/compiler/optimizing/intrinsics_mips.cc +++ b/compiler/optimizing/intrinsics_mips.cc @@ -20,8 +20,10 @@ #include "art_method.h" #include "code_generator_mips.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "mirror/array-inl.h" +#include "mirror/object_array-inl.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" @@ -33,7 +35,7 @@ namespace art { namespace mips { IntrinsicLocationsBuilderMIPS::IntrinsicLocationsBuilderMIPS(CodeGeneratorMIPS* codegen) - : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) { + : codegen_(codegen), allocator_(codegen->GetGraph()->GetAllocator()) { } MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() { @@ -41,7 +43,7 @@ MipsAssembler* IntrinsicCodeGeneratorMIPS::GetAssembler() { } ArenaAllocator* IntrinsicCodeGeneratorMIPS::GetAllocator() { - return codegen_->GetGraph()->GetArena(); + return codegen_->GetGraph()->GetAllocator(); } inline bool IntrinsicCodeGeneratorMIPS::IsR2OrNewer() const { @@ -59,16 +61,16 @@ inline bool IntrinsicCodeGeneratorMIPS::Is32BitFPU() const { #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, - Primitive::Type type, + DataType::Type type, CodeGeneratorMIPS* 
codegen) { if (!trg.IsValid()) { - DCHECK_EQ(type, Primitive::kPrimVoid); + DCHECK_EQ(type, DataType::Type::kVoid); return; } - DCHECK_NE(type, Primitive::kPrimVoid); + DCHECK_NE(type, DataType::Type::kVoid); - if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) { + if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) { Register trg_reg = trg.AsRegister<Register>(); if (trg_reg != V0) { __ Move(V0, trg_reg); @@ -76,7 +78,7 @@ static void MoveFromReturnRegister(Location trg, } else { FRegister trg_reg = trg.AsFpuRegister<FRegister>(); if (trg_reg != F0) { - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ MovS(F0, trg_reg); } else { __ MovD(F0, trg_reg); @@ -150,10 +152,9 @@ bool IntrinsicLocationsBuilderMIPS::TryDispatch(HInvoke* invoke) { #define __ assembler-> -static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); } @@ -176,7 +177,7 @@ static void MoveFPToInt(LocationSummary* locations, bool is64bit, MipsAssembler* // long java.lang.Double.doubleToRawLongBits(double) void IntrinsicLocationsBuilderMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { @@ -185,17 +186,16 @@ void IntrinsicCodeGeneratorMIPS::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) // int java.lang.Float.floatToRawIntBits(float) void IntrinsicLocationsBuilderMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitFloatFloatToRawIntBits(HInvoke* invoke) { MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } -static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); } @@ -218,7 +218,7 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, MipsAssembler* // double java.lang.Double.longBitsToDouble(long) void IntrinsicLocationsBuilderMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke); + CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { @@ -227,35 +227,34 @@ void IntrinsicCodeGeneratorMIPS::VisitDoubleLongBitsToDouble(HInvoke* invoke) { // float java.lang.Float.intBitsToFloat(int) void IntrinsicLocationsBuilderMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke); + CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitFloatIntBitsToFloat(HInvoke* invoke) { 
MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } -static void CreateIntToIntLocations(ArenaAllocator* arena, +static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, Location::OutputOverlap overlaps = Location::kNoOutputOverlap) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), overlaps); } static void GenReverse(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, bool isR2OrNewer, bool isR6, bool reverseBits, MipsAssembler* assembler) { - DCHECK(type == Primitive::kPrimShort || - type == Primitive::kPrimInt || - type == Primitive::kPrimLong); - DCHECK(type != Primitive::kPrimShort || !reverseBits); + DCHECK(type == DataType::Type::kInt16 || + type == DataType::Type::kInt32 || + type == DataType::Type::kInt64); + DCHECK(type != DataType::Type::kInt16 || !reverseBits); - if (type == Primitive::kPrimShort) { + if (type == DataType::Type::kInt16) { Register in = locations->InAt(0).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); @@ -269,7 +268,7 @@ static void GenReverse(LocationSummary* locations, __ Srl(out, out, 24); __ Or(out, out, TMP); } - } else if (type == Primitive::kPrimInt) { + } else if (type == DataType::Type::kInt32) { Register in = locations->InAt(0).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); @@ -314,7 +313,7 @@ static void GenReverse(LocationSummary* locations, __ Or(out, TMP, out); } } - } else if (type == Primitive::kPrimLong) { + } else if (type == DataType::Type::kInt64) { Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Register out_lo = locations->Out().AsRegisterPairLow<Register>(); @@ -400,12 +399,12 @@ static void GenReverse(LocationSummary* locations, // int java.lang.Integer.reverseBytes(int) void IntrinsicLocationsBuilderMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { GenReverse(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, IsR2OrNewer(), IsR6(), /* reverseBits */ false, @@ -414,12 +413,12 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverseBytes(HInvoke* invoke) { // long java.lang.Long.reverseBytes(long) void IntrinsicLocationsBuilderMIPS::VisitLongReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { GenReverse(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, IsR2OrNewer(), IsR6(), /* reverseBits */ false, @@ -428,12 +427,12 @@ void IntrinsicCodeGeneratorMIPS::VisitLongReverseBytes(HInvoke* invoke) { // short java.lang.Short.reverseBytes(short) void IntrinsicLocationsBuilderMIPS::VisitShortReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitShortReverseBytes(HInvoke* invoke) { GenReverse(invoke->GetLocations(), - Primitive::kPrimShort, + DataType::Type::kInt16, IsR2OrNewer(), IsR6(), /* 
reverseBits */ false, @@ -472,7 +471,7 @@ static void GenNumberOfLeadingZeroes(LocationSummary* locations, // int java.lang.Integer.numberOfLeadingZeros(int i) void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { @@ -481,7 +480,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfLeadingZeros(HInvoke* invok // int java.lang.Long.numberOfLeadingZeros(long i) void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -559,7 +558,7 @@ static void GenNumberOfTrailingZeroes(LocationSummary* locations, // int java.lang.Integer.numberOfTrailingZeros(int i) void IntrinsicLocationsBuilderMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap); + CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); } void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { @@ -568,7 +567,7 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerNumberOfTrailingZeros(HInvoke* invo // int java.lang.Long.numberOfTrailingZeros(long i) void IntrinsicLocationsBuilderMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap); + CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); } void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -577,12 +576,12 @@ void IntrinsicCodeGeneratorMIPS::VisitLongNumberOfTrailingZeros(HInvoke* invoke) // int java.lang.Integer.reverse(int) void IntrinsicLocationsBuilderMIPS::VisitIntegerReverse(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) { GenReverse(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, IsR2OrNewer(), IsR6(), /* reverseBits */ true, @@ -591,28 +590,27 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerReverse(HInvoke* invoke) { // long java.lang.Long.reverse(long) void IntrinsicLocationsBuilderMIPS::VisitLongReverse(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitLongReverse(HInvoke* invoke) { GenReverse(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, IsR2OrNewer(), IsR6(), /* reverseBits */ true, GetAssembler()); } -static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } static void GenBitCount(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, bool isR6, MipsAssembler* assembler) { Register out = locations->Out().AsRegister<Register>(); @@ -639,7 +637,7 @@ static void 
GenBitCount(LocationSummary* locations, // instructions compared to a loop-based algorithm which required 47 // instructions. - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { Register in = locations->InAt(0).AsRegister<Register>(); __ Srl(TMP, in, 1); @@ -663,7 +661,7 @@ static void GenBitCount(LocationSummary* locations, } __ Srl(out, out, 24); } else { - DCHECK_EQ(type, Primitive::kPrimLong); + DCHECK_EQ(type, DataType::Type::kInt64); Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Register tmp_hi = locations->GetTemp(0).AsRegister<Register>(); @@ -723,18 +721,17 @@ static void GenBitCount(LocationSummary* locations, // int java.lang.Integer.bitCount(int) void IntrinsicLocationsBuilderMIPS::VisitIntegerBitCount(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, IsR6(), GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); } // int java.lang.Long.bitCount(int) void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresRegister()); @@ -742,7 +739,7 @@ void IntrinsicLocationsBuilderMIPS::VisitLongBitCount(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); } static void MathAbsFP(LocationSummary* locations, @@ -799,7 +796,7 @@ static void MathAbsFP(LocationSummary* locations, // double java.lang.Math.abs(double) void IntrinsicLocationsBuilderMIPS::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { @@ -808,7 +805,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAbsDouble(HInvoke* invoke) { // float java.lang.Math.abs(float) void IntrinsicLocationsBuilderMIPS::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathAbsFloat(HInvoke* invoke) { @@ -845,7 +842,7 @@ static void GenAbsInteger(LocationSummary* locations, bool is64bit, MipsAssemble // int java.lang.Math.abs(int) void IntrinsicLocationsBuilderMIPS::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { @@ -854,7 +851,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAbsInt(HInvoke* invoke) { // long java.lang.Math.abs(long) void IntrinsicLocationsBuilderMIPS::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { @@ -863,7 +860,7 @@ void 
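The comment above refers to the branch-free "parallel" bit count. For comparison, this is the classic SWAR population count written out in portable C++, which is what the 32-bit sequence computes (a minimal sketch; the constants follow the textbook algorithm rather than being read off the assembly, and the function name is illustrative, not ART code):

#include <cstdint>

// SWAR popcount: sum bits in pairs, then nibbles, then bytes, and fold
// the byte sums into the top byte with a multiply.
int BitCount32(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
  x = (x + (x >> 4)) & 0x0f0f0f0fu;
  x = x * 0x01010101u;
  return static_cast<int>(x >> 24);  // analogous to the final Srl(out, out, 24)
}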
IntrinsicCodeGeneratorMIPS::VisitMathAbsLong(HInvoke* invoke) { static void GenMinMaxFP(LocationSummary* locations, bool is_min, - Primitive::Type type, + DataType::Type type, bool is_R6, MipsAssembler* assembler) { FRegister out = locations->Out().AsFpuRegister<FRegister>(); @@ -882,7 +879,7 @@ static void GenMinMaxFP(LocationSummary* locations, // returned. This is why there is extra logic preceding the use of // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a // NaN, return the NaN, otherwise return the min/max. - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ CmpUnD(FTMP, a, b); __ Bc1eqz(FTMP, &noNaNs); @@ -905,7 +902,7 @@ static void GenMinMaxFP(LocationSummary* locations, __ MaxD(out, a, b); } } else { - DCHECK_EQ(type, Primitive::kPrimFloat); + DCHECK_EQ(type, DataType::Type::kFloat32); __ CmpUnS(FTMP, a, b); __ Bc1eqz(FTMP, &noNaNs); @@ -936,16 +933,16 @@ static void GenMinMaxFP(LocationSummary* locations, MipsLabel select; MipsLabel done; - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ CunD(a, b); } else { - DCHECK_EQ(type, Primitive::kPrimFloat); + DCHECK_EQ(type, DataType::Type::kFloat32); __ CunS(a, b); } __ Bc1f(&ordered); // a or b (or both) is a NaN. Return one, which is a NaN. - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ CeqD(b, b); } else { __ CeqS(b, b); @@ -957,7 +954,7 @@ static void GenMinMaxFP(LocationSummary* locations, // Neither is a NaN. // a == b? (-0.0 compares equal with +0.0) // If equal, handle zeroes, else compare further. - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ CeqD(a, b); } else { __ CeqS(a, b); @@ -965,7 +962,7 @@ static void GenMinMaxFP(LocationSummary* locations, __ Bc1f(&compare); // a == b either bit for bit or one is -0.0 and the other is +0.0. - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ MoveFromFpuHigh(TMP, a); __ MoveFromFpuHigh(AT, b); } else { @@ -981,7 +978,7 @@ static void GenMinMaxFP(LocationSummary* locations, __ And(TMP, TMP, AT); } - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ Mfc1(AT, a); __ Mtc1(AT, out); __ MoveToFpuHigh(TMP, out); @@ -992,7 +989,7 @@ static void GenMinMaxFP(LocationSummary* locations, __ Bind(&compare); - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { if (is_min) { // return (a <= b) ? 
a : b; __ ColeD(a, b); @@ -1012,7 +1009,7 @@ static void GenMinMaxFP(LocationSummary* locations, __ Bind(&select); - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ MovtD(out, a); __ MovfD(out, b); } else { @@ -1024,10 +1021,9 @@ static void GenMinMaxFP(LocationSummary* locations, } } -static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kOutputOverlap); @@ -1035,60 +1031,59 @@ static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { // double java.lang.Math.min(double, double) void IntrinsicLocationsBuilderMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathMinDoubleDouble(HInvoke* invoke) { GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, - Primitive::kPrimDouble, + DataType::Type::kFloat64, IsR6(), GetAssembler()); } // float java.lang.Math.min(float, float) void IntrinsicLocationsBuilderMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathMinFloatFloat(HInvoke* invoke) { GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, - Primitive::kPrimFloat, + DataType::Type::kFloat32, IsR6(), GetAssembler()); } // double java.lang.Math.max(double, double) void IntrinsicLocationsBuilderMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathMaxDoubleDouble(HInvoke* invoke) { GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, - Primitive::kPrimDouble, + DataType::Type::kFloat64, IsR6(), GetAssembler()); } // float java.lang.Math.max(float, float) void IntrinsicLocationsBuilderMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathMaxFloatFloat(HInvoke* invoke) { GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, - Primitive::kPrimFloat, + DataType::Type::kFloat32, IsR6(), GetAssembler()); } -static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -1096,7 +1091,7 @@ static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { static void GenMinMax(LocationSummary* locations, bool is_min, - Primitive::Type type, + DataType::Type type, bool is_R6, MipsAssembler* assembler) { if (is_R6) { 
@@ -1123,7 +1118,7 @@ static void GenMinMax(LocationSummary* locations, // as the output register; the else clause also handles the case // where the output register is distinct from both the first, and the // second input registers. - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); @@ -1166,7 +1161,7 @@ static void GenMinMax(LocationSummary* locations, __ Or(out_hi, out_hi, AT); } } else { - DCHECK_EQ(type, Primitive::kPrimInt); + DCHECK_EQ(type, DataType::Type::kInt32); Register a = locations->InAt(0).AsRegister<Register>(); Register b = locations->InAt(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); @@ -1188,7 +1183,7 @@ static void GenMinMax(LocationSummary* locations, } } } else { - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { Register a_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register a_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Register b_lo = locations->InAt(1).AsRegisterPairLow<Register>(); @@ -1232,7 +1227,7 @@ static void GenMinMax(LocationSummary* locations, } } } else { - DCHECK_EQ(type, Primitive::kPrimInt); + DCHECK_EQ(type, DataType::Type::kInt32); Register a = locations->InAt(0).AsRegister<Register>(); Register b = locations->InAt(1).AsRegister<Register>(); Register out = locations->Out().AsRegister<Register>(); @@ -1265,59 +1260,59 @@ static void GenMinMax(LocationSummary* locations, // int java.lang.Math.min(int, int) void IntrinsicLocationsBuilderMIPS::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathMinIntInt(HInvoke* invoke) { GenMinMax(invoke->GetLocations(), /* is_min */ true, - Primitive::kPrimInt, + DataType::Type::kInt32, IsR6(), GetAssembler()); } // long java.lang.Math.min(long, long) void IntrinsicLocationsBuilderMIPS::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathMinLongLong(HInvoke* invoke) { GenMinMax(invoke->GetLocations(), /* is_min */ true, - Primitive::kPrimLong, + DataType::Type::kInt64, IsR6(), GetAssembler()); } // int java.lang.Math.max(int, int) void IntrinsicLocationsBuilderMIPS::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathMaxIntInt(HInvoke* invoke) { GenMinMax(invoke->GetLocations(), /* is_min */ false, - Primitive::kPrimInt, + DataType::Type::kInt32, IsR6(), GetAssembler()); } // long java.lang.Math.max(long, long) void IntrinsicLocationsBuilderMIPS::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathMaxLongLong(HInvoke* invoke) { GenMinMax(invoke->GetLocations(), /* is_min */ false, - Primitive::kPrimLong, + DataType::Type::kInt64, IsR6(), GetAssembler()); } // double java.lang.Math.sqrt(double) void IntrinsicLocationsBuilderMIPS::VisitMathSqrt(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathSqrt(HInvoke* invoke) { 
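The NaN and signed-zero handling described in GenMinMaxFP above is the java.lang.Math contract: any NaN operand yields NaN, and -0.0 is treated as smaller than +0.0 even though the two compare equal. A minimal C++ sketch of the behaviour the emitted code must reproduce for min (illustrative only, not ART code):

#include <cmath>

double JavaMinDouble(double a, double b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::nan("");              // any NaN operand yields NaN
  }
  if (a == b) {
    // -0.0 compares equal to +0.0, but min must prefer -0.0.
    return std::signbit(a) ? a : b;
  }
  return (a < b) ? a : b;
}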
@@ -1331,7 +1326,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathSqrt(HInvoke* invoke) { // byte libcore.io.Memory.peekByte(long address) void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekByte(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekByte(HInvoke* invoke) { @@ -1344,7 +1339,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekByte(HInvoke* invoke) { // short libcore.io.Memory.peekShort(long address) void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) { @@ -1376,7 +1371,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekShortNative(HInvoke* invoke) { // int libcore.io.Memory.peekInt(long address) void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap); + CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); } void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) { @@ -1394,7 +1389,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekIntNative(HInvoke* invoke) { // long libcore.io.Memory.peekLong(long address) void IntrinsicLocationsBuilderMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap); + CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); } void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) { @@ -1414,17 +1409,16 @@ void IntrinsicCodeGeneratorMIPS::VisitMemoryPeekLongNative(HInvoke* invoke) { } } -static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); } // void libcore.io.Memory.pokeByte(long address, byte value) void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeByte(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeByte(HInvoke* invoke) { @@ -1437,7 +1431,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeByte(HInvoke* invoke) { // void libcore.io.Memory.pokeShort(long address, short value) void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) { @@ -1459,7 +1453,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeShortNative(HInvoke* invoke) { // void libcore.io.Memory.pokeInt(long address, int value) void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) { @@ -1477,7 +1471,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeIntNative(HInvoke* invoke) { // void 
libcore.io.Memory.pokeLong(long address, long value) void IntrinsicLocationsBuilderMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) { @@ -1499,9 +1493,8 @@ void IntrinsicCodeGeneratorMIPS::VisitMemoryPokeLongNative(HInvoke* invoke) { // Thread java.lang.Thread.currentThread() void IntrinsicLocationsBuilderMIPS::VisitThreadCurrentThread(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetOut(Location::RequiresRegister()); } @@ -1515,17 +1508,18 @@ void IntrinsicCodeGeneratorMIPS::VisitThreadCurrentThread(HInvoke* invoke) { Thread::PeerOffset<kMipsPointerSize>().Int32Value()); } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, +static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, - Primitive::Type type) { + DataType::Type type) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } @@ -1534,7 +1528,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in InstructionCodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier. locations->AddTemp(Location::RequiresRegister()); @@ -1544,14 +1538,14 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, // Note that the caller must supply a properly aligned memory address. // If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). static void GenUnsafeGet(HInvoke* invoke, - Primitive::Type type, + DataType::Type type, bool is_volatile, bool is_R6, CodeGeneratorMIPS* codegen) { LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == Primitive::kPrimInt) || - (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)) << type; + DCHECK((type == DataType::Type::kInt32) || + (type == DataType::Type::kInt64) || + (type == DataType::Type::kReference)) << type; MipsAssembler* assembler = codegen->GetAssembler(); // Target register. 
Location trg_loc = locations->Out(); @@ -1564,12 +1558,12 @@ static void GenUnsafeGet(HInvoke* invoke, Location offset_loc = locations->InAt(2); Register offset_lo = offset_loc.AsRegisterPairLow<Register>(); - if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) { + if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == DataType::Type::kReference))) { __ Addu(TMP, base, offset_lo); } switch (type) { - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register trg_lo = trg_loc.AsRegisterPairLow<Register>(); Register trg_hi = trg_loc.AsRegisterPairHigh<Register>(); CHECK(!is_volatile); // TODO: support atomic 8-byte volatile loads. @@ -1585,7 +1579,7 @@ static void GenUnsafeGet(HInvoke* invoke, break; } - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { Register trg = trg_loc.AsRegister<Register>(); if (is_R6) { __ Lw(trg, TMP, 0); @@ -1599,7 +1593,7 @@ static void GenUnsafeGet(HInvoke* invoke, break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { Register trg = trg_loc.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { @@ -1655,53 +1649,52 @@ static void GenUnsafeGet(HInvoke* invoke, // int sun.misc.Unsafe.getInt(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, IsR6(), codegen_); } // int sun.misc.Unsafe.getIntVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, IsR6(), codegen_); } // long sun.misc.Unsafe.getLong(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, IsR6(), codegen_); } // Object sun.misc.Unsafe.getObject(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, IsR6(), codegen_); } // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - 
CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, IsR6(), codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, IsR6(), codegen_); } -static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -1711,14 +1704,14 @@ static void CreateIntIntIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* in // Note that the caller must supply a properly aligned memory address. // If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). static void GenUnsafePut(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, bool is_volatile, bool is_ordered, bool is_R6, CodeGeneratorMIPS* codegen) { - DCHECK((type == Primitive::kPrimInt) || - (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)) << type; + DCHECK((type == DataType::Type::kInt32) || + (type == DataType::Type::kInt64) || + (type == DataType::Type::kReference)) << type; MipsAssembler* assembler = codegen->GetAssembler(); // Object pointer. Register base = locations->InAt(1).AsRegister<Register>(); @@ -1731,10 +1724,10 @@ static void GenUnsafePut(LocationSummary* locations, if (is_volatile || is_ordered) { __ Sync(0); } - if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) { + if ((type == DataType::Type::kInt32) || (type == DataType::Type::kReference)) { Register value = locations->InAt(3).AsRegister<Register>(); - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { __ PoisonHeapReference(AT, value); value = AT; } @@ -1764,7 +1757,7 @@ static void GenUnsafePut(LocationSummary* locations, __ Sync(0); } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { bool value_can_be_null = true; // TODO: Worth finding out this information? 
codegen->MarkGCCard(base, locations->InAt(3).AsRegister<Register>(), value_can_be_null); } @@ -1772,12 +1765,12 @@ static void GenUnsafePut(LocationSummary* locations, // void sun.misc.Unsafe.putInt(Object o, long offset, int x) void IntrinsicLocationsBuilderMIPS::VisitUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(arena_, invoke); + CreateIntIntIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ false, /* is_ordered */ false, IsR6(), @@ -1786,12 +1779,12 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePut(HInvoke* invoke) { // void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x) void IntrinsicLocationsBuilderMIPS::VisitUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(arena_, invoke); + CreateIntIntIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ false, /* is_ordered */ true, IsR6(), @@ -1800,12 +1793,12 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePutOrdered(HInvoke* invoke) { // void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x) void IntrinsicLocationsBuilderMIPS::VisitUnsafePutVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(arena_, invoke); + CreateIntIntIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ true, /* is_ordered */ false, IsR6(), @@ -1814,12 +1807,12 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePutVolatile(HInvoke* invoke) { // void sun.misc.Unsafe.putObject(Object o, long offset, Object x) void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(arena_, invoke); + CreateIntIntIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ false, /* is_ordered */ false, IsR6(), @@ -1828,12 +1821,12 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObject(HInvoke* invoke) { // void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x) void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(arena_, invoke); + CreateIntIntIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ false, /* is_ordered */ true, IsR6(), @@ -1842,12 +1835,12 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectOrdered(HInvoke* invoke) { // void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x) void IntrinsicLocationsBuilderMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(arena_, invoke); + CreateIntIntIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ true, /* is_ordered */ false, IsR6(), @@ -1856,12 +1849,12 @@ 
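The Sync(0) placement in GenUnsafePut appears to follow the usual pattern for ordered and volatile stores: a barrier before the store in both cases, and a trailing barrier only for volatile puts. A minimal C++11 sketch of that fence placement for the 32-bit case, with seq_cst fences standing in for SYNC 0 (illustrative only, not ART code):

#include <atomic>
#include <cstdint>

void UnsafePutInt32(std::atomic<int32_t>* addr, int32_t value,
                    bool is_volatile, bool is_ordered) {
  if (is_volatile || is_ordered) {
    std::atomic_thread_fence(std::memory_order_seq_cst);  // leading SYNC 0
  }
  addr->store(value, std::memory_order_relaxed);           // the plain aligned word store
  if (is_volatile) {
    std::atomic_thread_fence(std::memory_order_seq_cst);   // trailing SYNC 0
  }
}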
void IntrinsicCodeGeneratorMIPS::VisitUnsafePutObjectVolatile(HInvoke* invoke) { // void sun.misc.Unsafe.putLong(Object o, long offset, long x) void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLong(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(arena_, invoke); + CreateIntIntIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ false, /* is_ordered */ false, IsR6(), @@ -1870,27 +1863,28 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLong(HInvoke* invoke) { // void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x) void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidLocations(arena_, invoke); + CreateIntIntIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ false, /* is_ordered */ true, IsR6(), codegen_); } -static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && kUseBakerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -1906,7 +1900,7 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* // Note that the caller must supply a properly aligned memory address. // If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). -static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* codegen) { +static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS* codegen) { MipsAssembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); bool isR6 = codegen->GetInstructionSetFeatures().IsR6(); @@ -1922,7 +1916,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* cod DCHECK_NE(offset_lo, out); DCHECK_NE(expected, out); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); @@ -1952,7 +1946,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* cod MipsLabel loop_head, exit_loop; __ Addu(TMP, base, offset_lo); - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { __ PoisonHeapReference(expected); // Do not poison `value`, if it is the same register as // `expected`, which has just been poisoned. 
@@ -1968,7 +1962,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* cod __ Sync(0); __ Bind(&loop_head); - if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) { + if ((type == DataType::Type::kInt32) || (type == DataType::Type::kReference)) { if (isR6) { __ LlR6(out, TMP); } else { @@ -1986,11 +1980,11 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* cod // in the case that the store fails. Whether the // store succeeds, or fails, it will load the // correct Boolean value into the 'out' register. - // This test isn't really necessary. We only support Primitive::kPrimInt, - // Primitive::kPrimNot, and we already verified that we're working on one + // This test isn't really necessary. We only support DataType::Type::kInt, + // DataType::Type::kReference, and we already verified that we're working on one // of those two types. It's left here in case the code needs to support // other types in the future. - if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) { + if ((type == DataType::Type::kInt32) || (type == DataType::Type::kReference)) { if (isR6) { __ ScR6(out, TMP); } else { @@ -2002,7 +1996,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* cod __ Bind(&exit_loop); __ Sync(0); - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { __ UnpoisonHeapReference(expected); // Do not unpoison `value`, if it is the same register as // `expected`, which has just been unpoisoned. @@ -2014,11 +2008,11 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* cod // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x) void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke, Primitive::kPrimInt, codegen_); + GenCas(invoke, DataType::Type::kInt32, codegen_); } // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x) @@ -2029,7 +2023,7 @@ void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASObject(HInvoke* invoke) { return; } - CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) { @@ -2037,18 +2031,17 @@ void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) { // UnsafeCASObject intrinsic is the Baker-style read barriers. 
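Ignoring the surrounding SYNC barriers and heap-reference poisoning, the LL/SC loop in GenCas implements the usual compare-and-swap contract: give up as soon as the loaded value no longer matches the expected one, and retry only on a spurious store-conditional failure. A minimal C++ sketch of that contract for the 32-bit case (illustrative only, not ART code):

#include <atomic>
#include <cstdint>

bool CasInt32(std::atomic<int32_t>* addr, int32_t expected, int32_t desired) {
  int32_t observed = expected;
  while (!addr->compare_exchange_weak(observed, desired, std::memory_order_seq_cst)) {
    if (observed != expected) {
      return false;        // value changed under us: report failure, like the mismatch branch
    }
    observed = expected;   // spurious SC-style failure: retry the loop
  }
  return true;
}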
DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke, Primitive::kPrimNot, codegen_); + GenCas(invoke, DataType::Type::kReference, codegen_); } // int java.lang.String.compareTo(String anotherString) void IntrinsicLocationsBuilderMIPS::VisitStringCompareTo(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); } @@ -2060,7 +2053,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) { DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); Register argument = locations->InAt(1).AsRegister<Register>(); - SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke); + SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); codegen_->AddSlowPath(slow_path); __ Beqz(argument, slow_path->GetEntryLabel()); codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path); @@ -2069,9 +2062,15 @@ void IntrinsicCodeGeneratorMIPS::VisitStringCompareTo(HInvoke* invoke) { // boolean java.lang.String.equals(Object anObject) void IntrinsicLocationsBuilderMIPS::VisitStringEquals(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + if (kEmitCompilerReadBarrier && + !StringEqualsOptimizations(invoke).GetArgumentIsString() && + !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { + // No support for this odd case (String class is moveable, not in the boot image). + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); @@ -2193,8 +2192,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringEquals(HInvoke* invoke) { static void GenerateStringIndexOf(HInvoke* invoke, bool start_at_zero, MipsAssembler* assembler, - CodeGeneratorMIPS* codegen, - ArenaAllocator* allocator) { + CodeGeneratorMIPS* codegen) { LocationSummary* locations = invoke->GetLocations(); Register tmp_reg = start_at_zero ? locations->GetTemp(0).AsRegister<Register>() : TMP; @@ -2210,13 +2208,13 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Always needs the slow-path. We could directly dispatch to it, // but this case should be rare, so for simplicity just put the // full slow-path down and branch unconditionally. 
- slow_path = new (allocator) IntrinsicSlowPathMIPS(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); codegen->AddSlowPath(slow_path); __ B(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } - } else if (code_point->GetType() != Primitive::kPrimChar) { + } else if (code_point->GetType() != DataType::Type::kUint16) { Register char_reg = locations->InAt(1).AsRegister<Register>(); // The "bltu" conditional branch tests to see if the character value // fits in a valid 16-bit (MIPS halfword) value. If it doesn't then @@ -2227,7 +2225,7 @@ static void GenerateStringIndexOf(HInvoke* invoke, // two halfwords so we fallback to using the generic implementation // of indexOf(). __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max()); - slow_path = new (allocator) IntrinsicSlowPathMIPS(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); codegen->AddSlowPath(slow_path); __ Bltu(tmp_reg, char_reg, slow_path->GetEntryLabel()); } @@ -2246,15 +2244,14 @@ static void GenerateStringIndexOf(HInvoke* invoke, // int java.lang.String.indexOf(int ch) void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); // Need a temp for slow-path codepoint compare, and need to send start-index=0. @@ -2262,25 +2259,20 @@ void IntrinsicLocationsBuilderMIPS::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, - /* start_at_zero */ true, - GetAssembler(), - codegen_, - GetAllocator()); + GenerateStringIndexOf(invoke, /* start_at_zero */ true, GetAssembler(), codegen_); } // int java.lang.String.indexOf(int ch, int fromIndex) void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. 
InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); // Need a temp for slow-path codepoint compare. @@ -2288,24 +2280,19 @@ void IntrinsicLocationsBuilderMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf(invoke, - /* start_at_zero */ false, - GetAssembler(), - codegen_, - GetAllocator()); + GenerateStringIndexOf(invoke, /* start_at_zero */ false, GetAssembler(), codegen_); } // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); } @@ -2314,7 +2301,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) LocationSummary* locations = invoke->GetLocations(); Register byte_array = locations->InAt(0).AsRegister<Register>(); - SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke); + SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); codegen_->AddSlowPath(slow_path); __ Beqz(byte_array, slow_path->GetEntryLabel()); codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path); @@ -2323,14 +2310,13 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromBytes(HInvoke* invoke) // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromChars(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - Location outLocation = 
calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); } @@ -2346,12 +2332,11 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromChars(HInvoke* invoke) // java.lang.StringFactory.newStringFromString(String toCopy) void IntrinsicLocationsBuilderMIPS::VisitStringNewStringFromString(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<Register>())); } @@ -2360,7 +2345,7 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromString(HInvoke* invoke) LocationSummary* locations = invoke->GetLocations(); Register string_to_copy = locations->InAt(0).AsRegister<Register>(); - SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke); + SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); codegen_->AddSlowPath(slow_path); __ Beqz(string_to_copy, slow_path->GetEntryLabel()); codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc()); @@ -2368,16 +2353,16 @@ void IntrinsicCodeGeneratorMIPS::VisitStringNewStringFromString(HInvoke* invoke) } static void GenIsInfinite(LocationSummary* locations, - const Primitive::Type type, + const DataType::Type type, const bool isR6, MipsAssembler* assembler) { FRegister in = locations->InAt(0).AsFpuRegister<FRegister>(); Register out = locations->Out().AsRegister<Register>(); - DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble); + DCHECK(type == DataType::Type::kFloat32 || type == DataType::Type::kFloat64); if (isR6) { - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ ClassD(FTMP, in); } else { __ ClassS(FTMP, in); @@ -2387,7 +2372,7 @@ static void GenIsInfinite(LocationSummary* locations, __ Sltu(out, ZERO, out); } else { // If one, or more, of the exponent bits is zero, then the number can't be infinite. 
- if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ MoveFromFpuHigh(TMP, in); __ LoadConst32(AT, High32Bits(kPositiveInfinityDouble)); } else { @@ -2398,7 +2383,7 @@ static void GenIsInfinite(LocationSummary* locations, __ Sll(TMP, TMP, 1); - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ Mfc1(AT, in); __ Or(TMP, TMP, AT); } @@ -2409,29 +2394,29 @@ static void GenIsInfinite(LocationSummary* locations, // boolean java.lang.Float.isInfinite(float) void IntrinsicLocationsBuilderMIPS::VisitFloatIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitFloatIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), Primitive::kPrimFloat, IsR6(), GetAssembler()); + GenIsInfinite(invoke->GetLocations(), DataType::Type::kFloat32, IsR6(), GetAssembler()); } // boolean java.lang.Double.isInfinite(double) void IntrinsicLocationsBuilderMIPS::VisitDoubleIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitDoubleIsInfinite(HInvoke* invoke) { - GenIsInfinite(invoke->GetLocations(), Primitive::kPrimDouble, IsR6(), GetAssembler()); + GenIsInfinite(invoke->GetLocations(), DataType::Type::kFloat64, IsR6(), GetAssembler()); } static void GenHighestOneBit(LocationSummary* locations, - const Primitive::Type type, + const DataType::Type type, bool isR6, MipsAssembler* assembler) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_hi = locations->InAt(0).AsRegisterPairHigh<Register>(); Register out_lo = locations->Out().AsRegisterPairLow<Register>(); @@ -2474,29 +2459,29 @@ static void GenHighestOneBit(LocationSummary* locations, // int java.lang.Integer.highestOneBit(int) void IntrinsicLocationsBuilderMIPS::VisitIntegerHighestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitIntegerHighestOneBit(HInvoke* invoke) { - GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimInt, IsR6(), GetAssembler()); + GenHighestOneBit(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); } // long java.lang.Long.highestOneBit(long) void IntrinsicLocationsBuilderMIPS::VisitLongHighestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke, Location::kOutputOverlap); + CreateIntToIntLocations(allocator_, invoke, Location::kOutputOverlap); } void IntrinsicCodeGeneratorMIPS::VisitLongHighestOneBit(HInvoke* invoke) { - GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler()); + GenHighestOneBit(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); } static void GenLowestOneBit(LocationSummary* locations, - const Primitive::Type type, + const DataType::Type type, bool isR6, MipsAssembler* assembler) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { Register in_lo = locations->InAt(0).AsRegisterPairLow<Register>(); Register in_hi = 
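On pre-R6 cores GenIsInfinite cannot use class.fmt, so it tests the raw bit pattern instead: once the sign bit is dropped, a value is infinite exactly when the remaining bits equal the +Infinity encoding (exponent all ones, mantissa zero). A minimal C++ sketch of that test for float (illustrative only, not ART code):

#include <cstdint>
#include <cstring>

bool FloatIsInfinite(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits));        // reinterpret the float without UB
  return (bits & 0x7fffffffu) == 0x7f800000u;  // exponent all ones, mantissa zero
}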
locations->InAt(0).AsRegisterPairHigh<Register>(); Register out_lo = locations->Out().AsRegisterPairLow<Register>(); @@ -2522,27 +2507,26 @@ static void GenLowestOneBit(LocationSummary* locations, // int java.lang.Integer.lowestOneBit(int) void IntrinsicLocationsBuilderMIPS::VisitIntegerLowestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitIntegerLowestOneBit(HInvoke* invoke) { - GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimInt, IsR6(), GetAssembler()); + GenLowestOneBit(invoke->GetLocations(), DataType::Type::kInt32, IsR6(), GetAssembler()); } // long java.lang.Long.lowestOneBit(long) void IntrinsicLocationsBuilderMIPS::VisitLongLowestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitLongLowestOneBit(HInvoke* invoke) { - GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, IsR6(), GetAssembler()); + GenLowestOneBit(invoke->GetLocations(), DataType::Type::kInt64, IsR6(), GetAssembler()); } // int java.lang.Math.round(float) void IntrinsicLocationsBuilderMIPS::VisitMathRoundFloat(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); @@ -2665,9 +2649,8 @@ void IntrinsicCodeGeneratorMIPS::VisitMathRoundFloat(HInvoke* invoke) { // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -2684,9 +2667,9 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Check assumption that sizeof(Char) is 2 (used in scaling below). 
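GenLowestOneBit isolates the lowest set bit, which is the standard x & -x identity; a one-line C++ sketch of that identity (illustrative only, not ART code):

#include <cstdint>

uint32_t LowestOneBit32(uint32_t x) {
  return x & (0u - x);  // only the lowest set bit survives the AND with the negation
}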
- const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); - const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar); + const size_t char_shift = DataType::SizeShift(DataType::Type::kUint16); Register srcObj = locations->InAt(0).AsRegister<Register>(); Register srcBegin = locations->InAt(1).AsRegister<Register>(); @@ -2755,25 +2738,23 @@ void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Bind(&done); } -static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); +static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimDouble)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kFloat64)); } -static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); +static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimDouble)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kFloat64)); } static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorMIPS* codegen, QuickEntrypointEnum entry) { @@ -2802,7 +2783,7 @@ static void GenFPFPToFPCall(HInvoke* invoke, // static double java.lang.Math.cos(double a) void IntrinsicLocationsBuilderMIPS::VisitMathCos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathCos(HInvoke* invoke) { @@ -2811,7 +2792,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathCos(HInvoke* invoke) { // static double java.lang.Math.sin(double a) void IntrinsicLocationsBuilderMIPS::VisitMathSin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathSin(HInvoke* invoke) { @@ -2820,7 +2801,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathSin(HInvoke* invoke) { // static double java.lang.Math.acos(double a) void IntrinsicLocationsBuilderMIPS::VisitMathAcos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathAcos(HInvoke* invoke) { @@ -2829,7 +2810,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAcos(HInvoke* invoke) { // static double java.lang.Math.asin(double a) void IntrinsicLocationsBuilderMIPS::VisitMathAsin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + 
CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathAsin(HInvoke* invoke) { @@ -2838,7 +2819,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAsin(HInvoke* invoke) { // static double java.lang.Math.atan(double a) void IntrinsicLocationsBuilderMIPS::VisitMathAtan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathAtan(HInvoke* invoke) { @@ -2847,16 +2828,25 @@ void IntrinsicCodeGeneratorMIPS::VisitMathAtan(HInvoke* invoke) { // static double java.lang.Math.atan2(double y, double x) void IntrinsicLocationsBuilderMIPS::VisitMathAtan2(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS::VisitMathCbrt(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathCbrt(HInvoke* invoke) { @@ -2865,7 +2855,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathCbrt(HInvoke* invoke) { // static double java.lang.Math.cosh(double x) void IntrinsicLocationsBuilderMIPS::VisitMathCosh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathCosh(HInvoke* invoke) { @@ -2874,7 +2864,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathCosh(HInvoke* invoke) { // static double java.lang.Math.exp(double a) void IntrinsicLocationsBuilderMIPS::VisitMathExp(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathExp(HInvoke* invoke) { @@ -2883,7 +2873,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathExp(HInvoke* invoke) { // static double java.lang.Math.expm1(double x) void IntrinsicLocationsBuilderMIPS::VisitMathExpm1(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathExpm1(HInvoke* invoke) { @@ -2892,7 +2882,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathExpm1(HInvoke* invoke) { // static double java.lang.Math.hypot(double x, double y) void IntrinsicLocationsBuilderMIPS::VisitMathHypot(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathHypot(HInvoke* invoke) { @@ -2901,7 +2891,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathHypot(HInvoke* invoke) { // static double java.lang.Math.log(double a) void IntrinsicLocationsBuilderMIPS::VisitMathLog(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathLog(HInvoke* invoke) { @@ -2910,7 +2900,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathLog(HInvoke* invoke) { // static double java.lang.Math.log10(double x) void IntrinsicLocationsBuilderMIPS::VisitMathLog10(HInvoke* invoke) { - 
CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathLog10(HInvoke* invoke) { @@ -2919,7 +2909,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathLog10(HInvoke* invoke) { // static double java.lang.Math.nextAfter(double start, double direction) void IntrinsicLocationsBuilderMIPS::VisitMathNextAfter(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathNextAfter(HInvoke* invoke) { @@ -2928,7 +2918,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathNextAfter(HInvoke* invoke) { // static double java.lang.Math.sinh(double x) void IntrinsicLocationsBuilderMIPS::VisitMathSinh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathSinh(HInvoke* invoke) { @@ -2937,7 +2927,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathSinh(HInvoke* invoke) { // static double java.lang.Math.tan(double a) void IntrinsicLocationsBuilderMIPS::VisitMathTan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathTan(HInvoke* invoke) { @@ -2946,7 +2936,7 @@ void IntrinsicCodeGeneratorMIPS::VisitMathTan(HInvoke* invoke) { // static double java.lang.Math.tanh(double x) void IntrinsicLocationsBuilderMIPS::VisitMathTanh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS::VisitMathTanh(HInvoke* invoke) { @@ -2980,7 +2970,7 @@ void IntrinsicLocationsBuilderMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) { // Okay, it is safe to generate inline code. LocationSummary* locations = - new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); @@ -3076,7 +3066,7 @@ void IntrinsicCodeGeneratorMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) { Register src_base = locations->GetTemp(1).AsRegister<Register>(); Register count = locations->GetTemp(2).AsRegister<Register>(); - SlowPathCodeMIPS* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS(invoke); + SlowPathCodeMIPS* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS(invoke); codegen_->AddSlowPath(slow_path); // Bail out if the source and destination are the same (to handle overlap). @@ -3110,10 +3100,10 @@ void IntrinsicCodeGeneratorMIPS::VisitSystemArrayCopyChar(HInvoke* invoke) { // Okay, everything checks out. Finally time to do the copy. // Check assumption that sizeof(Char) is 2 (used in scaling below). 
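The char_size/char_shift constants set up just below exist only to turn array indices into byte offsets when walking the char[] data. A minimal sketch of that address arithmetic, assuming a 2-byte element and a header-sized data_offset mirroring mirror::Array::DataOffset (the function name and parameters are illustrative, not ART APIs):

#include <cstddef>
#include <cstdint>

// Sketch of the scaling the generated copy loop performs: element i of a
// char[] lives at base + data_offset + (i << char_shift). Illustrative only.
uintptr_t CharElementAddress(uintptr_t array_base, uint32_t data_offset, size_t index) {
  constexpr size_t kCharShift = 1;  // DataType::SizeShift(kUint16), i.e. log2(2)
  return array_base + data_offset + (index << kCharShift);
}
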
- const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); - const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar); + const size_t char_shift = DataType::SizeShift(DataType::Type::kUint16); const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); @@ -3152,7 +3142,7 @@ void IntrinsicLocationsBuilderMIPS::VisitIntegerValueOf(HInvoke* invoke) { IntrinsicVisitor::ComputeIntegerValueOfLocations( invoke, codegen_, - calling_convention.GetReturnLocation(Primitive::kPrimNot), + calling_convention.GetReturnLocation(DataType::Type::kReference), Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -3229,6 +3219,34 @@ void IntrinsicCodeGeneratorMIPS::VisitIntegerValueOf(HInvoke* invoke) { } } +// static boolean java.lang.Thread.interrupted() +void IntrinsicLocationsBuilderMIPS::VisitThreadInterrupted(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS::VisitThreadInterrupted(HInvoke* invoke) { + MipsAssembler* assembler = GetAssembler(); + Register out = invoke->GetLocations()->Out().AsRegister<Register>(); + int32_t offset = Thread::InterruptedOffset<kMipsPointerSize>().Int32Value(); + __ LoadFromOffset(kLoadWord, out, TR, offset); + MipsLabel done; + __ Beqz(out, &done); + __ Sync(0); + __ StoreToOffset(kStoreWord, ZERO, TR, offset); + __ Sync(0); + __ Bind(&done); +} + +void IntrinsicLocationsBuilderMIPS::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorMIPS::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + // Unimplemented intrinsics. UNIMPLEMENTED_INTRINSIC(MIPS, MathCeil) @@ -3258,8 +3276,6 @@ UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetInt) UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong) UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject) -UNIMPLEMENTED_INTRINSIC(MIPS, ThreadInterrupted) - UNREACHABLE_INTRINSICS(MIPS) #undef __ diff --git a/compiler/optimizing/intrinsics_mips.h b/compiler/optimizing/intrinsics_mips.h index eaadad2515..13397f11d4 100644 --- a/compiler/optimizing/intrinsics_mips.h +++ b/compiler/optimizing/intrinsics_mips.h @@ -39,7 +39,7 @@ class IntrinsicLocationsBuilderMIPS FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -49,8 +49,8 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: - CodeGeneratorMIPS* codegen_; - ArenaAllocator* arena_; + CodeGeneratorMIPS* const codegen_; + ArenaAllocator* const allocator_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS); }; @@ -64,7 +64,7 @@ class IntrinsicCodeGeneratorMIPS FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -77,7 +77,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) ArenaAllocator* GetAllocator(); - CodeGeneratorMIPS* codegen_; + CodeGeneratorMIPS* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS); }; diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc index d785567e0f..4668c561ed 100644 --- a/compiler/optimizing/intrinsics_mips64.cc +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -20,8 +20,10 @@ #include "art_method.h" #include "code_generator_mips64.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "mirror/array-inl.h" +#include "mirror/object_array-inl.h" #include "mirror/string.h" #include "scoped_thread_state_change-inl.h" #include "thread.h" @@ -33,7 +35,7 @@ namespace art { namespace mips64 { IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen) - : codegen_(codegen), arena_(codegen->GetGraph()->GetArena()) { + : codegen_(codegen), allocator_(codegen->GetGraph()->GetAllocator()) { } Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() { @@ -41,22 +43,22 @@ Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() { } ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { - return codegen_->GetGraph()->GetArena(); + return codegen_->GetGraph()->GetAllocator(); } #define __ codegen->GetAssembler()-> static void MoveFromReturnRegister(Location trg, - Primitive::Type type, + DataType::Type type, CodeGeneratorMIPS64* codegen) { if (!trg.IsValid()) { - DCHECK_EQ(type, Primitive::kPrimVoid); + DCHECK_EQ(type, DataType::Type::kVoid); return; } - DCHECK_NE(type, Primitive::kPrimVoid); + DCHECK_NE(type, DataType::Type::kVoid); - if (Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) { + if (DataType::IsIntegralType(type) || type == DataType::Type::kReference) { GpuRegister trg_reg = trg.AsRegister<GpuRegister>(); if (trg_reg != V0) { __ Move(V0, trg_reg); @@ -64,7 +66,7 @@ static void MoveFromReturnRegister(Location trg, } else { FpuRegister trg_reg = trg.AsFpuRegister<FpuRegister>(); if (trg_reg != F0) { - if (type == Primitive::kPrimFloat) { + if (type == DataType::Type::kFloat32) { __ MovS(F0, trg_reg); } else { __ MovD(F0, trg_reg); @@ -139,10 +141,9 @@ bool IntrinsicLocationsBuilderMIPS64::TryDispatch(HInvoke* invoke) { #define __ assembler-> -static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); } @@ -160,7 +161,7 @@ static void MoveFPToInt(LocationSummary* locations, bool is64bit, Mips64Assemble // long java.lang.Double.doubleToRawLongBits(double) void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { @@ -169,17 +170,16 @@ void 
IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invok // int java.lang.Float.floatToRawIntBits(float) void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } -static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); } @@ -197,7 +197,7 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, Mips64Assemble // double java.lang.Double.longBitsToDouble(long) void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke); + CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { @@ -206,37 +206,36 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) // float java.lang.Float.intBitsToFloat(int) void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke); + CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } -static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } static void GenReverseBytes(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, Mips64Assembler* assembler) { GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); switch (type) { - case Primitive::kPrimShort: + case DataType::Type::kInt16: __ Dsbh(out, in); __ Seh(out, out); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ Rotr(out, in, 16); __ Wsbh(out, out); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ Dsbh(out, in); __ Dshd(out, out); break; @@ -248,29 +247,29 @@ static void GenReverseBytes(LocationSummary* locations, // int java.lang.Integer.reverseBytes(int) void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } // long java.lang.Long.reverseBytes(long) void 
IntrinsicLocationsBuilderMIPS64::VisitLongReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitLongReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } // short java.lang.Short.reverseBytes(short) void IntrinsicLocationsBuilderMIPS64::VisitShortReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitShortReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } static void GenNumberOfLeadingZeroes(LocationSummary* locations, @@ -288,7 +287,7 @@ static void GenNumberOfLeadingZeroes(LocationSummary* locations, // int java.lang.Integer.numberOfLeadingZeros(int i) void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { @@ -297,7 +296,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* inv // int java.lang.Long.numberOfLeadingZeros(long i) void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { @@ -325,7 +324,7 @@ static void GenNumberOfTrailingZeroes(LocationSummary* locations, // int java.lang.Integer.numberOfTrailingZeros(int i) void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { @@ -334,7 +333,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfTrailingZeros(HInvoke* in // int java.lang.Long.numberOfTrailingZeros(long i) void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -342,14 +341,14 @@ void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfTrailingZeros(HInvoke* invok } static void GenReverse(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, Mips64Assembler* assembler) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { __ Rotr(out, in, 16); __ Wsbh(out, out); __ Bitswap(out, out); @@ -362,37 +361,36 @@ static void GenReverse(LocationSummary* locations, // int java.lang.Integer.reverse(int) void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverse(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void 
IntrinsicCodeGeneratorMIPS64::VisitIntegerReverse(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenReverse(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } // long java.lang.Long.reverse(long) void IntrinsicLocationsBuilderMIPS64::VisitLongReverse(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitLongReverse(HInvoke* invoke) { - GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenReverse(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } -static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); } static void GenBitCount(LocationSummary* locations, - const Primitive::Type type, + const DataType::Type type, Mips64Assembler* assembler) { GpuRegister out = locations->Out().AsRegister<GpuRegister>(); GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64); // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel // @@ -417,7 +415,7 @@ static void GenBitCount(LocationSummary* locations, // number of instructions executed even when a large number of bits // are set. 
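The GenBitCount comment above points at the "CountBitsSetParallel" bit hack, and the mask constants loaded by the instruction sequence below are exactly those of that SWAR population count. A minimal C++ sketch of the technique (illustrative only, not ART code):

#include <cstdint>

// Parallel (SWAR) population count, matching the constants GenBitCount loads.
uint32_t PopCount32(uint32_t v) {
  v = v - ((v >> 1) & 0x55555555u);                  // count bits in pairs
  v = (v & 0x33333333u) + ((v >> 2) & 0x33333333u);  // sums per nibble
  v = (v + (v >> 4)) & 0x0F0F0F0Fu;                  // sums per byte
  return (v * 0x01010101u) >> 24;                    // add the four byte sums
}

uint64_t PopCount64(uint64_t v) {
  v = v - ((v >> 1) & 0x5555555555555555ull);
  v = (v & 0x3333333333333333ull) + ((v >> 2) & 0x3333333333333333ull);
  v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0Full;
  return (v * 0x0101010101010101ull) >> 56;
}
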
- if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { __ Srl(TMP, in, 1); __ LoadConst32(AT, 0x55555555); __ And(TMP, TMP, AT); @@ -434,7 +432,7 @@ static void GenBitCount(LocationSummary* locations, __ LoadConst32(TMP, 0x01010101); __ MulR6(out, out, TMP); __ Srl(out, out, 24); - } else if (type == Primitive::kPrimLong) { + } else if (type == DataType::Type::kInt64) { __ Dsrl(TMP, in, 1); __ LoadConst64(AT, 0x5555555555555555L); __ And(TMP, TMP, AT); @@ -456,20 +454,20 @@ static void GenBitCount(LocationSummary* locations, // int java.lang.Integer.bitCount(int) void IntrinsicLocationsBuilderMIPS64::VisitIntegerBitCount(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitIntegerBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } // int java.lang.Long.bitCount(long) void IntrinsicLocationsBuilderMIPS64::VisitLongBitCount(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitLongBitCount(HInvoke* invoke) { - GenBitCount(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenBitCount(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { @@ -485,7 +483,7 @@ static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* // double java.lang.Math.abs(double) void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { @@ -494,17 +492,16 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { // float java.lang.Math.abs(float) void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } -static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToInt(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); } @@ -526,7 +523,7 @@ static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assemb // int java.lang.Math.abs(int) void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToInt(arena_, invoke); + CreateIntToInt(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { @@ -535,7 +532,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { // long java.lang.Math.abs(long) void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToInt(arena_, invoke); + CreateIntToInt(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { @@ -544,7 +541,7 @@ 
void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { static void GenMinMaxFP(LocationSummary* locations, bool is_min, - Primitive::Type type, + DataType::Type type, Mips64Assembler* assembler) { FpuRegister a = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister b = locations->InAt(1).AsFpuRegister<FpuRegister>(); @@ -561,7 +558,7 @@ static void GenMinMaxFP(LocationSummary* locations, // returned. This is why there is extra logic preceding the use of // the MIPS min.fmt/max.fmt instructions. If either a, or b holds a // NaN, return the NaN, otherwise return the min/max. - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ CmpUnD(FTMP, a, b); __ Bc1eqz(FTMP, &noNaNs); @@ -584,7 +581,7 @@ static void GenMinMaxFP(LocationSummary* locations, __ MaxD(out, a, b); } } else { - DCHECK_EQ(type, Primitive::kPrimFloat); + DCHECK_EQ(type, DataType::Type::kFloat32); __ CmpUnS(FTMP, a, b); __ Bc1eqz(FTMP, &noNaNs); @@ -611,10 +608,9 @@ static void GenMinMaxFP(LocationSummary* locations, __ Bind(&done); } -static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); @@ -622,38 +618,38 @@ static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { // double java.lang.Math.min(double, double) void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, Primitive::kPrimDouble, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat64, GetAssembler()); } // float java.lang.Math.min(float, float) void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, Primitive::kPrimFloat, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ true, DataType::Type::kFloat32, GetAssembler()); } // double java.lang.Math.max(double, double) void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, Primitive::kPrimDouble, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat64, GetAssembler()); } // float java.lang.Math.max(float, float) void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { - 
GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, Primitive::kPrimFloat, GetAssembler()); + GenMinMaxFP(invoke->GetLocations(), /* is_min */ false, DataType::Type::kFloat32, GetAssembler()); } static void GenMinMax(LocationSummary* locations, @@ -714,10 +710,9 @@ static void GenMinMax(LocationSummary* locations, } } -static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); @@ -725,7 +720,7 @@ static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { // int java.lang.Math.min(int, int) void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { @@ -734,7 +729,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { // long java.lang.Math.min(long, long) void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { @@ -743,7 +738,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { // int java.lang.Math.max(int, int) void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { @@ -752,7 +747,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { // long java.lang.Math.max(long, long) void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { @@ -761,7 +756,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { // double java.lang.Math.sqrt(double) void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathSqrt(HInvoke* invoke) { @@ -773,19 +768,18 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathSqrt(HInvoke* invoke) { __ SqrtD(out, in); } -static void CreateFPToFP(ArenaAllocator* arena, +static void CreateFPToFP(ArenaAllocator* allocator, HInvoke* invoke, Location::OutputOverlap overlaps = Location::kOutputOverlap) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister(), overlaps); } // double java.lang.Math.rint(double) void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) { - 
CreateFPToFP(arena_, invoke, Location::kNoOutputOverlap); + CreateFPToFP(allocator_, invoke, Location::kNoOutputOverlap); } void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) { @@ -799,7 +793,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) { // double java.lang.Math.floor(double) void IntrinsicLocationsBuilderMIPS64::VisitMathFloor(HInvoke* invoke) { - CreateFPToFP(arena_, invoke); + CreateFPToFP(allocator_, invoke); } const constexpr uint16_t kFPLeaveUnchanged = kPositiveZero | @@ -876,19 +870,19 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) { // double java.lang.Math.ceil(double) void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) { - CreateFPToFP(arena_, invoke); + CreateFPToFP(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) { GenRoundingMode(invoke->GetLocations(), kCeil, GetAssembler()); } -static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Primitive::Type type) { +static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, DataType::Type type) { FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); FpuRegister half = locations->GetTemp(0).AsFpuRegister<FpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - DCHECK(type == Primitive::kPrimFloat || type == Primitive::kPrimDouble); + DCHECK(type == DataType::Type::kFloat32 || type == DataType::Type::kFloat64); Mips64Label done; @@ -901,7 +895,7 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri // return out; // out = floor(in); - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ FloorLD(FTMP, in); __ Dmfc1(out, FTMP); } else { @@ -910,7 +904,7 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri } // if (out != MAX_VALUE && out != MIN_VALUE) - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ Daddiu(TMP, out, 1); __ Dati(TMP, 0x8000); // TMP = out + 0x8000 0000 0000 0001 // or out - 0x7FFF FFFF FFFF FFFF. @@ -931,7 +925,7 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri } // TMP = (0.5 <= (in - out)) ? -1 : 0; - if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ Cvtdl(FTMP, FTMP); // Convert output of floor.l.d back to "double". __ LoadConst64(AT, bit_cast<int64_t, double>(0.5)); __ SubD(FTMP, in, FTMP); @@ -948,7 +942,7 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri } // Return out -= TMP. 
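Putting GenRound's pseudocode comments together: the emitted sequence computes floor(in), skips the adjustment when floor already saturated to MIN/MAX, and otherwise adds one when the discarded fraction is at least 0.5 (done as out -= TMP with TMP = -1). A hedged C++ sketch of that scheme for the float case (illustrative only; the real floor.w.s/floor.l.d instructions saturate on overflow and NaN, which a plain C++ cast does not guarantee):

#include <cmath>
#include <cstdint>
#include <limits>

// Sketch of the rounding scheme described by GenRound's comments. Not ART code.
int32_t RoundFloatSketch(float in) {
  int32_t out = static_cast<int32_t>(std::floor(in));  // "out = floor(in)"
  if (out != std::numeric_limits<int32_t>::max() &&
      out != std::numeric_limits<int32_t>::min()) {
    // TMP = (0.5 <= (in - out)) ? -1 : 0;  return out -= TMP;
    if (in - static_cast<float>(out) >= 0.5f) {
      out += 1;
    }
  }
  return out;
}
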
- if (type == Primitive::kPrimDouble) { + if (type == DataType::Type::kFloat64) { __ Dsubu(out, out, TMP); } else { __ Subu(out, out, TMP); @@ -959,35 +953,33 @@ static void GenRound(LocationSummary* locations, Mips64Assembler* assembler, Pri // int java.lang.Math.round(float) void IntrinsicLocationsBuilderMIPS64::VisitMathRoundFloat(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); } void IntrinsicCodeGeneratorMIPS64::VisitMathRoundFloat(HInvoke* invoke) { - GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimFloat); + GenRound(invoke->GetLocations(), GetAssembler(), DataType::Type::kFloat32); } // long java.lang.Math.round(double) void IntrinsicLocationsBuilderMIPS64::VisitMathRoundDouble(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); } void IntrinsicCodeGeneratorMIPS64::VisitMathRoundDouble(HInvoke* invoke) { - GenRound(invoke->GetLocations(), GetAssembler(), Primitive::kPrimDouble); + GenRound(invoke->GetLocations(), GetAssembler(), DataType::Type::kFloat64); } // byte libcore.io.Memory.peekByte(long address) void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { @@ -1000,7 +992,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { // short libcore.io.Memory.peekShort(long address) void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { @@ -1013,7 +1005,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { // int libcore.io.Memory.peekInt(long address) void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { @@ -1026,7 +1018,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { // long libcore.io.Memory.peekLong(long address) void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { @@ -1037,17 +1029,16 @@ void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { __ Ld(out, adr, 0); } -static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void 
CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); } // void libcore.io.Memory.pokeByte(long address, byte value) void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { @@ -1060,7 +1051,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { // void libcore.io.Memory.pokeShort(long address, short value) void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { @@ -1073,7 +1064,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { // void libcore.io.Memory.pokeInt(long address, int value) void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { @@ -1086,7 +1077,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { // void libcore.io.Memory.pokeLong(long address, long value) void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { @@ -1099,9 +1090,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { // Thread java.lang.Thread.currentThread() void IntrinsicLocationsBuilderMIPS64::VisitThreadCurrentThread(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetOut(Location::RequiresRegister()); } @@ -1115,17 +1105,18 @@ void IntrinsicCodeGeneratorMIPS64::VisitThreadCurrentThread(HInvoke* invoke) { Thread::PeerOffset<kMips64PointerSize>().Int32Value()); } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, +static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, - Primitive::Type type) { + DataType::Type type) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} @@ -1134,7 +1125,7 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, locations->SetInAt(2, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap)); - if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { + if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) { // We need a temporary register for the read barrier marking slow // path in InstructionCodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier. locations->AddTemp(Location::RequiresRegister()); @@ -1144,13 +1135,13 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, // Note that the caller must supply a properly aligned memory address. // If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). static void GenUnsafeGet(HInvoke* invoke, - Primitive::Type type, + DataType::Type type, bool is_volatile, CodeGeneratorMIPS64* codegen) { LocationSummary* locations = invoke->GetLocations(); - DCHECK((type == Primitive::kPrimInt) || - (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)) << type; + DCHECK((type == DataType::Type::kInt32) || + (type == DataType::Type::kInt64) || + (type == DataType::Type::kReference)) << type; Mips64Assembler* assembler = codegen->GetAssembler(); // Target register. Location trg_loc = locations->Out(); @@ -1162,26 +1153,26 @@ static void GenUnsafeGet(HInvoke* invoke, Location offset_loc = locations->InAt(2); GpuRegister offset = offset_loc.AsRegister<GpuRegister>(); - if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) { + if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == DataType::Type::kReference))) { __ Daddu(TMP, base, offset); } switch (type) { - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ Ld(trg, TMP, 0); if (is_volatile) { __ Sync(0); } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ Lw(trg, TMP, 0); if (is_volatile) { __ Sync(0); } break; - case Primitive::kPrimNot: + case DataType::Type::kReference: if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Location temp = locations->GetTemp(0); @@ -1225,62 +1216,61 @@ static void GenUnsafeGet(HInvoke* invoke, // int sun.misc.Unsafe.getInt(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); } // int sun.misc.Unsafe.getIntVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); } // long sun.misc.Unsafe.getLong(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, 
Primitive::kPrimLong); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); } // long sun.misc.Unsafe.getLongVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); } // Object sun.misc.Unsafe.getObject(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); } // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kReference); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntIntIntToVoid(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -1290,13 +1280,13 @@ static void CreateIntIntIntIntToVoid(ArenaAllocator* arena, HInvoke* invoke) { // Note that the caller must supply a properly aligned memory address. // If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). static void GenUnsafePut(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, bool is_volatile, bool is_ordered, CodeGeneratorMIPS64* codegen) { - DCHECK((type == Primitive::kPrimInt) || - (type == Primitive::kPrimLong) || - (type == Primitive::kPrimNot)); + DCHECK((type == DataType::Type::kInt32) || + (type == DataType::Type::kInt64) || + (type == DataType::Type::kReference)); Mips64Assembler* assembler = codegen->GetAssembler(); // Object pointer. 
GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>(); @@ -1309,9 +1299,9 @@ static void GenUnsafePut(LocationSummary* locations, __ Sync(0); } switch (type) { - case Primitive::kPrimInt: - case Primitive::kPrimNot: - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + case DataType::Type::kInt32: + case DataType::Type::kReference: + if (kPoisonHeapReferences && type == DataType::Type::kReference) { __ PoisonHeapReference(AT, value); __ Sw(AT, TMP, 0); } else { @@ -1319,7 +1309,7 @@ static void GenUnsafePut(LocationSummary* locations, } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ Sd(value, TMP, 0); break; @@ -1331,7 +1321,7 @@ static void GenUnsafePut(LocationSummary* locations, __ Sync(0); } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(base, value, value_can_be_null); } @@ -1339,12 +1329,12 @@ static void GenUnsafePut(LocationSummary* locations, // void sun.misc.Unsafe.putInt(Object o, long offset, int x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ false, /* is_ordered */ false, codegen_); @@ -1352,12 +1342,12 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePut(HInvoke* invoke) { // void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ false, /* is_ordered */ true, codegen_); @@ -1365,12 +1355,12 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutOrdered(HInvoke* invoke) { // void sun.misc.Unsafe.putIntVolatile(Object o, long offset, int x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimInt, + DataType::Type::kInt32, /* is_volatile */ true, /* is_ordered */ false, codegen_); @@ -1378,12 +1368,12 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutVolatile(HInvoke* invoke) { // void sun.misc.Unsafe.putObject(Object o, long offset, Object x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ false, /* is_ordered */ false, codegen_); @@ -1391,12 +1381,12 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObject(HInvoke* invoke) { // void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void 
IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ false, /* is_ordered */ true, codegen_); @@ -1404,12 +1394,12 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectOrdered(HInvoke* invoke) // void sun.misc.Unsafe.putObjectVolatile(Object o, long offset, Object x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimNot, + DataType::Type::kReference, /* is_volatile */ true, /* is_ordered */ false, codegen_); @@ -1417,12 +1407,12 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutObjectVolatile(HInvoke* invoke) // void sun.misc.Unsafe.putLong(Object o, long offset, long x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLong(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ false, /* is_ordered */ false, codegen_); @@ -1430,12 +1420,12 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLong(HInvoke* invoke) { // void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ false, /* is_ordered */ true, codegen_); @@ -1443,26 +1433,27 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongOrdered(HInvoke* invoke) { // void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoid(arena_, invoke); + CreateIntIntIntIntToVoid(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafePutLongVolatile(HInvoke* invoke) { GenUnsafePut(invoke->GetLocations(), - Primitive::kPrimLong, + DataType::Type::kInt64, /* is_volatile */ true, /* is_ordered */ false, codegen_); } -static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && kUseBakerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -1478,7 +1469,7 @@ static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* // Note that the caller must supply a properly aligned memory address. 
// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur). -static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* codegen) { +static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorMIPS64* codegen) { Mips64Assembler* assembler = codegen->GetAssembler(); LocationSummary* locations = invoke->GetLocations(); GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>(); @@ -1493,7 +1484,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* c DCHECK_NE(offset, out); DCHECK_NE(expected, out); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); @@ -1523,7 +1514,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* c Mips64Label loop_head, exit_loop; __ Daddu(TMP, base, offset); - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { __ PoisonHeapReference(expected); // Do not poison `value`, if it is the same register as // `expected`, which has just been poisoned. @@ -1539,13 +1530,13 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* c __ Sync(0); __ Bind(&loop_head); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { __ Lld(out, TMP); } else { // Note: We will need a read barrier here, when read barrier // support is added to the MIPS64 back end. __ Ll(out, TMP); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // The LL instruction sign-extends the 32-bit value, but // 32-bit references must be zero-extended. Zero-extend `out`. __ Dext(out, out, 0, 32); @@ -1559,7 +1550,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* c // in the case that the store fails. Whether the // store succeeds, or fails, it will load the // correct Boolean value into the 'out' register. - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { __ Scd(out, TMP); } else { __ Sc(out, TMP); @@ -1569,7 +1560,7 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* c __ Bind(&exit_loop); __ Sync(0); - if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + if (kPoisonHeapReferences && type == DataType::Type::kReference) { __ UnpoisonHeapReference(expected); // Do not unpoison `value`, if it is the same register as // `expected`, which has just been unpoisoned. 
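The LL/SC loop above (ll/lld paired with sc/scd, bracketed by sync barriers) implements an ordinary strong compare-and-swap and produces a boolean result. Its semantics, expressed with std::atomic purely for illustration (not ART code):

#include <atomic>
#include <cstdint>

// Illustrative equivalent of the result GenCas computes for
// Unsafe.compareAndSwapInt: retry until the store-conditional succeeds, then
// report whether the expected value was observed at the address.
bool CompareAndSwapInt32(std::atomic<int32_t>* addr, int32_t expected, int32_t value) {
  return addr->compare_exchange_strong(expected, value, std::memory_order_seq_cst);
}
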
@@ -1581,20 +1572,20 @@ static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* c // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) { - GenCas(invoke, Primitive::kPrimInt, codegen_); + GenCas(invoke, DataType::Type::kInt32, codegen_); } // boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x) void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) { - CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) { - GenCas(invoke, Primitive::kPrimLong, codegen_); + GenCas(invoke, DataType::Type::kInt64, codegen_); } // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x) @@ -1605,7 +1596,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { return; } - CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke); + CreateIntIntIntIntIntToIntPlusTemps(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { @@ -1613,18 +1604,17 @@ void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) { // UnsafeCASObject intrinsic is the Baker-style read barriers. DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCas(invoke, Primitive::kPrimNot, codegen_); + GenCas(invoke, DataType::Type::kReference, codegen_); } // int java.lang.String.compareTo(String anotherString) void IntrinsicLocationsBuilderMIPS64::VisitStringCompareTo(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); } @@ -1636,7 +1626,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0))); GpuRegister argument = locations->InAt(1).AsRegister<GpuRegister>(); - SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); codegen_->AddSlowPath(slow_path); __ Beqzc(argument, slow_path->GetEntryLabel()); @@ -1646,9 +1637,15 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringCompareTo(HInvoke* invoke) { // boolean java.lang.String.equals(Object anObject) void IntrinsicLocationsBuilderMIPS64::VisitStringEquals(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + if (kEmitCompilerReadBarrier && + 
!StringEqualsOptimizations(invoke).GetArgumentIsString() && + !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { + // No support for this odd case (String class is moveable, not in the boot image). + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); @@ -1765,7 +1762,6 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringEquals(HInvoke* invoke) { static void GenerateStringIndexOf(HInvoke* invoke, Mips64Assembler* assembler, CodeGeneratorMIPS64* codegen, - ArenaAllocator* allocator, bool start_at_zero) { LocationSummary* locations = invoke->GetLocations(); GpuRegister tmp_reg = start_at_zero ? locations->GetTemp(0).AsRegister<GpuRegister>() : TMP; @@ -1782,16 +1778,16 @@ static void GenerateStringIndexOf(HInvoke* invoke, // Always needs the slow-path. We could directly dispatch to it, // but this case should be rare, so for simplicity just put the // full slow-path down and branch unconditionally. - slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); codegen->AddSlowPath(slow_path); __ Bc(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } - } else if (code_point->GetType() != Primitive::kPrimChar) { + } else if (code_point->GetType() != DataType::Type::kUint16) { GpuRegister char_reg = locations->InAt(1).AsRegister<GpuRegister>(); __ LoadConst32(tmp_reg, std::numeric_limits<uint16_t>::max()); - slow_path = new (allocator) IntrinsicSlowPathMIPS64(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); codegen->AddSlowPath(slow_path); __ Bltuc(tmp_reg, char_reg, slow_path->GetEntryLabel()); // UTF-16 required } @@ -1812,15 +1808,14 @@ static void GenerateStringIndexOf(HInvoke* invoke, // int java.lang.String.indexOf(int ch) void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); // Need a temp for slow-path codepoint compare, and need to send start-index=0. 
@@ -1828,40 +1823,37 @@ void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOf(HInvoke* invoke) { } void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); } // int java.lang.String.indexOf(int ch, int fromIndex) void IntrinsicLocationsBuilderMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); // We have a hand-crafted assembly stub that follows the runtime // calling convention. So it's best to align the inputs accordingly. InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); } void IntrinsicCodeGeneratorMIPS64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf( - invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); } // java.lang.StringFactory.newStringFromBytes(byte[] data, int high, int offset, int byteCount) void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); } @@ -1870,7 +1862,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke LocationSummary* locations = invoke->GetLocations(); GpuRegister byte_array = locations->InAt(0).AsRegister<GpuRegister>(); - SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); codegen_->AddSlowPath(slow_path); __ Beqzc(byte_array, slow_path->GetEntryLabel()); @@ -1881,14 +1874,13 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromBytes(HInvoke* invoke // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data) void 
IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromChars(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); } @@ -1905,12 +1897,11 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromChars(HInvoke* invoke // java.lang.StringFactory.newStringFromString(String toCopy) void IntrinsicLocationsBuilderMIPS64::VisitStringNewStringFromString(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); - Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt); + Location outLocation = calling_convention.GetReturnLocation(DataType::Type::kInt32); locations->SetOut(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>())); } @@ -1919,7 +1910,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringNewStringFromString(HInvoke* invok LocationSummary* locations = invoke->GetLocations(); GpuRegister string_to_copy = locations->InAt(0).AsRegister<GpuRegister>(); - SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); codegen_->AddSlowPath(slow_path); __ Beqzc(string_to_copy, slow_path->GetEntryLabel()); @@ -1946,7 +1938,7 @@ static void GenIsInfinite(LocationSummary* locations, // boolean java.lang.Float.isInfinite(float) void IntrinsicLocationsBuilderMIPS64::VisitFloatIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitFloatIsInfinite(HInvoke* invoke) { @@ -1955,7 +1947,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitFloatIsInfinite(HInvoke* invoke) { // boolean java.lang.Double.isInfinite(double) void IntrinsicLocationsBuilderMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { @@ -1964,9 +1956,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitDoubleIsInfinite(HInvoke* invoke) { // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin) void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, 
LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -1983,9 +1974,9 @@ void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) { LocationSummary* locations = invoke->GetLocations(); // Check assumption that sizeof(Char) is 2 (used in scaling below). - const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); - const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar); + const size_t char_shift = DataType::SizeShift(DataType::Type::kUint16); GpuRegister srcObj = locations->InAt(0).AsRegister<GpuRegister>(); GpuRegister srcBegin = locations->InAt(1).AsRegister<GpuRegister>(); @@ -2081,7 +2072,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) // Okay, it is safe to generate inline code. LocationSummary* locations = - new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); @@ -2177,7 +2168,8 @@ void IntrinsicCodeGeneratorMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) { GpuRegister src_base = locations->GetTemp(1).AsRegister<GpuRegister>(); GpuRegister count = locations->GetTemp(2).AsRegister<GpuRegister>(); - SlowPathCodeMIPS64* slow_path = new (GetAllocator()) IntrinsicSlowPathMIPS64(invoke); + SlowPathCodeMIPS64* slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathMIPS64(invoke); codegen_->AddSlowPath(slow_path); // Bail out if the source and destination are the same (to handle overlap). @@ -2211,10 +2203,10 @@ void IntrinsicCodeGeneratorMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) { // Okay, everything checks out. Finally time to do the copy. // Check assumption that sizeof(Char) is 2 (used in scaling below). 
- const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); - const size_t char_shift = Primitive::ComponentSizeShift(Primitive::kPrimChar); + const size_t char_shift = DataType::SizeShift(DataType::Type::kUint16); const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); @@ -2248,14 +2240,14 @@ void IntrinsicCodeGeneratorMIPS64::VisitSystemArrayCopyChar(HInvoke* invoke) { } static void GenHighestOneBit(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, Mips64Assembler* assembler) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { __ Dclz(TMP, in); __ LoadConst64(AT, INT64_C(0x8000000000000000)); __ Dsrlv(AT, AT, TMP); @@ -2275,31 +2267,31 @@ static void GenHighestOneBit(LocationSummary* locations, // int java.lang.Integer.highestOneBit(int) void IntrinsicLocationsBuilderMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitIntegerHighestOneBit(HInvoke* invoke) { - GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenHighestOneBit(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } // long java.lang.Long.highestOneBit(long) void IntrinsicLocationsBuilderMIPS64::VisitLongHighestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitLongHighestOneBit(HInvoke* invoke) { - GenHighestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenHighestOneBit(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } static void GenLowestOneBit(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, Mips64Assembler* assembler) { - DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong) << PrettyDescriptor(type); + DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64) << type; GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); GpuRegister out = locations->Out().AsRegister<GpuRegister>(); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { __ Dsubu(TMP, ZERO, in); } else { __ Subu(TMP, ZERO, in); @@ -2309,41 +2301,39 @@ static void GenLowestOneBit(LocationSummary* locations, // int java.lang.Integer.lowestOneBit(int) void IntrinsicLocationsBuilderMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitIntegerLowestOneBit(HInvoke* invoke) { - GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenLowestOneBit(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } // long java.lang.Long.lowestOneBit(long) void IntrinsicLocationsBuilderMIPS64::VisitLongLowestOneBit(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitLongLowestOneBit(HInvoke* invoke) { - 
GenLowestOneBit(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenLowestOneBit(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } -static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); +static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimDouble)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kFloat64)); } -static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); +static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); - locations->SetOut(calling_convention.GetReturnLocation(Primitive::kPrimDouble)); + locations->SetOut(calling_convention.GetReturnLocation(DataType::Type::kFloat64)); } static void GenFPToFPCall(HInvoke* invoke, @@ -2374,7 +2364,7 @@ static void GenFPFPToFPCall(HInvoke* invoke, // static double java.lang.Math.cos(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathCos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathCos(HInvoke* invoke) { @@ -2383,7 +2373,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathCos(HInvoke* invoke) { // static double java.lang.Math.sin(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathSin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathSin(HInvoke* invoke) { @@ -2392,7 +2382,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathSin(HInvoke* invoke) { // static double java.lang.Math.acos(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathAcos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathAcos(HInvoke* invoke) { @@ -2401,7 +2391,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAcos(HInvoke* invoke) { // static double java.lang.Math.asin(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathAsin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathAsin(HInvoke* invoke) { @@ -2410,7 +2400,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAsin(HInvoke* invoke) { // static double java.lang.Math.atan(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathAtan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void 
IntrinsicCodeGeneratorMIPS64::VisitMathAtan(HInvoke* invoke) { @@ -2419,16 +2409,25 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathAtan(HInvoke* invoke) { // static double java.lang.Math.atan2(double y, double x) void IntrinsicLocationsBuilderMIPS64::VisitMathAtan2(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathAtan2(HInvoke* invoke) { GenFPFPToFPCall(invoke, codegen_, kQuickAtan2); } +// static double java.lang.Math.pow(double y, double x) +void IntrinsicLocationsBuilderMIPS64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathPow(HInvoke* invoke) { + GenFPFPToFPCall(invoke, codegen_, kQuickPow); +} + // static double java.lang.Math.cbrt(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathCbrt(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathCbrt(HInvoke* invoke) { @@ -2437,7 +2436,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathCbrt(HInvoke* invoke) { // static double java.lang.Math.cosh(double x) void IntrinsicLocationsBuilderMIPS64::VisitMathCosh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathCosh(HInvoke* invoke) { @@ -2446,7 +2445,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathCosh(HInvoke* invoke) { // static double java.lang.Math.exp(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathExp(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathExp(HInvoke* invoke) { @@ -2455,7 +2454,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathExp(HInvoke* invoke) { // static double java.lang.Math.expm1(double x) void IntrinsicLocationsBuilderMIPS64::VisitMathExpm1(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathExpm1(HInvoke* invoke) { @@ -2464,7 +2463,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathExpm1(HInvoke* invoke) { // static double java.lang.Math.hypot(double x, double y) void IntrinsicLocationsBuilderMIPS64::VisitMathHypot(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathHypot(HInvoke* invoke) { @@ -2473,7 +2472,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathHypot(HInvoke* invoke) { // static double java.lang.Math.log(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathLog(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathLog(HInvoke* invoke) { @@ -2482,7 +2481,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathLog(HInvoke* invoke) { // static double java.lang.Math.log10(double x) void IntrinsicLocationsBuilderMIPS64::VisitMathLog10(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathLog10(HInvoke* invoke) { @@ -2491,7 +2490,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathLog10(HInvoke* invoke) { // static double java.lang.Math.nextAfter(double start, double direction) void 
IntrinsicLocationsBuilderMIPS64::VisitMathNextAfter(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathNextAfter(HInvoke* invoke) { @@ -2500,7 +2499,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathNextAfter(HInvoke* invoke) { // static double java.lang.Math.sinh(double x) void IntrinsicLocationsBuilderMIPS64::VisitMathSinh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathSinh(HInvoke* invoke) { @@ -2509,7 +2508,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathSinh(HInvoke* invoke) { // static double java.lang.Math.tan(double a) void IntrinsicLocationsBuilderMIPS64::VisitMathTan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathTan(HInvoke* invoke) { @@ -2518,7 +2517,7 @@ void IntrinsicCodeGeneratorMIPS64::VisitMathTan(HInvoke* invoke) { // static double java.lang.Math.tanh(double x) void IntrinsicLocationsBuilderMIPS64::VisitMathTanh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorMIPS64::VisitMathTanh(HInvoke* invoke) { @@ -2531,7 +2530,7 @@ void IntrinsicLocationsBuilderMIPS64::VisitIntegerValueOf(HInvoke* invoke) { IntrinsicVisitor::ComputeIntegerValueOfLocations( invoke, codegen_, - calling_convention.GetReturnLocation(Primitive::kPrimNot), + calling_convention.GetReturnLocation(DataType::Type::kReference), Location::RegisterLocation(calling_convention.GetRegisterAt(0))); } @@ -2603,6 +2602,34 @@ void IntrinsicCodeGeneratorMIPS64::VisitIntegerValueOf(HInvoke* invoke) { } } +// static boolean java.lang.Thread.interrupted() +void IntrinsicLocationsBuilderMIPS64::VisitThreadInterrupted(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetOut(Location::RequiresRegister()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitThreadInterrupted(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + int32_t offset = Thread::InterruptedOffset<kMips64PointerSize>().Int32Value(); + __ LoadFromOffset(kLoadWord, out, TR, offset); + Mips64Label done; + __ Beqzc(out, &done); + __ Sync(0); + __ StoreToOffset(kStoreWord, ZERO, TR, offset); + __ Sync(0); + __ Bind(&done); +} + +void IntrinsicLocationsBuilderMIPS64::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorMIPS64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + UNIMPLEMENTED_INTRINSIC(MIPS64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(MIPS64, SystemArrayCopy) @@ -2622,8 +2649,6 @@ UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetInt) UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong) UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject) -UNIMPLEMENTED_INTRINSIC(MIPS64, ThreadInterrupted) - UNREACHABLE_INTRINSICS(MIPS64) #undef __ diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h index 179627ab20..6f40d90ddb 100644 --- a/compiler/optimizing/intrinsics_mips64.h +++ b/compiler/optimizing/intrinsics_mips64.h @@ 
-39,7 +39,7 @@ class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -49,8 +49,8 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: - CodeGeneratorMIPS64* codegen_; - ArenaAllocator* arena_; + CodeGeneratorMIPS64* const codegen_; + ArenaAllocator* const allocator_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64); }; @@ -64,7 +64,7 @@ class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -73,7 +73,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) ArenaAllocator* GetAllocator(); - CodeGeneratorMIPS64* codegen_; + CodeGeneratorMIPS64* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS64); }; diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc index a18b0cc400..0763ef2352 100644 --- a/compiler/optimizing/intrinsics_x86.cc +++ b/compiler/optimizing/intrinsics_x86.cc @@ -23,6 +23,7 @@ #include "base/bit_utils.h" #include "code_generator_x86.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_utils.h" #include "lock_word.h" @@ -45,7 +46,7 @@ static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000); static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000); IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen) - : arena_(codegen->GetGraph()->GetArena()), + : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) { } @@ -55,7 +56,7 @@ X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() { } ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() { - return codegen_->GetGraph()->GetArena(); + return codegen_->GetGraph()->GetAllocator(); } bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) { @@ -96,7 +97,7 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + int32_t element_size = DataType::Size(DataType::Type::kReference); uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value(); Register src = locations->InAt(0).AsRegister<Register>(); @@ -174,10 +175,9 @@ class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode { #define __ assembler-> -static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); if (is64bit) { @@ 
-185,10 +185,9 @@ static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool } } -static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); if (is64bit) { @@ -229,10 +228,10 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* } void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke, /* is64bit */ true); + CreateFPToIntLocations(allocator_, invoke, /* is64bit */ true); } void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke, /* is64bit */ true); + CreateIntToFPLocations(allocator_, invoke, /* is64bit */ true); } void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { @@ -243,10 +242,10 @@ void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke, /* is64bit */ false); + CreateFPToIntLocations(allocator_, invoke, /* is64bit */ false); } void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke, /* is64bit */ false); + CreateIntToFPLocations(allocator_, invoke, /* is64bit */ false); } void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -256,42 +255,39 @@ void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) { MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } -static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); } -static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); } -static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); } static 
void GenReverseBytes(LocationSummary* locations, - Primitive::Type size, + DataType::Type size, X86Assembler* assembler) { Register out = locations->Out().AsRegister<Register>(); switch (size) { - case Primitive::kPrimShort: + case DataType::Type::kInt16: // TODO: Can be done with an xchg of 8b registers. This is straight from Quick. __ bswapl(out); __ sarl(out, Immediate(16)); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ bswapl(out); break; default: @@ -301,15 +297,15 @@ static void GenReverseBytes(LocationSummary* locations, } void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) { - CreateLongToLongLocations(arena_, invoke); + CreateLongToLongLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { @@ -330,22 +326,21 @@ void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } // TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we // need is 64b. -static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFloatToFloat(ArenaAllocator* allocator, HInvoke* invoke) { // TODO: Enable memory operations when the assembler supports them. 
- LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); @@ -400,7 +395,7 @@ static void MathAbsFP(HInvoke* invoke, } void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloat(arena_, invoke); + CreateFloatToFloat(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { @@ -408,17 +403,16 @@ void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloat(arena_, invoke); + CreateFloatToFloat(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) { MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_); } -static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateAbsIntLocation(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RegisterLocation(EAX)); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RegisterLocation(EDX)); @@ -443,10 +437,9 @@ static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) { // The result is in EAX. } -static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateAbsLongLocation(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap); locations->AddTemp(Location::RequiresRegister()); @@ -479,7 +472,7 @@ static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) { } void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) { - CreateAbsIntLocation(arena_, invoke); + CreateAbsIntLocation(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { @@ -487,7 +480,7 @@ void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) { - CreateAbsLongLocation(arena_, invoke); + CreateAbsLongLocation(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) { @@ -597,10 +590,9 @@ static void GenMinMaxFP(HInvoke* invoke, __ Bind(&done); } -static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); // The following 
is sub-optimal, but all we can do for now. It would be fine to also accept @@ -615,7 +607,7 @@ static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { @@ -627,7 +619,7 @@ void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { @@ -639,7 +631,7 @@ void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -651,7 +643,7 @@ void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFPLocations(arena_, invoke); + CreateFPFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -717,19 +709,17 @@ static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, } } -static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); } -static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateLongLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); @@ -738,7 +728,7 @@ static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke } void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { @@ -746,7 +736,7 @@ void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(arena_, invoke); + CreateLongLongToLongLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { @@ -754,7 +744,7 @@ void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, 
invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { @@ -762,23 +752,22 @@ void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) { - CreateLongLongToLongLocations(arena_, invoke); + CreateLongLongToLongLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) { GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); } -static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); } void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) { @@ -804,18 +793,18 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) } } -static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, - HInvoke* invoke, - CodeGeneratorX86* codegen) { +static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorX86* codegen) { // Do we have instruction support? if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { - CreateFPToFPLocations(arena, invoke); + CreateFPToFPLocations(allocator, invoke); return; } // We have to fall back to a call to the intrinsic. 
- LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::FpuRegisterLocation(XMM0)); @@ -838,7 +827,7 @@ static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen, } void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) { - CreateSSE41FPToFPLocations(arena_, invoke, codegen_); + CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) { @@ -846,7 +835,7 @@ void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) { - CreateSSE41FPToFPLocations(arena_, invoke, codegen_); + CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) { @@ -854,7 +843,7 @@ void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) { - CreateSSE41FPToFPLocations(arena_, invoke, codegen_); + CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) { @@ -866,9 +855,8 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) { HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect(); DCHECK(static_or_direct != nullptr); - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); if (static_or_direct->HasSpecialInput() && invoke->InputAt( @@ -882,8 +870,8 @@ void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) { } // We have to fall back to a call to the intrinsic. 
- LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::RegisterLocation(EAX)); @@ -950,11 +938,9 @@ void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) { __ Bind(&done); } -static void CreateFPToFPCallLocations(ArenaAllocator* arena, - HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); +static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::FpuRegisterLocation(XMM0)); @@ -991,7 +977,7 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntry } void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) { @@ -999,7 +985,7 @@ void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) { @@ -1007,7 +993,7 @@ void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) { @@ -1015,7 +1001,7 @@ void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) { @@ -1023,7 +1009,7 @@ void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) { @@ -1031,7 +1017,7 @@ void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) { @@ -1039,7 +1025,7 @@ void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) { @@ -1047,7 +1033,7 @@ void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, 
invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) { @@ -1055,7 +1041,7 @@ void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) { @@ -1063,7 +1049,7 @@ void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) { @@ -1071,7 +1057,7 @@ void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) { @@ -1079,7 +1065,7 @@ void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) { @@ -1087,7 +1073,7 @@ void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) { @@ -1095,18 +1081,16 @@ void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickTanh); } -static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, - HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); +static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); @@ -1114,15 +1098,23 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void 
IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) { @@ -1130,7 +1122,7 @@ void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) { @@ -1173,7 +1165,7 @@ void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) { // Okay, it is safe to generate inline code. LocationSummary* locations = - new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); // arraycopy(Object src, int srcPos, Object dest, int destPos, int length). locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); @@ -1269,7 +1261,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) { Register count = locations->GetTemp(2).AsRegister<Register>(); DCHECK_EQ(count, ECX); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); // Bail out if the source and destination are the same (to handle overlap). @@ -1306,7 +1298,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) { // Okay, everything checks out. Finally time to do the copy. // Check assumption that sizeof(Char) is 2 (used in scaling below). - const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); @@ -1335,9 +1327,8 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) { void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) { // The inputs plus one temp. 
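Every `new (allocator_) LocationSummary(...)` expression in these hunks is a placement-style allocation from the graph's arena rather than the process heap; the arena_ to allocator_ rename keeps that behaviour, while the intrinsic slow paths move from the graph arena to the shorter-lived arena returned by codegen_->GetScopedAllocator(). A minimal, self-contained sketch of the placement-new idiom, where SimpleArena and FakeLocationSummary are illustrative stand-ins rather than ART types:

    #include <cstddef>
    #include <iostream>
    #include <memory>
    #include <new>
    #include <vector>

    class SimpleArena {
     public:
      void* Alloc(size_t bytes) {
        buffers_.push_back(std::make_unique<unsigned char[]>(bytes));
        return buffers_.back().get();
      }
     private:
      // Everything is released at once when the arena itself is destroyed.
      std::vector<std::unique_ptr<unsigned char[]>> buffers_;
    };

    // This overload is what makes `new (arena) T(...)` draw from the arena.
    void* operator new(size_t bytes, SimpleArena* arena) { return arena->Alloc(bytes); }
    // Matching placement delete; only invoked if the constructor throws.
    void operator delete(void* /* ptr */, SimpleArena* /* arena */) noexcept {}

    struct FakeLocationSummary {
      explicit FakeLocationSummary(int call_kind) : call_kind_(call_kind) {}
      int call_kind_;
    };

    int main() {
      SimpleArena arena;
      FakeLocationSummary* summary = new (&arena) FakeLocationSummary(/* call_kind */ 0);
      std::cout << summary->call_kind_ << '\n';
      // No explicit delete: arena-owned objects go away with the arena.
      return 0;
    }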
- LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); @@ -1353,7 +1344,7 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { Register argument = locations->InAt(1).AsRegister<Register>(); __ testl(argument, argument); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1362,9 +1353,15 @@ void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + if (kEmitCompilerReadBarrier && + !StringEqualsOptimizations(invoke).GetArgumentIsString() && + !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { + // No support for this odd case (String class is moveable, not in the boot image). + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -1503,7 +1500,6 @@ static void CreateStringIndexOfLocations(HInvoke* invoke, static void GenerateStringIndexOf(HInvoke* invoke, X86Assembler* assembler, CodeGeneratorX86* codegen, - ArenaAllocator* allocator, bool start_at_zero) { LocationSummary* locations = invoke->GetLocations(); @@ -1533,15 +1529,15 @@ static void GenerateStringIndexOf(HInvoke* invoke, std::numeric_limits<uint16_t>::max()) { // Always needs the slow-path. We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. 
- slow_path = new (allocator) IntrinsicSlowPathX86(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); codegen->AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } - } else if (code_point->GetType() != Primitive::kPrimChar) { + } else if (code_point->GetType() != DataType::Type::kUint16) { __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); - slow_path = new (allocator) IntrinsicSlowPathX86(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); codegen->AddSlowPath(slow_path); __ j(kAbove, slow_path->GetEntryLabel()); } @@ -1654,26 +1650,24 @@ static void GenerateStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true); } void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); } void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false); } void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf( - invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); } void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); @@ -1688,7 +1682,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { Register byte_array = locations->InAt(0).AsRegister<Register>(); __ testl(byte_array, byte_array); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1698,9 +1692,8 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); @@ -1720,9 +1713,8 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) { } void 
IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(Location::RegisterLocation(EAX)); @@ -1734,7 +1726,7 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) Register string_to_copy = locations->InAt(0).AsRegister<Register>(); __ testl(string_to_copy, string_to_copy); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1745,9 +1737,8 @@ void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); // Place srcEnd in ECX to save a move below. @@ -1765,7 +1756,7 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { X86Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); - size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar); + size_t char_component_size = DataType::Size(DataType::Type::kUint16); // Location of data in char array buffer. const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value(); // Location of char array data in string. @@ -1781,7 +1772,7 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { Register dstBegin = locations->InAt(4).AsRegister<Register>(); // Check assumption that sizeof(Char) is 2 (used in scaling below). - const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); // Compute the number of chars (words) to move. 
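The recurring type substitution in these hunks maps each Primitive name onto a DataType::Type of the same width: kPrimByte to kInt8, kPrimShort to kInt16, kPrimChar to kUint16, kPrimInt to kInt32, kPrimLong to kInt64 and kPrimNot to kReference. The DCHECKs above and in the compressed-string path just below only depend on those widths staying fixed (a Java char is 2 bytes, a compressed-string unit is 1 byte). A standalone sketch of that invariant, with an illustrative enum rather than ART's actual header:

    #include <cassert>
    #include <cstddef>

    enum class Type { kInt8, kInt16, kUint16, kInt32, kInt64 };

    constexpr size_t Size(Type type) {
      return type == Type::kInt8 ? 1u
           : type == Type::kInt16 || type == Type::kUint16 ? 2u
           : type == Type::kInt32 ? 4u
           : 8u;
    }

    int main() {
      assert(Size(Type::kUint16) == 2u);  // the width DCHECK_EQ(char_size, 2u) expects
      assert(Size(Type::kInt8) == 1u);    // the compressed-string unit, DCHECK_EQ(c_char_size, 1u)
      return 0;
    }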
@@ -1801,7 +1792,7 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { if (mirror::kUseStringCompression) { // Location of count in string const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); - const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + const size_t c_char_size = DataType::Size(DataType::Type::kInt8); DCHECK_EQ(c_char_size, 1u); __ pushl(EAX); __ cfi().AdjustCFAOffset(stack_adjust); @@ -1848,22 +1839,22 @@ void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ cfi().AdjustCFAOffset(-stack_adjust); } -static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) { +static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) { Register address = locations->InAt(0).AsRegisterPairLow<Register>(); Location out_loc = locations->Out(); // x86 allows unaligned access. We do not have to check the input or use specific instructions // to avoid a SIGBUS. switch (size) { - case Primitive::kPrimByte: + case DataType::Type::kInt8: __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0)); break; - case Primitive::kPrimShort: + case DataType::Type::kInt16: __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0)); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ movl(out_loc.AsRegister<Register>(), Address(address, 0)); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0)); __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4)); break; @@ -1874,58 +1865,58 @@ static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembl } void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) { - CreateLongToIntLocations(arena_, invoke); + CreateLongToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) { - GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); + GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) { - CreateLongToIntLocations(arena_, invoke); + CreateLongToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) { - GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) { - CreateLongToLongLocations(arena_, invoke); + CreateLongToLongLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) { - GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) { - CreateLongToIntLocations(arena_, invoke); + CreateLongToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) { - GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); + GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } -static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size, +static void CreateLongIntToVoidLocations(ArenaAllocator* allocator, + DataType::Type size, HInvoke* invoke) { - 
LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); HInstruction* value = invoke->InputAt(1); - if (size == Primitive::kPrimByte) { + if (size == DataType::Type::kInt8) { locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value)); } else { locations->SetInAt(1, Location::RegisterOrConstant(value)); } } -static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) { +static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) { Register address = locations->InAt(0).AsRegisterPairLow<Register>(); Location value_loc = locations->InAt(1); // x86 allows unaligned access. We do not have to check the input or use specific instructions // to avoid a SIGBUS. switch (size) { - case Primitive::kPrimByte: + case DataType::Type::kInt8: if (value_loc.IsConstant()) { __ movb(Address(address, 0), Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); @@ -1933,7 +1924,7 @@ static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembl __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>()); } break; - case Primitive::kPrimShort: + case DataType::Type::kInt16: if (value_loc.IsConstant()) { __ movw(Address(address, 0), Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); @@ -1941,7 +1932,7 @@ static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembl __ movw(Address(address, 0), value_loc.AsRegister<Register>()); } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: if (value_loc.IsConstant()) { __ movl(Address(address, 0), Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue())); @@ -1949,7 +1940,7 @@ static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembl __ movl(Address(address, 0), value_loc.AsRegister<Register>()); } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (value_loc.IsConstant()) { int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue(); __ movl(Address(address, 0), Immediate(Low32Bits(value))); @@ -1966,41 +1957,40 @@ static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembl } void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) { - CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke); + CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke); } void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) { - GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); + GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) { - CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke); + CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke); } void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) { - GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) { - CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke); + CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke); } void 
IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) { - GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) { - CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke); + CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke); } void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) { - GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); + GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetOut(Location::RequiresRegister()); } @@ -2010,7 +2000,7 @@ void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) { } static void GenUnsafeGet(HInvoke* invoke, - Primitive::Type type, + DataType::Type type, bool is_volatile, CodeGeneratorX86* codegen) { X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); @@ -2022,13 +2012,13 @@ static void GenUnsafeGet(HInvoke* invoke, Location output_loc = locations->Out(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { Register output = output_loc.AsRegister<Register>(); __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { Register output = output_loc.AsRegister<Register>(); if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { @@ -2047,7 +2037,7 @@ static void GenUnsafeGet(HInvoke* invoke, break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { Register output_lo = output_loc.AsRegisterPairLow<Register>(); Register output_hi = output_loc.AsRegisterPairHigh<Register>(); if (is_volatile) { @@ -2070,25 +2060,26 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, +static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, - Primitive::Type type, + DataType::Type type, bool is_volatile) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. } locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { if (is_volatile) { // Need to use XMM to read volatile. 
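The FP temporary requested just after this comment exists because a 64-bit field on 32-bit x86 cannot be read atomically as two 32-bit movl loads: a volatile long get has to be one 8-byte access (an SSE load that is then split into a register pair). The same requirement expressed with std::atomic instead of ART's codegen, as a rough standalone sketch:

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    int main() {
      std::atomic<int64_t> field{0x0123456789abcdefLL};
      // This load must be a single 8-byte access even on a 32-bit x86 target;
      // compilers typically use an SSE/x87 move or cmpxchg8b to guarantee that.
      int64_t value = field.load(std::memory_order_seq_cst);
      std::cout << std::hex << value << '\n';
      return 0;
    }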
locations->AddTemp(Location::RequiresFpuRegister()); @@ -2103,62 +2094,65 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false); + CreateIntIntIntToIntLocations( + allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false); + CreateIntIntIntToIntLocations( + allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true); + CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false); + CreateIntIntIntToIntLocations( + allocator_, invoke, DataType::Type::kReference, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true); + CreateIntIntIntToIntLocations( + allocator_, invoke, DataType::Type::kReference, /* is_volatile */ true); } void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, - Primitive::Type type, +static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator, + DataType::Type type, HInvoke* invoke, bool is_volatile) { - LocationSummary* locations = new (arena) 
LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // Need temp registers for card-marking. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. // Ensure the value is in a byte register. locations->AddTemp(Location::RegisterLocation(ECX)); - } else if (type == Primitive::kPrimLong && is_volatile) { + } else if (type == DataType::Type::kInt64 && is_volatile) { locations->AddTemp(Location::RequiresFpuRegister()); locations->AddTemp(Location::RequiresFpuRegister()); } @@ -2166,45 +2160,45 @@ static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true); + allocator_, DataType::Type::kInt32, invoke, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kReference, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true); + allocator_, DataType::Type::kReference, invoke, /* is_volatile */ true); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false); + allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ false); } void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { CreateIntIntIntIntToVoidPlusTempsLocations( - arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true); + allocator_, DataType::Type::kInt64, invoke, /* is_volatile */ true); } // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 // memory model. 
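That comment is the reason GenUnsafePut below only emits codegen->MemoryFence() when is_volatile is set: on x86, plain stores already provide the AnyStore (release) ordering an ordered put needs, so only the volatile case pays for the extra StoreLoad barrier. A rough analogue in std::atomic terms, not ART code:

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    std::atomic<int32_t> field{0};

    void PutOrdered(int32_t v) {
      // Release store: on x86 this compiles to a plain mov, no fence required.
      field.store(v, std::memory_order_release);
    }

    void PutVolatile(int32_t v) {
      // Sequentially consistent store: on x86 this adds the StoreLoad barrier
      // (an xchg or mov plus mfence), matching the MemoryFence() call in the diff.
      field.store(v, std::memory_order_seq_cst);
    }

    int main() {
      PutOrdered(1);
      PutVolatile(2);
      std::cout << field.load() << '\n';
      return 0;
    }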
static void GenUnsafePut(LocationSummary* locations, - Primitive::Type type, + DataType::Type type, bool is_volatile, CodeGeneratorX86* codegen) { X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); @@ -2212,7 +2206,7 @@ static void GenUnsafePut(LocationSummary* locations, Register offset = locations->InAt(2).AsRegisterPairLow<Register>(); Location value_loc = locations->InAt(3); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { Register value_lo = value_loc.AsRegisterPairLow<Register>(); Register value_hi = value_loc.AsRegisterPairHigh<Register>(); if (is_volatile) { @@ -2226,7 +2220,7 @@ static void GenUnsafePut(LocationSummary* locations, __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo); __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi); } - } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + } else if (kPoisonHeapReferences && type == DataType::Type::kReference) { Register temp = locations->GetTemp(0).AsRegister<Register>(); __ movl(temp, value_loc.AsRegister<Register>()); __ PoisonHeapReference(temp); @@ -2239,7 +2233,7 @@ static void GenUnsafePut(LocationSummary* locations, codegen->MemoryFence(); } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(), locations->GetTemp(1).AsRegister<Register>(), @@ -2250,44 +2244,48 @@ static void GenUnsafePut(LocationSummary* locations, } void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); + GenUnsafePut( + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); + GenUnsafePut( + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_); + GenUnsafePut( + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* 
invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, - Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, + DataType::Type type, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && kUseBakerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); // Offset is a long, but in 32 bit mode, we only need the low word. @@ -2295,7 +2293,7 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, locations->SetInAt(2, Location::RequiresRegister()); // Expected value must be in EAX or EDX:EAX. // For long, new value must be in ECX:EBX. - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX)); locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX)); } else { @@ -2305,7 +2303,7 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, // Force a byte register for the output. locations->SetOut(Location::RegisterLocation(EAX)); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // Need temporary registers for card-marking, and possibly for // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. @@ -2315,11 +2313,11 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); + CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke); } void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); + CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke); } void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { @@ -2329,10 +2327,10 @@ void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) { return; } - CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); + CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke); } -static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { +static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) { X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler()); LocationSummary* locations = invoke->GetLocations(); @@ -2344,7 +2342,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // The address of the field within the holding object. 
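The fixed-register constraints above (expected value in EAX, or EDX:EAX for the 64-bit case, and the new 64-bit value in ECX:EBX) come from the x86 CMPXCHG and CMPXCHG8B instructions that GenCAS issues below; the operation itself is an ordinary compare-and-swap. A standalone sketch of that operation, using std::atomic rather than the ART assembler:

    #include <atomic>
    #include <cstdint>
    #include <iostream>

    int main() {
      std::atomic<int32_t> field{42};
      int32_t expected = 42;
      int32_t desired = 7;
      // Lowers to lock cmpxchg on x86; true iff the field still held `expected`.
      bool success = field.compare_exchange_strong(expected, desired);
      std::cout << (success ? "swapped" : "unchanged") << ", field=" << field.load() << '\n';
      return 0;
    }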
Address field_addr(base, offset, ScaleFactor::TIMES_1, 0); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); @@ -2425,12 +2423,12 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code // `expected`, as it is the same as register `out` (EAX). } } else { - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { // Ensure the expected value is in EAX (required by the CMPXCHG // instruction). DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX); __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>()); - } else if (type == Primitive::kPrimLong) { + } else if (type == DataType::Type::kInt64) { // Ensure the expected value is in EAX:EDX and that the new // value is in EBX:ECX (required by the CMPXCHG8B instruction). DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX); @@ -2452,11 +2450,11 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* code } void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) { - GenCAS(Primitive::kPrimInt, invoke, codegen_); + GenCAS(DataType::Type::kInt32, invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) { - GenCAS(Primitive::kPrimLong, invoke, codegen_); + GenCAS(DataType::Type::kInt64, invoke, codegen_); } void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { @@ -2464,13 +2462,12 @@ void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) { // UnsafeCASObject intrinsic is the Baker-style read barriers. DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCAS(Primitive::kPrimNot, invoke, codegen_); + GenCAS(DataType::Type::kReference, invoke, codegen_); } void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresRegister()); @@ -2511,9 +2508,8 @@ void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresRegister()); @@ -2548,15 +2544,14 @@ void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) { } static void CreateBitCountLocations( - ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) { + ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) { if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { // Do nothing if there is no popcnt support. This results in generating // a call for the intrinsic rather than direct code. 
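When HasPopCnt() is false the builder deliberately creates no LocationSummary, so Integer.bitCount and Long.bitCount stay ordinary calls instead of inlining a popcnt instruction. The same split between a hardware instruction and a generic routine in standalone form; BitCountFallback here is illustrative, not what ART's runtime actually calls:

    #include <cstdint>
    #include <iostream>

    int BitCountFallback(uint32_t x) {
      // Classic parallel bit count, the kind of code a library call falls back to.
      x = x - ((x >> 1) & 0x55555555u);
      x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
      x = (x + (x >> 4)) & 0x0f0f0f0fu;
      return static_cast<int>((x * 0x01010101u) >> 24);
    }

    int main() {
      uint32_t sample = 0xF0F0u;
      // __builtin_popcount only lowers to a popcnt instruction when the target
      // allows it, mirroring the HasPopCnt() feature check in the diff.
      std::cout << __builtin_popcount(sample) << ' ' << BitCountFallback(sample) << '\n';
      return 0;
    }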
return; } - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); if (is_long) { locations->AddTemp(Location::RequiresRegister()); } @@ -2605,7 +2600,7 @@ static void GenBitCount(X86Assembler* assembler, } void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) { - CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false); + CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ false); } void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { @@ -2613,17 +2608,16 @@ void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) { - CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true); + CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long */ true); } void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) { GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); } -static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); if (is_long) { locations->SetInAt(0, Location::RequiresRegister()); } else { @@ -2710,7 +2704,7 @@ static void GenLeadingZeros(X86Assembler* assembler, } void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false); + CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ false); } void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { @@ -2718,17 +2712,16 @@ void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke } void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true); + CreateLeadingZeroLocations(allocator_, invoke, /* is_long */ true); } void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } -static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); if (is_long) { locations->SetInAt(0, Location::RequiresRegister()); } else { @@ -2802,7 +2795,7 @@ static void GenTrailingZeros(X86Assembler* assembler, } void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false); + CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ false); } void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { @@ -2810,7 +2803,7 @@ void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invok } void 
IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true); + CreateTrailingZeroLocations(allocator_, invoke, /* is_long */ true); } void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -2823,16 +2816,16 @@ static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) // Compute base address for the System.arraycopy intrinsic in `base`. static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler, - Primitive::Type type, + DataType::Type type, const Register& array, const Location& pos, const Register& base) { // This routine is only used by the SystemArrayCopy intrinsic at the - // moment. We can allow Primitive::kPrimNot as `type` to implement + // moment. We can allow DataType::Type::kReference as `type` to implement // the SystemArrayCopyChar intrinsic. - DCHECK_EQ(type, Primitive::kPrimNot); - const int32_t element_size = Primitive::ComponentSize(type); - const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type)); + DCHECK_EQ(type, DataType::Type::kReference); + const int32_t element_size = DataType::Size(type); + const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type)); const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); if (pos.IsConstant()) { @@ -2845,16 +2838,16 @@ static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler, // Compute end source address for the System.arraycopy intrinsic in `end`. static void GenSystemArrayCopyEndAddress(X86Assembler* assembler, - Primitive::Type type, + DataType::Type type, const Location& copy_length, const Register& base, const Register& end) { // This routine is only used by the SystemArrayCopy intrinsic at the - // moment. We can allow Primitive::kPrimNot as `type` to implement + // moment. We can allow DataType::Type::kReference as `type` to implement // the SystemArrayCopyChar intrinsic. - DCHECK_EQ(type, Primitive::kPrimNot); - const int32_t element_size = Primitive::ComponentSize(type); - const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type)); + DCHECK_EQ(type, DataType::Type::kReference); + const int32_t element_size = DataType::Size(type); + const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type)); if (copy_length.IsConstant()) { int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue(); @@ -2921,7 +2914,8 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { Location temp2_loc = locations->GetTemp(1); Register temp2 = temp2_loc.AsRegister<Register>(); - SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke); + SlowPathCode* intrinsic_slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke); codegen_->AddSlowPath(intrinsic_slow_path); NearLabel conditions_on_positions_validated; @@ -3168,8 +3162,8 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - const Primitive::Type type = Primitive::kPrimNot; - const int32_t element_size = Primitive::ComponentSize(type); + const DataType::Type type = DataType::Type::kReference; + const int32_t element_size = DataType::Size(type); // Compute the base source address in `temp1`. 
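GenSystemArrayCopyBaseAddress and GenSystemArrayCopyEndAddress above simply materialize base = array + data_offset + pos * element_size and end = base + length * element_size, with the element size and scale now coming from DataType::Size() and DataType::SizeShift(). A standalone sketch of that arithmetic, where the concrete constants are hypothetical and uintptr_t stands in for the Register/Address machinery:

    #include <cstdint>
    #include <iostream>

    int main() {
      const uintptr_t array = 0x1000;   // hypothetical array object address
      const uint32_t data_offset = 12;  // hypothetical offset of the data payload
      const int32_t element_size = 4;   // 32-bit heap references
      const int32_t pos = 3;
      const int32_t length = 5;

      uintptr_t base = array + data_offset + pos * element_size;
      uintptr_t end = base + length * element_size;
      std::cout << std::hex << base << ' ' << end << '\n';
      return 0;
    }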
GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1); @@ -3235,7 +3229,7 @@ void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) { // Slow path used to copy array when `src` is gray. SlowPathCode* read_barrier_slow_path = - new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke); + new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke); codegen_->AddSlowPath(read_barrier_slow_path); // We have done the "if" of the gray bit check above, now branch based on the flags. @@ -3347,9 +3341,8 @@ void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetOut(Location::RequiresRegister()); } @@ -3366,6 +3359,13 @@ void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble) UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent) diff --git a/compiler/optimizing/intrinsics_x86.h b/compiler/optimizing/intrinsics_x86.h index 3743cb1371..e3555e78fc 100644 --- a/compiler/optimizing/intrinsics_x86.h +++ b/compiler/optimizing/intrinsics_x86.h @@ -39,7 +39,7 @@ class IntrinsicLocationsBuilderX86 FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -49,8 +49,8 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: - ArenaAllocator* arena_; - CodeGeneratorX86* codegen_; + ArenaAllocator* const allocator_; + CodeGeneratorX86* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86); }; @@ -64,7 +64,7 @@ class IntrinsicCodeGeneratorX86 FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) 
\ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -73,7 +73,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) ArenaAllocator* GetAllocator(); - CodeGeneratorX86* codegen_; + CodeGeneratorX86* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86); }; diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc index 5abdb1d1bd..91a505ede1 100644 --- a/compiler/optimizing/intrinsics_x86_64.cc +++ b/compiler/optimizing/intrinsics_x86_64.cc @@ -23,6 +23,7 @@ #include "base/bit_utils.h" #include "code_generator_x86_64.h" #include "entrypoints/quick/quick_entrypoints.h" +#include "heap_poisoning.h" #include "intrinsics.h" #include "intrinsics_utils.h" #include "lock_word.h" @@ -40,7 +41,7 @@ namespace art { namespace x86_64 { IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen) - : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) { + : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) { } X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() { @@ -48,7 +49,7 @@ X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() { } ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() { - return codegen_->GetGraph()->GetArena(); + return codegen_->GetGraph()->GetAllocator(); } bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) { @@ -89,7 +90,7 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { DCHECK(instruction_->GetLocations()->Intrinsified()); DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy); - int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot); + int32_t element_size = DataType::Size(DataType::Type::kReference); CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>(); CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>(); @@ -127,18 +128,16 @@ class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode { #define __ assembler-> -static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); } -static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::RequiresFpuRegister()); } @@ -156,10 +155,10 @@ static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assemble } void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { - 
CreateIntToFPLocations(arena_, invoke); + CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { @@ -170,10 +169,10 @@ void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) } void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { - CreateFPToIntLocations(arena_, invoke); + CreateFPToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) { - CreateIntToFPLocations(arena_, invoke); + CreateIntToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { @@ -183,29 +182,28 @@ void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) { MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler()); } -static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); } static void GenReverseBytes(LocationSummary* locations, - Primitive::Type size, + DataType::Type size, X86_64Assembler* assembler) { CpuRegister out = locations->Out().AsRegister<CpuRegister>(); switch (size) { - case Primitive::kPrimShort: + case DataType::Type::kInt16: // TODO: Can be done with an xchg of 8b registers. This is straight from Quick. __ bswapl(out); __ sarl(out, Immediate(16)); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ bswapl(out); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ bswapq(out); break; default: @@ -215,38 +213,37 @@ static void GenReverseBytes(LocationSummary* locations, } void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) { - GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); + GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } // TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we // need is 64b. 
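The hunk just below keeps the FPU temporary commented as "FP reg to hold mask", presumably so MathAbsFP can AND the input with a constant that has every bit set except the sign bit, the usual sign-bit-clearing trick for float/double abs. A standalone version of that trick on the integer side, not the ART codegen itself:

    #include <cstdint>
    #include <cstring>
    #include <iostream>

    double AbsViaMask(double x) {
      uint64_t bits;
      std::memcpy(&bits, &x, sizeof(bits));
      bits &= UINT64_C(0x7FFFFFFFFFFFFFFF);  // clear the sign bit, keep everything else
      std::memcpy(&x, &bits, sizeof(x));
      return x;
    }

    int main() {
      std::cout << AbsViaMask(-3.5) << '\n';  // prints 3.5
      return 0;
    }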
-static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateFloatToFloatPlusTemps(ArenaAllocator* allocator, HInvoke* invoke) { // TODO: Enable memory operations when the assembler supports them. - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresFpuRegister()); // FP reg to hold mask. @@ -274,7 +271,7 @@ static void MathAbsFP(LocationSummary* locations, } void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(arena_, invoke); + CreateFloatToFloatPlusTemps(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { @@ -282,17 +279,16 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) { - CreateFloatToFloatPlusTemps(arena_, invoke); + CreateFloatToFloatPlusTemps(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) { MathAbsFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler(), codegen_); } -static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntToIntPlusTemp(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresRegister()); @@ -321,7 +317,7 @@ static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assemb } void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) { - CreateIntToIntPlusTemp(arena_, invoke); + CreateIntToIntPlusTemp(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { @@ -329,7 +325,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) { - CreateIntToIntPlusTemp(arena_, invoke); + CreateIntToIntPlusTemp(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) { @@ -420,10 +416,9 @@ static void GenMinMaxFP(LocationSummary* locations, __ Bind(&done); } -static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPFPToFP(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetInAt(1, Location::RequiresFpuRegister()); // The following is sub-optimal, but all we can do for now. 
It would be fine to also accept @@ -432,7 +427,7 @@ static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(arena_, invoke); + CreateFPFPToFP(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { @@ -441,7 +436,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(arena_, invoke); + CreateFPFPToFP(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { @@ -450,7 +445,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { - CreateFPFPToFP(arena_, invoke); + CreateFPFPToFP(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { @@ -459,7 +454,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { - CreateFPFPToFP(arena_, invoke); + CreateFPFPToFP(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) { @@ -499,17 +494,16 @@ static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long, __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long); } -static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); } void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { @@ -517,7 +511,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { @@ -525,7 +519,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { @@ -533,23 +527,22 @@ void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) { - CreateIntIntToIntLocations(arena_, invoke); + CreateIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) { GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler()); } -static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { - 
LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresFpuRegister()); } void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) { - CreateFPToFPLocations(arena_, invoke); + CreateFPToFPLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) { @@ -575,18 +568,18 @@ static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invo } } -static void CreateSSE41FPToFPLocations(ArenaAllocator* arena, - HInvoke* invoke, - CodeGeneratorX86_64* codegen) { +static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { // Do we have instruction support? if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { - CreateFPToFPLocations(arena, invoke); + CreateFPToFPLocations(allocator, invoke); return; } // We have to fall back to a call to the intrinsic. - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::FpuRegisterLocation(XMM0)); @@ -609,7 +602,7 @@ static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen, } void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) { - CreateSSE41FPToFPLocations(arena_, invoke, codegen_); + CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) { @@ -617,7 +610,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) { - CreateSSE41FPToFPLocations(arena_, invoke, codegen_); + CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) { @@ -625,21 +618,20 @@ void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) { - CreateSSE41FPToFPLocations(arena_, invoke, codegen_); + CreateSSE41FPToFPLocations(allocator_, invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) { GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0); } -static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, - HInvoke* invoke, - CodeGeneratorX86_64* codegen) { +static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator, + HInvoke* invoke, + CodeGeneratorX86_64* codegen) { // Do we have instruction support? 
if (codegen->GetInstructionSetFeatures().HasSSE4_1()) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresFpuRegister()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(Location::RequiresFpuRegister()); @@ -648,8 +640,8 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, } // We have to fall back to a call to the intrinsic. - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::RegisterLocation(RAX)); @@ -658,7 +650,7 @@ static void CreateSSE41FPToIntLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) { - CreateSSE41FPToIntLocations(arena_, invoke, codegen_); + CreateSSE41FPToIntLocations(allocator_, invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { @@ -702,7 +694,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) { - CreateSSE41FPToIntLocations(arena_, invoke, codegen_); + CreateSSE41FPToIntLocations(allocator_, invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { @@ -745,11 +737,9 @@ void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) { __ Bind(&done); } -static void CreateFPToFPCallLocations(ArenaAllocator* arena, - HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); +static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetOut(Location::FpuRegisterLocation(XMM0)); @@ -772,7 +762,7 @@ static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen, } void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) { @@ -780,7 +770,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) { @@ -788,7 +778,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) { @@ -796,7 +786,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) { } void 
IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) { @@ -804,7 +794,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) { @@ -812,7 +802,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) { @@ -820,7 +810,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) { @@ -828,7 +818,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) { @@ -836,7 +826,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) { @@ -844,7 +834,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) { @@ -852,7 +842,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) { @@ -860,7 +850,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) { @@ -868,7 +858,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) { @@ -876,18 +866,16 @@ void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) { - CreateFPToFPCallLocations(arena_, invoke); + CreateFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickTanh); } -static void 
CreateFPFPToFPCallLocations(ArenaAllocator* arena, - HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); +static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0))); locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1))); @@ -902,15 +890,23 @@ static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) { GenFPToFPCall(invoke, codegen_, kQuickAtan2); } +void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) { + CreateFPFPToFPCallLocations(allocator_, invoke); +} + +void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) { + GenFPToFPCall(invoke, codegen_, kQuickPow); +} + void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) { @@ -918,7 +914,7 @@ void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) { - CreateFPFPToFPCallLocations(arena_, invoke); + CreateFPFPToFPCallLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) { @@ -948,9 +944,8 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) } } - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnSlowPath, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified); // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length). locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); @@ -1046,7 +1041,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>(); DCHECK_EQ(count.AsRegister(), RCX); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); codegen_->AddSlowPath(slow_path); // Bail out if the source and destination are the same. @@ -1083,7 +1078,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) { // Okay, everything checks out. Finally time to do the copy. // Check assumption that sizeof(Char) is 2 (used in scaling below). 
- const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value(); @@ -1124,7 +1119,7 @@ void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // source address for the System.arraycopy intrinsic in `src_base`, // `dst_base` and `src_end` respectively. static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler, - Primitive::Type type, + DataType::Type type, const CpuRegister& src, const Location& src_pos, const CpuRegister& dst, @@ -1134,9 +1129,9 @@ static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler, const CpuRegister& dst_base, const CpuRegister& src_end) { // This routine is only used by the SystemArrayCopy intrinsic. - DCHECK_EQ(type, Primitive::kPrimNot); - const int32_t element_size = Primitive::ComponentSize(type); - const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type)); + DCHECK_EQ(type, DataType::Type::kReference); + const int32_t element_size = DataType::Size(type); + const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type)); const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value(); if (src_pos.IsConstant()) { @@ -1188,7 +1183,8 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>(); Location TMP_loc = Location::RegisterLocation(TMP); - SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + SlowPathCode* intrinsic_slow_path = + new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); codegen_->AddSlowPath(intrinsic_slow_path); NearLabel conditions_on_positions_validated; @@ -1409,8 +1405,8 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel()); } - const Primitive::Type type = Primitive::kPrimNot; - const int32_t element_size = Primitive::ComponentSize(type); + const DataType::Type type = DataType::Type::kReference; + const int32_t element_size = DataType::Size(type); // Compute base source address, base destination address, and end // source address in `temp1`, `temp2` and `temp3` respectively. @@ -1462,7 +1458,7 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { // Slow path used to copy array when `src` is gray. SlowPathCode* read_barrier_slow_path = - new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke); + new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke); codegen_->AddSlowPath(read_barrier_slow_path); // We have done the "if" of the gray bit check above, now branch based on the flags. 
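// For reference, GenSystemArrayCopyAddresses above derives the three pointers that drive the copy
// loop from the standard array layout: the payload starts data_offset bytes into the array object
// and every element occupies element_size bytes. A hedged sketch of the same arithmetic in plain
// C++ (parameter names are illustrative, not ART constants):

#include <cstdint>

struct CopyAddresses {
  uintptr_t src_base;  // address of the first source element to copy
  uintptr_t dst_base;  // address of the first destination slot
  uintptr_t src_end;   // one past the last source element, used as the loop bound
};

CopyAddresses ComputeCopyAddresses(uintptr_t src, uint32_t src_pos,
                                   uintptr_t dst, uint32_t dst_pos,
                                   uint32_t length,
                                   uint32_t data_offset, uint32_t element_size) {
  CopyAddresses a;
  a.src_base = src + data_offset + src_pos * element_size;
  a.dst_base = dst + data_offset + dst_pos * element_size;
  a.src_end  = a.src_base + length * element_size;
  return a;
}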
@@ -1506,9 +1502,8 @@ void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); @@ -1524,7 +1519,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>(); __ testl(argument, argument); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1533,9 +1528,15 @@ void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + if (kEmitCompilerReadBarrier && + !StringEqualsOptimizations(invoke).GetArgumentIsString() && + !StringEqualsOptimizations(invoke).GetNoReadBarrierForStringClass()) { + // No support for this odd case (String class is moveable, not in the boot image). + return; + } + + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RequiresRegister()); @@ -1670,7 +1671,6 @@ static void CreateStringIndexOfLocations(HInvoke* invoke, static void GenerateStringIndexOf(HInvoke* invoke, X86_64Assembler* assembler, CodeGeneratorX86_64* codegen, - ArenaAllocator* allocator, bool start_at_zero) { LocationSummary* locations = invoke->GetLocations(); @@ -1698,15 +1698,15 @@ static void GenerateStringIndexOf(HInvoke* invoke, std::numeric_limits<uint16_t>::max()) { // Always needs the slow-path. We could directly dispatch to it, but this case should be // rare, so for simplicity just put the full slow-path down and branch unconditionally. 
- slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); codegen->AddSlowPath(slow_path); __ jmp(slow_path->GetEntryLabel()); __ Bind(slow_path->GetExitLabel()); return; } - } else if (code_point->GetType() != Primitive::kPrimChar) { + } else if (code_point->GetType() != DataType::Type::kUint16) { __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max())); - slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke); + slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); codegen->AddSlowPath(slow_path); __ j(kAbove, slow_path->GetEntryLabel()); } @@ -1811,26 +1811,24 @@ static void GenerateStringIndexOf(HInvoke* invoke, } void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ true); } void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) { - GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ true); } void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { - CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false); + CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero */ false); } void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) { - GenerateStringIndexOf( - invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false); + GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero */ false); } void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); @@ -1845,7 +1843,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>(); __ testl(byte_array, byte_array); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1855,9 +1853,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke } void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainOnly, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1))); @@ -1877,9 +1874,8 @@ void 
IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke } void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kCallOnMainAndSlowPath, - kIntrinsified); + LocationSummary* locations = new (allocator_) LocationSummary( + invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified); InvokeRuntimeCallingConvention calling_convention; locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0))); locations->SetOut(Location::RegisterLocation(RAX)); @@ -1891,7 +1887,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invok CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>(); __ testl(string_to_copy, string_to_copy); - SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke); + SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke); codegen_->AddSlowPath(slow_path); __ j(kEqual, slow_path->GetEntryLabel()); @@ -1902,9 +1898,8 @@ void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invok void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin); - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1))); locations->SetInAt(2, Location::RequiresRegister()); @@ -1921,7 +1916,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { X86_64Assembler* assembler = GetAssembler(); LocationSummary* locations = invoke->GetLocations(); - size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar); + size_t char_component_size = DataType::Size(DataType::Type::kUint16); // Location of data in char array buffer. const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value(); // Location of char array data in string. @@ -1937,7 +1932,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>(); // Check assumption that sizeof(Char) is 2 (used in scaling below). - const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar); + const size_t char_size = DataType::Size(DataType::Type::kUint16); DCHECK_EQ(char_size, 2u); NearLabel done; @@ -1951,7 +1946,7 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { } if (mirror::kUseStringCompression) { NearLabel copy_uncompressed, copy_loop; - const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte); + const size_t c_char_size = DataType::Size(DataType::Type::kInt8); DCHECK_EQ(c_char_size, 1u); // Location of count in string. 
const uint32_t count_offset = mirror::String::CountOffset().Uint32Value(); @@ -1992,22 +1987,22 @@ void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) { __ Bind(&done); } -static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { +static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) { CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity. // x86 allows unaligned access. We do not have to check the input or use specific instructions // to avoid a SIGBUS. switch (size) { - case Primitive::kPrimByte: + case DataType::Type::kInt8: __ movsxb(out, Address(address, 0)); break; - case Primitive::kPrimShort: + case DataType::Type::kInt16: __ movsxw(out, Address(address, 0)); break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ movl(out, Address(address, 0)); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ movq(out, Address(address, 0)); break; default: @@ -2017,52 +2012,51 @@ static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Asse } void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) { - GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); + GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) { - GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) { - GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { - CreateIntToIntLocations(arena_, invoke); + CreateIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) { - GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); + GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } -static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1))); } -static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) { +static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* 
assembler) { CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>(); Location value = locations->InAt(1); // x86 allows unaligned access. We do not have to check the input or use specific instructions // to avoid a SIGBUS. switch (size) { - case Primitive::kPrimByte: + case DataType::Type::kInt8: if (value.IsConstant()) { __ movb(Address(address, 0), Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); @@ -2070,7 +2064,7 @@ static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Asse __ movb(Address(address, 0), value.AsRegister<CpuRegister>()); } break; - case Primitive::kPrimShort: + case DataType::Type::kInt16: if (value.IsConstant()) { __ movw(Address(address, 0), Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); @@ -2078,7 +2072,7 @@ static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Asse __ movw(Address(address, 0), value.AsRegister<CpuRegister>()); } break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: if (value.IsConstant()) { __ movl(Address(address, 0), Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant()))); @@ -2086,7 +2080,7 @@ static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Asse __ movl(Address(address, 0), value.AsRegister<CpuRegister>()); } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (value.IsConstant()) { int64_t v = value.GetConstant()->AsLongConstant()->GetValue(); DCHECK(IsInt<32>(v)); @@ -2103,41 +2097,40 @@ static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Asse } void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) { - GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler()); + GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) { - GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); + GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) { - GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); + GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { - CreateIntIntToVoidLocations(arena_, invoke); + CreateIntIntToVoidLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) { - GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); + GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler()); } void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); 
locations->SetOut(Location::RequiresRegister()); } @@ -2148,7 +2141,7 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) { } static void GenUnsafeGet(HInvoke* invoke, - Primitive::Type type, + DataType::Type type, bool is_volatile ATTRIBUTE_UNUSED, CodeGeneratorX86_64* codegen) { X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); @@ -2161,11 +2154,11 @@ static void GenUnsafeGet(HInvoke* invoke, CpuRegister output = output_loc.AsRegister<CpuRegister>(); switch (type) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); break; - case Primitive::kPrimNot: { + case DataType::Type::kReference: { if (kEmitCompilerReadBarrier) { if (kUseBakerReadBarrier) { Address src(base, offset, ScaleFactor::TIMES_1, 0); @@ -2183,7 +2176,7 @@ static void GenUnsafeGet(HInvoke* invoke, break; } - case Primitive::kPrimLong: + case DataType::Type::kInt64: __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0)); break; @@ -2193,15 +2186,16 @@ static void GenUnsafeGet(HInvoke* invoke, } } -static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { +static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject || invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); if (can_call && kUseBakerReadBarrier) { locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers. 
} @@ -2213,56 +2207,55 @@ static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - CreateIntIntIntToIntLocations(arena_, invoke); + CreateIntIntIntToIntLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) { - GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_); + GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, - Primitive::Type type, +static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator, + DataType::Type type, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); locations->SetInAt(3, Location::RequiresRegister()); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // Need temp registers for card-marking. 
locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. locations->AddTemp(Location::RequiresRegister()); @@ -2270,45 +2263,45 @@ static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke); + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke); + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke); + CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke); } // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86 // memory model. 
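// For reference, the comment above is the reason the *Ordered variants take the same code path as
// the plain puts: an ordinary x86-64 store is already ordered after all earlier loads and stores,
// so only the volatile variants need the explicit fence emitted in GenUnsafePut below. A hedged
// sketch of that distinction using std::atomic (illustrative helpers, not ART code):

#include <atomic>
#include <cstdint>

void PutOrdered(std::atomic<int32_t>* field, int32_t value) {
  // Release store: compiles to a plain mov on x86-64, no barrier instruction needed.
  field->store(value, std::memory_order_release);
}

void PutVolatile(std::atomic<int32_t>* field, int32_t value) {
  // Sequentially consistent store: the compiler adds a full fence (mfence or a locked
  // instruction), corresponding to the MemoryFence() call in GenUnsafePut.
  field->store(value, std::memory_order_seq_cst);
}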
-static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile, +static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile, CodeGeneratorX86_64* codegen) { X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>(); - if (type == Primitive::kPrimLong) { + if (type == DataType::Type::kInt64) { __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value); - } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) { + } else if (kPoisonHeapReferences && type == DataType::Type::kReference) { CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>(); __ movl(temp, value); __ PoisonHeapReference(temp); @@ -2321,7 +2314,7 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool codegen->MemoryFence(); } - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { bool value_can_be_null = true; // TODO: Worth finding out this information? codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(), locations->GetTemp(1).AsRegister<CpuRegister>(), @@ -2332,44 +2325,48 @@ static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool } void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); + GenUnsafePut( + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_); + GenUnsafePut( + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_); + GenUnsafePut( + invoke->GetLocations(), DataType::Type::kReference, /* is_volatile */ true, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_); + 
GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ false, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) { - GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_); + GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /* is_volatile */ true, codegen_); } -static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, - Primitive::Type type, +static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator, + DataType::Type type, HInvoke* invoke) { bool can_call = kEmitCompilerReadBarrier && kUseBakerReadBarrier && (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject); - LocationSummary* locations = new (arena) LocationSummary(invoke, - (can_call - ? LocationSummary::kCallOnSlowPath - : LocationSummary::kNoCall), - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, + can_call + ? LocationSummary::kCallOnSlowPath + : LocationSummary::kNoCall, + kIntrinsified); locations->SetInAt(0, Location::NoLocation()); // Unused receiver. locations->SetInAt(1, Location::RequiresRegister()); locations->SetInAt(2, Location::RequiresRegister()); @@ -2378,7 +2375,7 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, locations->SetInAt(4, Location::RequiresRegister()); locations->SetOut(Location::RequiresRegister()); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // Need temporary registers for card-marking, and possibly for // (Baker) read barrier. locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too. @@ -2387,11 +2384,11 @@ static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke); + CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) { - CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke); + CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke); } void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { @@ -2401,10 +2398,10 @@ void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) { return; } - CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke); + CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke); } -static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { +static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) { X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler()); LocationSummary* locations = invoke->GetLocations(); @@ -2417,7 +2414,7 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c Location out_loc = locations->Out(); CpuRegister out = out_loc.AsRegister<CpuRegister>(); - if (type == Primitive::kPrimNot) { + if (type == DataType::Type::kReference) { // The only read barrier implementation supporting the // UnsafeCASObject intrinsic is the Baker-style read barriers. 
DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); @@ -2499,9 +2496,9 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c __ UnpoisonHeapReference(expected); } } else { - if (type == Primitive::kPrimInt) { + if (type == DataType::Type::kInt32) { __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value); - } else if (type == Primitive::kPrimLong) { + } else if (type == DataType::Type::kInt64) { __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value); } else { LOG(FATAL) << "Unexpected CAS type " << type; @@ -2517,11 +2514,11 @@ static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* c } void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) { - GenCAS(Primitive::kPrimInt, invoke, codegen_); + GenCAS(DataType::Type::kInt32, invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) { - GenCAS(Primitive::kPrimLong, invoke, codegen_); + GenCAS(DataType::Type::kInt64, invoke, codegen_); } void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { @@ -2529,13 +2526,12 @@ void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) { // UnsafeCASObject intrinsic is the Baker-style read barriers. DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier); - GenCAS(Primitive::kPrimNot, invoke, codegen_); + GenCAS(DataType::Type::kReference, invoke, codegen_); } void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresRegister()); @@ -2576,9 +2572,8 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::RequiresRegister()); locations->SetOut(Location::SameAsFirstInput()); locations->AddTemp(Location::RequiresRegister()); @@ -2621,15 +2616,14 @@ void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) { } static void CreateBitCountLocations( - ArenaAllocator* arena, CodeGeneratorX86_64* codegen, HInvoke* invoke) { + ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) { if (!codegen->GetInstructionSetFeatures().HasPopCnt()) { // Do nothing if there is no popcnt support. This results in generating // a call for the intrinsic rather than direct code. 
return; } - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresRegister()); } @@ -2668,7 +2662,7 @@ static void GenBitCount(X86_64Assembler* assembler, } void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) { - CreateBitCountLocations(arena_, codegen_, invoke); + CreateBitCountLocations(allocator_, codegen_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) { @@ -2676,17 +2670,16 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) { - CreateBitCountLocations(arena_, codegen_, invoke); + CreateBitCountLocations(allocator_, codegen_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) { GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true); } -static void CreateOneBitLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_high) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresRegister()); locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL @@ -2783,7 +2776,7 @@ static void GenOneBit(X86_64Assembler* assembler, } void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { - CreateOneBitLocations(arena_, invoke, /* is_high */ true); + CreateOneBitLocations(allocator_, invoke, /* is_high */ true); } void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { @@ -2791,7 +2784,7 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) { - CreateOneBitLocations(arena_, invoke, /* is_high */ true); + CreateOneBitLocations(allocator_, invoke, /* is_high */ true); } void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) { @@ -2799,7 +2792,7 @@ void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { - CreateOneBitLocations(arena_, invoke, /* is_high */ false); + CreateOneBitLocations(allocator_, invoke, /* is_high */ false); } void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { @@ -2807,17 +2800,16 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) { - CreateOneBitLocations(arena_, invoke, /* is_high */ false); + CreateOneBitLocations(allocator_, invoke, /* is_high */ false); } void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) { GenOneBit(GetAssembler(), codegen_, invoke, /* is_high */ false, /* is_long */ true); } -static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateLeadingZeroLocations(ArenaAllocator* 
allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresRegister()); } @@ -2873,7 +2865,7 @@ static void GenLeadingZeros(X86_64Assembler* assembler, } void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { - CreateLeadingZeroLocations(arena_, invoke); + CreateLeadingZeroLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { @@ -2881,17 +2873,16 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* inv } void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { - CreateLeadingZeroLocations(arena_, invoke); + CreateLeadingZeroLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true); } -static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke) { - LocationSummary* locations = new (arena) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); +static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) { + LocationSummary* locations = + new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetInAt(0, Location::Any()); locations->SetOut(Location::RequiresRegister()); } @@ -2942,7 +2933,7 @@ static void GenTrailingZeros(X86_64Assembler* assembler, } void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { - CreateTrailingZeroLocations(arena_, invoke); + CreateTrailingZeroLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) { @@ -2950,7 +2941,7 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* in } void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { - CreateTrailingZeroLocations(arena_, invoke); + CreateTrailingZeroLocations(allocator_, invoke); } void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) { @@ -3025,9 +3016,8 @@ void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) { } void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) { - LocationSummary* locations = new (arena_) LocationSummary(invoke, - LocationSummary::kNoCall, - kIntrinsified); + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); locations->SetOut(Location::RequiresRegister()); } @@ -3045,6 +3035,14 @@ void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) { __ Bind(&done); } +void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) { + LocationSummary* locations = + new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified); + locations->SetInAt(0, Location::Any()); +} + +void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { } + UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent) UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite) UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite) diff --git a/compiler/optimizing/intrinsics_x86_64.h b/compiler/optimizing/intrinsics_x86_64.h index 97404aa568..5cb601edfe 100644 --- a/compiler/optimizing/intrinsics_x86_64.h +++ 
b/compiler/optimizing/intrinsics_x86_64.h @@ -39,7 +39,7 @@ class IntrinsicLocationsBuilderX86_64 FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -49,8 +49,8 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) bool TryDispatch(HInvoke* invoke); private: - ArenaAllocator* arena_; - CodeGeneratorX86_64* codegen_; + ArenaAllocator* const allocator_; + CodeGeneratorX86_64* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderX86_64); }; @@ -64,7 +64,7 @@ class IntrinsicCodeGeneratorX86_64 FINAL : public IntrinsicVisitor { #define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \ void Visit ## Name(HInvoke* invoke) OVERRIDE; #include "intrinsics_list.h" -INTRINSICS_LIST(OPTIMIZING_INTRINSICS) + INTRINSICS_LIST(OPTIMIZING_INTRINSICS) #undef INTRINSICS_LIST #undef OPTIMIZING_INTRINSICS @@ -73,7 +73,7 @@ INTRINSICS_LIST(OPTIMIZING_INTRINSICS) ArenaAllocator* GetAllocator(); - CodeGeneratorX86_64* codegen_; + CodeGeneratorX86_64* const codegen_; DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorX86_64); }; diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc index f0086fb202..d3a0376e9c 100644 --- a/compiler/optimizing/licm.cc +++ b/compiler/optimizing/licm.cc @@ -84,10 +84,10 @@ void LICM::Run() { // Only used during debug. ArenaBitVector* visited = nullptr; if (kIsDebugBuild) { - visited = new (graph_->GetArena()) ArenaBitVector(graph_->GetArena(), - graph_->GetBlocks().size(), - false, - kArenaAllocLICM); + visited = new (graph_->GetAllocator()) ArenaBitVector(graph_->GetAllocator(), + graph_->GetBlocks().size(), + false, + kArenaAllocLICM); } // Post order visit to visit inner loops before outer loops. @@ -129,10 +129,25 @@ void LICM::Run() { !inst_it.Done(); inst_it.Advance()) { HInstruction* instruction = inst_it.Current(); - if (instruction->CanBeMoved() - && (!instruction->CanThrow() || !found_first_non_hoisted_visible_instruction_in_loop) - && !instruction->GetSideEffects().MayDependOn(loop_effects) - && InputsAreDefinedBeforeLoop(instruction)) { + bool can_move = false; + if (instruction->CanBeMoved() && InputsAreDefinedBeforeLoop(instruction)) { + if (instruction->CanThrow()) { + if (!found_first_non_hoisted_visible_instruction_in_loop) { + DCHECK(instruction->GetBlock()->IsLoopHeader()); + if (instruction->IsClinitCheck()) { + // clinit is only done once, and since all visible instructions + // in the loop header so far have been hoisted out, we can hoist + // the clinit check out also. + can_move = true; + } else if (!instruction->GetSideEffects().MayDependOn(loop_effects)) { + can_move = true; + } + } + } else if (!instruction->GetSideEffects().MayDependOn(loop_effects)) { + can_move = true; + } + } + if (can_move) { // We need to update the environment if the instruction has a loop header // phi in it. 
if (instruction->NeedsEnvironment()) { @@ -141,8 +156,10 @@ void LICM::Run() { DCHECK(!instruction->HasEnvironment()); } instruction->MoveBefore(pre_header->GetLastInstruction()); - MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved); - } else if (instruction->CanThrow() || instruction->DoesAnyWrite()) { + MaybeRecordStat(stats_, MethodCompilationStat::kLoopInvariantMoved); + } + + if (!can_move && (instruction->CanThrow() || instruction->DoesAnyWrite())) { // If `instruction` can do something visible (throw or write), // we cannot move further instructions that can throw. found_first_non_hoisted_visible_instruction_in_loop = true; diff --git a/compiler/optimizing/licm.h b/compiler/optimizing/licm.h index bf56f53d46..ee567aeb20 100644 --- a/compiler/optimizing/licm.h +++ b/compiler/optimizing/licm.h @@ -26,8 +26,11 @@ class SideEffectsAnalysis; class LICM : public HOptimization { public: - LICM(HGraph* graph, const SideEffectsAnalysis& side_effects, OptimizingCompilerStats* stats) - : HOptimization(graph, kLoopInvariantCodeMotionPassName, stats), + LICM(HGraph* graph, + const SideEffectsAnalysis& side_effects, + OptimizingCompilerStats* stats, + const char* name = kLoopInvariantCodeMotionPassName) + : HOptimization(graph, name, stats), side_effects_(side_effects) {} void Run() OVERRIDE; diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc index 8d15f78cce..adc3cabe87 100644 --- a/compiler/optimizing/licm_test.cc +++ b/compiler/optimizing/licm_test.cc @@ -14,9 +14,10 @@ * limitations under the License. */ +#include "licm.h" + #include "base/arena_allocator.h" #include "builder.h" -#include "licm.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "side_effects_analysis.h" @@ -26,12 +27,10 @@ namespace art { /** * Fixture class for the LICM tests. */ -class LICMTest : public CommonCompilerTest { +class LICMTest : public OptimizingUnitTest { public: LICMTest() - : pool_(), - allocator_(&pool_), - entry_(nullptr), + : entry_(nullptr), loop_preheader_(nullptr), loop_header_(nullptr), loop_body_(nullptr), @@ -40,7 +39,7 @@ class LICMTest : public CommonCompilerTest { parameter_(nullptr), int_constant_(nullptr), float_constant_(nullptr) { - graph_ = CreateGraph(&allocator_); + graph_ = CreateGraph(); } ~LICMTest() { } @@ -48,12 +47,12 @@ class LICMTest : public CommonCompilerTest { // Builds a singly-nested loop structure in CFG. Tests can further populate // the basic blocks with instructions to set up interesting scenarios. void BuildLoop() { - entry_ = new (&allocator_) HBasicBlock(graph_); - loop_preheader_ = new (&allocator_) HBasicBlock(graph_); - loop_header_ = new (&allocator_) HBasicBlock(graph_); - loop_body_ = new (&allocator_) HBasicBlock(graph_); - return_ = new (&allocator_) HBasicBlock(graph_); - exit_ = new (&allocator_) HBasicBlock(graph_); + entry_ = new (GetAllocator()) HBasicBlock(graph_); + loop_preheader_ = new (GetAllocator()) HBasicBlock(graph_); + loop_header_ = new (GetAllocator()) HBasicBlock(graph_); + loop_body_ = new (GetAllocator()) HBasicBlock(graph_); + return_ = new (GetAllocator()) HBasicBlock(graph_); + exit_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry_); graph_->AddBlock(loop_preheader_); @@ -74,18 +73,18 @@ class LICMTest : public CommonCompilerTest { return_->AddSuccessor(exit_); // Provide boiler-plate instructions. 
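The reworked hoisting condition in licm.cc above reads as a small decision procedure: a candidate must be movable with all inputs defined before the loop; a throwing candidate is only hoistable while no visible instruction has been kept in the loop header before it; and a ClinitCheck is allowed even when the loop has side effects, because class initialization runs at most once. A minimal standalone sketch of that decision, using illustrative names rather than the real HInstruction API:

// Sketch only: mirrors the structure of the check, not the actual ART types.
bool CanHoistOutOfLoop(bool can_be_moved,
                       bool inputs_defined_before_loop,
                       bool can_throw,
                       bool is_clinit_check,
                       bool seen_visible_instruction_in_header,
                       bool may_depend_on_loop_effects) {
  if (!can_be_moved || !inputs_defined_before_loop) {
    return false;
  }
  if (can_throw) {
    if (seen_visible_instruction_in_header) {
      // Hoisting could change which exception is observed first.
      return false;
    }
    // Class initialization happens at most once, so a ClinitCheck may be
    // hoisted even if it would otherwise depend on loop side effects.
    return is_clinit_check || !may_depend_on_loop_effects;
  }
  return !may_depend_on_loop_effects;
}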
- parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimNot); + parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kReference); entry_->AddInstruction(parameter_); int_constant_ = graph_->GetIntConstant(42); float_constant_ = graph_->GetFloatConstant(42.0f); - loop_preheader_->AddInstruction(new (&allocator_) HGoto()); - loop_header_->AddInstruction(new (&allocator_) HIf(parameter_)); - loop_body_->AddInstruction(new (&allocator_) HGoto()); - return_->AddInstruction(new (&allocator_) HReturnVoid()); - exit_->AddInstruction(new (&allocator_) HExit()); + loop_preheader_->AddInstruction(new (GetAllocator()) HGoto()); + loop_header_->AddInstruction(new (GetAllocator()) HIf(parameter_)); + loop_body_->AddInstruction(new (GetAllocator()) HGoto()); + return_->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_->AddInstruction(new (GetAllocator()) HExit()); } // Performs LICM optimizations (after proper set up). @@ -97,8 +96,6 @@ class LICMTest : public CommonCompilerTest { } // General building fields. - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; // Specific basic blocks. @@ -122,18 +119,18 @@ TEST_F(LICMTest, FieldHoisting) { BuildLoop(); // Populate the loop with instructions: set/get field with different types. - HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_, - nullptr, - Primitive::kPrimLong, - MemberOffset(10), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph_->GetDexFile(), - 0); + HInstruction* get_field = new (GetAllocator()) HInstanceFieldGet(parameter_, + nullptr, + DataType::Type::kInt64, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction()); - HInstruction* set_field = new (&allocator_) HInstanceFieldSet( - parameter_, int_constant_, nullptr, Primitive::kPrimInt, MemberOffset(20), + HInstruction* set_field = new (GetAllocator()) HInstanceFieldSet( + parameter_, int_constant_, nullptr, DataType::Type::kInt32, MemberOffset(20), false, kUnknownFieldIndex, kUnknownClassDefIndex, graph_->GetDexFile(), 0); loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction()); @@ -149,26 +146,26 @@ TEST_F(LICMTest, NoFieldHoisting) { // Populate the loop with instructions: set/get field with same types. 
ScopedNullHandle<mirror::DexCache> dex_cache; - HInstruction* get_field = new (&allocator_) HInstanceFieldGet(parameter_, - nullptr, - Primitive::kPrimLong, - MemberOffset(10), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph_->GetDexFile(), - 0); + HInstruction* get_field = new (GetAllocator()) HInstanceFieldGet(parameter_, + nullptr, + DataType::Type::kInt64, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); loop_body_->InsertInstructionBefore(get_field, loop_body_->GetLastInstruction()); - HInstruction* set_field = new (&allocator_) HInstanceFieldSet(parameter_, - get_field, - nullptr, - Primitive::kPrimLong, - MemberOffset(10), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph_->GetDexFile(), - 0); + HInstruction* set_field = new (GetAllocator()) HInstanceFieldSet(parameter_, + get_field, + nullptr, + DataType::Type::kInt64, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); loop_body_->InsertInstructionBefore(set_field, loop_body_->GetLastInstruction()); EXPECT_EQ(get_field->GetBlock(), loop_body_); @@ -182,11 +179,11 @@ TEST_F(LICMTest, ArrayHoisting) { BuildLoop(); // Populate the loop with instructions: set/get array with different types. - HInstruction* get_array = new (&allocator_) HArrayGet( - parameter_, int_constant_, Primitive::kPrimInt, 0); + HInstruction* get_array = new (GetAllocator()) HArrayGet( + parameter_, int_constant_, DataType::Type::kInt32, 0); loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); - HInstruction* set_array = new (&allocator_) HArraySet( - parameter_, int_constant_, float_constant_, Primitive::kPrimFloat, 0); + HInstruction* set_array = new (GetAllocator()) HArraySet( + parameter_, int_constant_, float_constant_, DataType::Type::kFloat32, 0); loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); EXPECT_EQ(get_array->GetBlock(), loop_body_); @@ -200,11 +197,11 @@ TEST_F(LICMTest, NoArrayHoisting) { BuildLoop(); // Populate the loop with instructions: set/get array with same types. - HInstruction* get_array = new (&allocator_) HArrayGet( - parameter_, int_constant_, Primitive::kPrimFloat, 0); + HInstruction* get_array = new (GetAllocator()) HArrayGet( + parameter_, int_constant_, DataType::Type::kFloat32, 0); loop_body_->InsertInstructionBefore(get_array, loop_body_->GetLastInstruction()); - HInstruction* set_array = new (&allocator_) HArraySet( - parameter_, get_array, float_constant_, Primitive::kPrimFloat, 0); + HInstruction* set_array = new (GetAllocator()) HArraySet( + parameter_, get_array, float_constant_, DataType::Type::kFloat32, 0); loop_body_->InsertInstructionBefore(set_array, loop_body_->GetLastInstruction()); EXPECT_EQ(get_array->GetBlock(), loop_body_); diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc index 80cecd41dc..58e00a810d 100644 --- a/compiler/optimizing/linear_order.cc +++ b/compiler/optimizing/linear_order.cc @@ -16,6 +16,9 @@ #include "linear_order.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" + namespace art { static bool InSameLoop(HLoopInformation* first_loop, HLoopInformation* second_loop) { @@ -34,7 +37,8 @@ static bool IsInnerLoop(HLoopInformation* outer, HLoopInformation* inner) { } // Helper method to update work list for linear order. 
-static void AddToListForLinearization(ArenaVector<HBasicBlock*>* worklist, HBasicBlock* block) { +static void AddToListForLinearization(ScopedArenaVector<HBasicBlock*>* worklist, + HBasicBlock* block) { HLoopInformation* block_loop = block->GetLoopInformation(); auto insert_pos = worklist->rbegin(); // insert_pos.base() will be the actual position. for (auto end = worklist->rend(); insert_pos != end; ++insert_pos) { @@ -51,7 +55,7 @@ static void AddToListForLinearization(ArenaVector<HBasicBlock*>* worklist, HBasi } // Helper method to validate linear order. -static bool IsLinearOrderWellFormed(const HGraph* graph, ArenaVector<HBasicBlock*>* linear_order) { +static bool IsLinearOrderWellFormed(const HGraph* graph, ArrayRef<HBasicBlock*> linear_order) { for (HBasicBlock* header : graph->GetBlocks()) { if (header == nullptr || !header->IsLoopHeader()) { continue; @@ -59,7 +63,7 @@ static bool IsLinearOrderWellFormed(const HGraph* graph, ArenaVector<HBasicBlock HLoopInformation* loop = header->GetLoopInformation(); size_t num_blocks = loop->GetBlocks().NumSetBits(); size_t found_blocks = 0u; - for (HBasicBlock* block : *linear_order) { + for (HBasicBlock* block : linear_order) { if (loop->Contains(*block)) { found_blocks++; if (found_blocks == 1u && block != header) { @@ -79,10 +83,8 @@ static bool IsLinearOrderWellFormed(const HGraph* graph, ArenaVector<HBasicBlock return true; } -void LinearizeGraph(const HGraph* graph, - ArenaAllocator* allocator, - ArenaVector<HBasicBlock*>* linear_order) { - DCHECK(linear_order->empty()); +void LinearizeGraphInternal(const HGraph* graph, ArrayRef<HBasicBlock*> linear_order) { + DCHECK_EQ(linear_order.size(), graph->GetReversePostOrder().size()); // Create a reverse post ordering with the following properties: // - Blocks in a loop are consecutive, // - Back-edge is the last block before loop exits. @@ -92,8 +94,9 @@ void LinearizeGraph(const HGraph* graph, // current reverse post order in the graph, but it would require making // order queries to a GrowableArray, which is not the best data structure // for it. - ArenaVector<uint32_t> forward_predecessors(graph->GetBlocks().size(), - allocator->Adapter(kArenaAllocLinearOrder)); + ScopedArenaAllocator allocator(graph->GetArenaStack()); + ScopedArenaVector<uint32_t> forward_predecessors(graph->GetBlocks().size(), + allocator.Adapter(kArenaAllocLinearOrder)); for (HBasicBlock* block : graph->GetReversePostOrder()) { size_t number_of_forward_predecessors = block->GetPredecessors().size(); if (block->IsLoopHeader()) { @@ -105,13 +108,14 @@ void LinearizeGraph(const HGraph* graph, // iterate over the successors. When all non-back edge predecessors of a // successor block are visited, the successor block is added in the worklist // following an order that satisfies the requirements to build our linear graph. 
- linear_order->reserve(graph->GetReversePostOrder().size()); - ArenaVector<HBasicBlock*> worklist(allocator->Adapter(kArenaAllocLinearOrder)); + ScopedArenaVector<HBasicBlock*> worklist(allocator.Adapter(kArenaAllocLinearOrder)); worklist.push_back(graph->GetEntryBlock()); + size_t num_added = 0u; do { HBasicBlock* current = worklist.back(); worklist.pop_back(); - linear_order->push_back(current); + linear_order[num_added] = current; + ++num_added; for (HBasicBlock* successor : current->GetSuccessors()) { int block_id = successor->GetBlockId(); size_t number_of_remaining_predecessors = forward_predecessors[block_id]; @@ -121,6 +125,7 @@ void LinearizeGraph(const HGraph* graph, forward_predecessors[block_id] = number_of_remaining_predecessors - 1; } } while (!worklist.empty()); + DCHECK_EQ(num_added, linear_order.size()); DCHECK(graph->HasIrreducibleLoops() || IsLinearOrderWellFormed(graph, linear_order)); } diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h index 7122d67be9..151db001e1 100644 --- a/compiler/optimizing/linear_order.h +++ b/compiler/optimizing/linear_order.h @@ -17,10 +17,14 @@ #ifndef ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_ #define ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_ +#include <type_traits> + #include "nodes.h" namespace art { +void LinearizeGraphInternal(const HGraph* graph, ArrayRef<HBasicBlock*> linear_order); + // Linearizes the 'graph' such that: // (1): a block is always after its dominator, // (2): blocks of loops are contiguous. @@ -32,9 +36,15 @@ namespace art { // // for (HBasicBlock* block : ReverseRange(linear_order)) // linear post order // -void LinearizeGraph(const HGraph* graph, - ArenaAllocator* allocator, - ArenaVector<HBasicBlock*>* linear_order); +template <typename Vector> +void LinearizeGraph(const HGraph* graph, Vector* linear_order) { + static_assert(std::is_same<HBasicBlock*, typename Vector::value_type>::value, + "Vector::value_type must be HBasicBlock*."); + // Resize the vector and pass an ArrayRef<> to internal implementation which is shared + // for all kinds of vectors, i.e. ArenaVector<> or ScopedArenaVector<>. 
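The new LinearizeGraph template in linear_order.h avoids duplicating the linearization body for ArenaVector<> and ScopedArenaVector<>: the header-only wrapper only checks the element type and sizes the output, then hands a type-erased ArrayRef to the single out-of-line implementation. A simplified, self-contained illustration of that pattern follows; Span stands in for ArrayRef and none of these names are ART's:

#include <cstddef>
#include <type_traits>
#include <vector>

template <typename T>
class Span {  // stand-in for ArrayRef<T>
 public:
  template <typename Vector>
  explicit Span(Vector& v) : data_(v.data()), size_(v.size()) {}
  T& operator[](size_t i) const { return data_[i]; }
  size_t size() const { return size_; }
 private:
  T* const data_;
  const size_t size_;
};

// Single shared implementation, compiled once.
void FillLinearOrderInternal(Span<int> out) {
  for (size_t i = 0; i < out.size(); ++i) {
    out[i] = static_cast<int>(i);
  }
}

// Thin header-only wrapper: works for any vector type holding int.
template <typename Vector>
void FillLinearOrder(Vector* out, size_t n) {
  static_assert(std::is_same<int, typename Vector::value_type>::value,
                "Vector::value_type must be int.");
  out->resize(n);  // size the storage first, then pass a view of it
  FillLinearOrderInternal(Span<int>(*out));
}

Calling FillLinearOrder(&some_vector, n) with a std::vector<int> or any other conforming vector type exercises the same FillLinearOrderInternal body, which is the design choice the comment above describes.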
+ linear_order->resize(graph->GetReversePostOrder().size()); + LinearizeGraphInternal(graph, ArrayRef<HBasicBlock*>(*linear_order)); +} } // namespace art diff --git a/compiler/optimizing/linearize_test.cc b/compiler/optimizing/linearize_test.cc index 3831aa6c91..9fa5b74c62 100644 --- a/compiler/optimizing/linearize_test.cc +++ b/compiler/optimizing/linearize_test.cc @@ -21,8 +21,8 @@ #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" -#include "dex_file.h" -#include "dex_instruction.h" +#include "dex/dex_file.h" +#include "dex/dex_instruction.h" #include "driver/compiler_options.h" #include "graph_visualizer.h" #include "nodes.h" @@ -32,17 +32,21 @@ namespace art { -class LinearizeTest : public CommonCompilerTest {}; +class LinearizeTest : public OptimizingUnitTest { + protected: + template <size_t number_of_blocks> + void TestCode(const std::vector<uint16_t>& data, + const uint32_t (&expected_order)[number_of_blocks]); +}; template <size_t number_of_blocks> -static void TestCode(const uint16_t* data, const uint32_t (&expected_order)[number_of_blocks]) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); +void LinearizeTest::TestCode(const std::vector<uint16_t>& data, + const uint32_t (&expected_order)[number_of_blocks]) { + HGraph* graph = CreateCFG(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); ASSERT_EQ(graph->GetLinearOrder().size(), number_of_blocks); @@ -65,7 +69,7 @@ TEST_F(LinearizeTest, CFG1) { // + / \ + // Block4 Block8 - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 5, Instruction::IF_EQ, 0xFFFE, @@ -90,7 +94,7 @@ TEST_F(LinearizeTest, CFG2) { // + / \ + // Block5 Block8 - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::RETURN_VOID, @@ -116,7 +120,7 @@ TEST_F(LinearizeTest, CFG3) { // Block6 + Block9 // | + // Block4 ++ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::RETURN_VOID, @@ -146,7 +150,7 @@ TEST_F(LinearizeTest, CFG4) { // + / \ + // Block5 Block11 */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 7, Instruction::IF_EQ, 0xFFFE, @@ -176,7 +180,7 @@ TEST_F(LinearizeTest, CFG5) { // +/ \ + // Block6 Block11 */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::RETURN_VOID, @@ -202,7 +206,7 @@ TEST_F(LinearizeTest, CFG6) { // Block5 <- Block9 Block6 + // | // Block7 - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x0100, Instruction::IF_EQ, 0x0004, @@ -230,7 +234,7 @@ TEST_F(LinearizeTest, CFG7) { // | // Block7 // - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = 
ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x0100, Instruction::IF_EQ, 0x0005, diff --git a/compiler/optimizing/live_interval_test.cc b/compiler/optimizing/live_interval_test.cc index 405f261986..c60386d7b7 100644 --- a/compiler/optimizing/live_interval_test.cc +++ b/compiler/optimizing/live_interval_test.cc @@ -23,29 +23,29 @@ namespace art { TEST(LiveInterval, GetStart) { - ArenaPool pool; - ArenaAllocator allocator(&pool); + ArenaPoolAndAllocator pool; + ScopedArenaAllocator* allocator = pool.GetScopedAllocator(); { static constexpr size_t ranges[][2] = {{0, 42}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); ASSERT_EQ(0u, interval->GetStart()); } { static constexpr size_t ranges[][2] = {{4, 12}, {14, 16}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); ASSERT_EQ(4u, interval->GetStart()); } } TEST(LiveInterval, IsDeadAt) { - ArenaPool pool; - ArenaAllocator allocator(&pool); + ArenaPoolAndAllocator pool; + ScopedArenaAllocator* allocator = pool.GetScopedAllocator(); { static constexpr size_t ranges[][2] = {{0, 42}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); ASSERT_TRUE(interval->IsDeadAt(42)); ASSERT_TRUE(interval->IsDeadAt(43)); ASSERT_FALSE(interval->IsDeadAt(41)); @@ -55,7 +55,7 @@ TEST(LiveInterval, IsDeadAt) { { static constexpr size_t ranges[][2] = {{4, 12}, {14, 16}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); ASSERT_TRUE(interval->IsDeadAt(16)); ASSERT_TRUE(interval->IsDeadAt(32)); ASSERT_FALSE(interval->IsDeadAt(0)); @@ -68,12 +68,12 @@ TEST(LiveInterval, IsDeadAt) { } TEST(LiveInterval, Covers) { - ArenaPool pool; - ArenaAllocator allocator(&pool); + ArenaPoolAndAllocator pool; + ScopedArenaAllocator* allocator = pool.GetScopedAllocator(); { static constexpr size_t ranges[][2] = {{0, 42}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); ASSERT_TRUE(interval->Covers(0)); ASSERT_TRUE(interval->Covers(4)); ASSERT_TRUE(interval->Covers(41)); @@ -83,7 +83,7 @@ TEST(LiveInterval, Covers) { { static constexpr size_t ranges[][2] = {{4, 12}, {14, 16}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); ASSERT_FALSE(interval->Covers(0)); ASSERT_TRUE(interval->Covers(4)); ASSERT_TRUE(interval->Covers(11)); @@ -96,68 +96,68 @@ TEST(LiveInterval, Covers) { } TEST(LiveInterval, FirstIntersectionWith) { - ArenaPool pool; - ArenaAllocator allocator(&pool); + ArenaPoolAndAllocator pool; + ScopedArenaAllocator* allocator = pool.GetScopedAllocator(); { static constexpr size_t ranges1[][2] = {{0, 4}, {8, 10}}; - LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), &allocator); + LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), allocator); static constexpr size_t ranges2[][2] = {{5, 6}}; - LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), &allocator); + LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), allocator); 
ASSERT_EQ(kNoLifetime, interval1->FirstIntersectionWith(interval2)); } { static constexpr size_t ranges1[][2] = {{0, 4}, {8, 10}}; - LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), &allocator); + LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), allocator); static constexpr size_t ranges2[][2] = {{5, 42}}; - LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), &allocator); + LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), allocator); ASSERT_EQ(8u, interval1->FirstIntersectionWith(interval2)); } { static constexpr size_t ranges1[][2] = {{0, 4}, {8, 10}}; - LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), &allocator); + LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), allocator); static constexpr size_t ranges2[][2] = {{5, 6}, {7, 8}, {11, 12}}; - LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), &allocator); + LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), allocator); ASSERT_EQ(kNoLifetime, interval1->FirstIntersectionWith(interval2)); } { static constexpr size_t ranges1[][2] = {{0, 4}, {8, 10}}; - LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), &allocator); + LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), allocator); static constexpr size_t ranges2[][2] = {{5, 6}, {7, 8}, {9, 10}}; - LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), &allocator); + LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), allocator); ASSERT_EQ(9u, interval1->FirstIntersectionWith(interval2)); } { static constexpr size_t ranges1[][2] = {{0, 1}, {2, 7}, {8, 10}}; - LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), &allocator); + LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), allocator); static constexpr size_t ranges2[][2] = {{1, 2}, {6, 7}, {9, 10}}; - LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), &allocator); + LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), allocator); ASSERT_EQ(6u, interval1->FirstIntersectionWith(interval2)); } { static constexpr size_t ranges1[][2] = {{0, 1}, {2, 8}, {55, 58}}; - LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), &allocator); + LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), allocator); static constexpr size_t ranges2[][2] = {{1, 2}, {11, 42}, {43, 48}, {54, 56}}; - LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), &allocator); + LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), allocator); ASSERT_EQ(55u, interval1->FirstIntersectionWith(interval2)); } { static constexpr size_t ranges1[][2] = {{0, 1}, {2, 8}, {15, 18}, {27, 32}, {41, 53}, {54, 60}}; - LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), &allocator); + LiveInterval* interval1 = BuildInterval(ranges1, arraysize(ranges1), allocator); static constexpr size_t ranges2[][2] = {{1, 2}, {11, 12}, {19, 25}, {34, 42}, {52, 60}}; - LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), &allocator); + LiveInterval* interval2 = BuildInterval(ranges2, arraysize(ranges2), allocator); ASSERT_EQ(41u, interval1->FirstIntersectionWith(interval2)); } @@ -188,13 +188,13 @@ static bool RangesEquals(LiveInterval* interval, } TEST(LiveInterval, SplitAt) { - ArenaPool pool; - ArenaAllocator allocator(&pool); + ArenaPoolAndAllocator pool; + ScopedArenaAllocator* allocator = pool.GetScopedAllocator(); { // Test 
within one range. static constexpr size_t ranges[][2] = {{0, 4}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); LiveInterval* split = interval->SplitAt(1); static constexpr size_t expected[][2] = {{0, 1}}; ASSERT_TRUE(RangesEquals(interval, expected, arraysize(expected))); @@ -205,7 +205,7 @@ TEST(LiveInterval, SplitAt) { { // Test just before the end of one range. static constexpr size_t ranges[][2] = {{0, 4}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); LiveInterval* split = interval->SplitAt(3); static constexpr size_t expected[][2] = {{0, 3}}; ASSERT_TRUE(RangesEquals(interval, expected, arraysize(expected))); @@ -216,7 +216,7 @@ TEST(LiveInterval, SplitAt) { { // Test withing the first range. static constexpr size_t ranges[][2] = {{0, 4}, {8, 12}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); LiveInterval* split = interval->SplitAt(1); static constexpr size_t expected[][2] = {{0, 1}}; ASSERT_TRUE(RangesEquals(interval, expected, arraysize(expected))); @@ -227,7 +227,7 @@ TEST(LiveInterval, SplitAt) { { // Test in a hole. static constexpr size_t ranges[][2] = {{0, 4}, {8, 12}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); LiveInterval* split = interval->SplitAt(5); static constexpr size_t expected[][2] = {{0, 4}}; ASSERT_TRUE(RangesEquals(interval, expected, arraysize(expected))); @@ -238,7 +238,7 @@ TEST(LiveInterval, SplitAt) { { // Test withing the second range. static constexpr size_t ranges[][2] = {{0, 4}, {8, 12}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); LiveInterval* split = interval->SplitAt(9); static constexpr size_t expected[][2] = {{0, 4}, {8, 9}}; ASSERT_TRUE(RangesEquals(interval, expected, arraysize(expected))); @@ -249,7 +249,7 @@ TEST(LiveInterval, SplitAt) { { // Test at the beginning of the second range. static constexpr size_t ranges[][2] = {{0, 4}, {6, 10}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); LiveInterval* split = interval->SplitAt(6); static constexpr size_t expected[][2] = {{0, 4}}; ASSERT_TRUE(RangesEquals(interval, expected, arraysize(expected))); @@ -260,7 +260,7 @@ TEST(LiveInterval, SplitAt) { { // Test at the end of the first range. static constexpr size_t ranges[][2] = {{0, 4}, {6, 10}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); LiveInterval* split = interval->SplitAt(4); static constexpr size_t expected[][2] = {{0, 4}}; ASSERT_TRUE(RangesEquals(interval, expected, arraysize(expected))); @@ -271,7 +271,7 @@ TEST(LiveInterval, SplitAt) { { // Test that we get null if we split at a position where the interval is dead. 
static constexpr size_t ranges[][2] = {{0, 4}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); LiveInterval* split = interval->SplitAt(5); ASSERT_TRUE(split == nullptr); ASSERT_TRUE(RangesEquals(interval, ranges, arraysize(ranges))); @@ -279,13 +279,13 @@ TEST(LiveInterval, SplitAt) { } TEST(LiveInterval, AddLoopRange) { - ArenaPool pool; - ArenaAllocator allocator(&pool); + ArenaPoolAndAllocator pool; + ScopedArenaAllocator* allocator = pool.GetScopedAllocator(); { // Test when only used in a loop. static constexpr size_t ranges[][2] = {{0, 4}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); interval->AddLoopRange(0, 8); LiveRange* range = interval->GetFirstRange(); ASSERT_TRUE(range->GetNext() == nullptr); @@ -296,7 +296,7 @@ TEST(LiveInterval, AddLoopRange) { { // Test when only used in a loop. static constexpr size_t ranges[][2] = {{2, 4}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); interval->AddLoopRange(0, 8); LiveRange* range = interval->GetFirstRange(); ASSERT_TRUE(range->GetNext() == nullptr); @@ -307,7 +307,7 @@ TEST(LiveInterval, AddLoopRange) { { // Test when used just after the loop. static constexpr size_t ranges[][2] = {{2, 4}, {8, 10}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); interval->AddLoopRange(0, 8); LiveRange* range = interval->GetFirstRange(); ASSERT_TRUE(range->GetNext() == nullptr); @@ -318,7 +318,7 @@ TEST(LiveInterval, AddLoopRange) { { // Test when use after the loop is after a lifetime hole. static constexpr size_t ranges[][2] = {{2, 4}, {10, 12}}; - LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), &allocator); + LiveInterval* interval = BuildInterval(ranges, arraysize(ranges), allocator); interval->AddLoopRange(0, 8); LiveRange* range = interval->GetFirstRange(); ASSERT_EQ(range->GetStart(), 0u); diff --git a/compiler/optimizing/live_ranges_test.cc b/compiler/optimizing/live_ranges_test.cc index f9a955fb0a..66660662e4 100644 --- a/compiler/optimizing/live_ranges_test.cc +++ b/compiler/optimizing/live_ranges_test.cc @@ -19,8 +19,8 @@ #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" -#include "dex_file.h" -#include "dex_instruction.h" +#include "dex/dex_file.h" +#include "dex/dex_instruction.h" #include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -29,10 +29,13 @@ namespace art { -class LiveRangesTest : public CommonCompilerTest {}; +class LiveRangesTest : public OptimizingUnitTest { + public: + HGraph* BuildGraph(const std::vector<uint16_t>& data); +}; -static HGraph* BuildGraph(const uint16_t* data, ArenaAllocator* allocator) { - HGraph* graph = CreateCFG(allocator, data); +HGraph* LiveRangesTest::BuildGraph(const std::vector<uint16_t>& data) { + HGraph* graph = CreateCFG(data); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. 
RemoveSuspendChecks(graph); @@ -54,18 +57,16 @@ TEST_F(LiveRangesTest, CFG1) { * | * 12: exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = BuildGraph(data, &allocator); + HGraph* graph = BuildGraph(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -101,19 +102,17 @@ TEST_F(LiveRangesTest, CFG2) { * | * 26: exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, Instruction::RETURN | 0 << 8); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = BuildGraph(data, &allocator); + HGraph* graph = BuildGraph(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); LiveInterval* interval = liveness.GetInstructionFromSsaIndex(0)->GetLiveInterval(); @@ -152,19 +151,17 @@ TEST_F(LiveRangesTest, CFG3) { * | * 28: exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, Instruction::RETURN | 0 << 8); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = BuildGraph(data, &allocator); + HGraph* graph = BuildGraph(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // Test for the 4 constant. @@ -228,7 +225,7 @@ TEST_F(LiveRangesTest, Loop1) { * 30: exit */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -236,14 +233,12 @@ TEST_F(LiveRangesTest, Loop1) { Instruction::CONST_4 | 5 << 12 | 1 << 8, Instruction::RETURN | 1 << 8); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = BuildGraph(data, &allocator); + HGraph* graph = BuildGraph(data); RemoveSuspendChecks(graph); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. @@ -309,20 +304,18 @@ TEST_F(LiveRangesTest, Loop2) { * We want to make sure the phi at 10 has a lifetime hole after the add at 20. 
*/ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::ADD_INT, 0, 0, Instruction::GOTO | 0xFB00, Instruction::RETURN | 0 << 8); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = BuildGraph(data, &allocator); + HGraph* graph = BuildGraph(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. @@ -385,7 +378,7 @@ TEST_F(LiveRangesTest, CFG4) { * * We want to make sure the constant0 has a lifetime hole after the 16: add. */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::IF_EQ, 5, @@ -394,13 +387,11 @@ TEST_F(LiveRangesTest, CFG4) { Instruction::ADD_INT, 1 << 8, Instruction::RETURN); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = BuildGraph(data, &allocator); + HGraph* graph = BuildGraph(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // Test for the 0 constant. diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc index 37b58ded59..6621a03568 100644 --- a/compiler/optimizing/liveness_test.cc +++ b/compiler/optimizing/liveness_test.cc @@ -19,8 +19,8 @@ #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" -#include "dex_file.h" -#include "dex_instruction.h" +#include "dex/dex_file.h" +#include "dex/dex_instruction.h" #include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" @@ -29,7 +29,10 @@ namespace art { -class LivenessTest : public CommonCompilerTest {}; +class LivenessTest : public OptimizingUnitTest { + protected: + void TestCode(const std::vector<uint16_t>& data, const char* expected); +}; static void DumpBitVector(BitVector* vector, std::ostream& buffer, @@ -43,16 +46,14 @@ static void DumpBitVector(BitVector* vector, buffer << ")\n"; } -static void TestCode(const uint16_t* data, const char* expected) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); +void LivenessTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { + HGraph* graph = CreateCFG(data); // `Inline` conditions into ifs. PrepareForRegisterAllocation(graph).Run(); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); std::ostringstream buffer; @@ -85,7 +86,7 @@ TEST_F(LivenessTest, CFG1) { " kill: (0)\n"; // Constant is not used. 
- const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -107,7 +108,7 @@ TEST_F(LivenessTest, CFG2) { " live out: (0)\n" " kill: (0)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -133,7 +134,7 @@ TEST_F(LivenessTest, CFG3) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 3 << 12 | 0, Instruction::CONST_4 | 4 << 12 | 1 << 8, Instruction::ADD_INT_2ADDR | 1 << 12, @@ -180,7 +181,7 @@ TEST_F(LivenessTest, CFG4) { " live out: (0000)\n" " kill: (0000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -227,7 +228,7 @@ TEST_F(LivenessTest, CFG5) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -272,7 +273,7 @@ TEST_F(LivenessTest, Loop1) { " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -317,7 +318,7 @@ TEST_F(LivenessTest, Loop3) { " live out: (0000)\n" " kill: (0000)\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -369,7 +370,7 @@ TEST_F(LivenessTest, Loop4) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x500, Instruction::IF_EQ, 5, @@ -424,7 +425,7 @@ TEST_F(LivenessTest, Loop5) { " live out: (0001)\n" " kill: (0001)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -474,7 +475,7 @@ TEST_F(LivenessTest, Loop6) { " live out: (0000)\n" " kill: (0000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -529,7 +530,7 @@ TEST_F(LivenessTest, Loop7) { " live out: (00000)\n" " kill: (00000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -579,7 +580,7 @@ TEST_F(LivenessTest, Loop8) { " live out: (000)\n" " kill: (000)\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 6, Instruction::ADD_INT, 0, 0, diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc index 5a8ac59195..8b1812a6de 100644 --- a/compiler/optimizing/load_store_analysis.cc +++ b/compiler/optimizing/load_store_analysis.cc @@ -22,111 
+22,130 @@ namespace art { // The number of heap locations for most of the methods stays below this threshold. constexpr size_t kMaxNumberOfHeapLocations = 32; -// Check if array indices array[idx1 +/- CONST] and array[idx2] MAY alias. -static bool BinaryOpAndIndexMayAlias(const HBinaryOperation* idx1, const HInstruction* idx2) { - DCHECK(idx1 != nullptr); - DCHECK(idx2 != nullptr); +// Test if two integer ranges [l1,h1] and [l2,h2] overlap. +// Note that the ranges are inclusive on both ends. +// l1|------|h1 +// l2|------|h2 +static bool CanIntegerRangesOverlap(int64_t l1, int64_t h1, int64_t l2, int64_t h2) { + return std::max(l1, l2) <= std::min(h1, h2); +} - if (!idx1->IsAdd() && !idx1->IsSub()) { +static bool IsAddOrSub(const HInstruction* instruction) { + return instruction->IsAdd() || instruction->IsSub(); +} + +static bool CanBinaryOpAndIndexAlias(const HBinaryOperation* idx1, + const size_t vector_length1, + const HInstruction* idx2, + const size_t vector_length2) { + if (!IsAddOrSub(idx1)) { // We currently only support Add and Sub operations. return true; } - - HConstant* cst = idx1->GetConstantRight(); - if (cst == nullptr || cst->IsArithmeticZero()) { + if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != idx2) { + // Cannot analyze [i+CONST1] and [j]. return true; } - - if (idx1->GetLeastConstantLeft() == idx2) { - // for example, array[idx1 + 1] and array[idx1] - return false; + if (!idx1->GetConstantRight()->IsIntConstant()) { + return true; } - return true; + // Since 'i' are the same in [i+CONST] and [i], + // further compare [CONST] and [0]. + int64_t l1 = idx1->IsAdd() ? + idx1->GetConstantRight()->AsIntConstant()->GetValue() : + -idx1->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l2 = 0; + int64_t h1 = l1 + (vector_length1 - 1); + int64_t h2 = l2 + (vector_length2 - 1); + return CanIntegerRangesOverlap(l1, h1, l2, h2); } -// Check if Add and Sub MAY alias when used as indices in arrays. -static bool BinaryOpsMayAlias(const HBinaryOperation* idx1, const HBinaryOperation* idx2) { - DCHECK(idx1!= nullptr); - DCHECK(idx2 != nullptr); - - HConstant* idx1_cst = idx1->GetConstantRight(); - HInstruction* idx1_other = idx1->GetLeastConstantLeft(); - HConstant* idx2_cst = idx2->GetConstantRight(); - HInstruction* idx2_other = idx2->GetLeastConstantLeft(); - - if (idx1_cst == nullptr || idx1_other == nullptr || - idx2_cst == nullptr || idx2_other == nullptr) { - // We only analyze patterns like [i +/- CONST]. +static bool CanBinaryOpsAlias(const HBinaryOperation* idx1, + const size_t vector_length1, + const HBinaryOperation* idx2, + const size_t vector_length2) { + if (!IsAddOrSub(idx1) || !IsAddOrSub(idx2)) { + // We currently only support Add and Sub operations. return true; } - - if (idx1_other != idx2_other) { - // For example, [j+1] and [k+1] MAY alias. + if (idx1->AsBinaryOperation()->GetLeastConstantLeft() != + idx2->AsBinaryOperation()->GetLeastConstantLeft()) { + // Cannot analyze [i+CONST1] and [j+CONST2]. return true; } - - if ((idx1->IsAdd() && idx2->IsAdd()) || - (idx1->IsSub() && idx2->IsSub())) { - return idx1_cst->AsIntConstant()->GetValue() == idx2_cst->AsIntConstant()->GetValue(); - } - - if ((idx1->IsAdd() && idx2->IsSub()) || - (idx1->IsSub() && idx2->IsAdd())) { - // [i + CONST1] and [i - CONST2] MAY alias iff CONST1 == -CONST2. - // By checking CONST1 == -CONST2, following cases are handled: - // - Zero constants case [i+0] and [i-0] is handled. 
- // - Overflow cases are handled, for example: - // [i+0x80000000] and [i-0x80000000]; - // [i+0x10] and [i-0xFFFFFFF0]. - // - Other cases [i+CONST1] and [i-CONST2] without any overflow is handled. - return idx1_cst->AsIntConstant()->GetValue() == -(idx2_cst->AsIntConstant()->GetValue()); + if (!idx1->GetConstantRight()->IsIntConstant() || + !idx2->GetConstantRight()->IsIntConstant()) { + return true; } - // All other cases, MAY alias. - return true; + // Since 'i' are the same in [i+CONST1] and [i+CONST2], + // further compare [CONST1] and [CONST2]. + int64_t l1 = idx1->IsAdd() ? + idx1->GetConstantRight()->AsIntConstant()->GetValue() : + -idx1->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t l2 = idx2->IsAdd() ? + idx2->GetConstantRight()->AsIntConstant()->GetValue() : + -idx2->GetConstantRight()->AsIntConstant()->GetValue(); + int64_t h1 = l1 + (vector_length1 - 1); + int64_t h2 = l2 + (vector_length2 - 1); + return CanIntegerRangesOverlap(l1, h1, l2, h2); } -// The following array index cases are handled: -// [i] and [i] -// [CONST1] and [CONST2] -// [i] and [i+CONST] -// [i] and [i-CONST] -// [i+CONST1] and [i+CONST2] -// [i-CONST1] and [i-CONST2] -// [i+CONST1] and [i-CONST2] -// [i-CONST1] and [i+CONST2] -// For other complicated cases, we rely on other passes like GVN and simpilfier -// to optimize these cases before this pass. -// For example: [i+j+k+10] and [i+k+10+j] shall be optimized to [i7+10] and [i7+10]. -bool HeapLocationCollector::CanArrayIndicesAlias(const HInstruction* idx1, - const HInstruction* idx2) const { +bool HeapLocationCollector::CanArrayElementsAlias(const HInstruction* idx1, + const size_t vector_length1, + const HInstruction* idx2, + const size_t vector_length2) const { DCHECK(idx1 != nullptr); DCHECK(idx2 != nullptr); + DCHECK_GE(vector_length1, HeapLocation::kScalar); + DCHECK_GE(vector_length2, HeapLocation::kScalar); + // [i] and [i]. if (idx1 == idx2) { - // [i] and [i] return true; } - if (idx1->IsIntConstant() && idx2->IsIntConstant()) { - // [CONST1] and [CONST2] - return idx1->AsIntConstant()->GetValue() == idx2->AsIntConstant()->GetValue(); - } - - if (idx1->IsBinaryOperation() && !BinaryOpAndIndexMayAlias(idx1->AsBinaryOperation(), idx2)) { - // [i] and [i+/-CONST] - return false; - } - if (idx2->IsBinaryOperation() && !BinaryOpAndIndexMayAlias(idx2->AsBinaryOperation(), idx1)) { - // [i+/-CONST] and [i] - return false; - } - if (idx1->IsBinaryOperation() && idx2->IsBinaryOperation()) { - // [i+/-CONST1] and [i+/-CONST2] - if (!BinaryOpsMayAlias(idx1->AsBinaryOperation(), idx2->AsBinaryOperation())) { - return false; - } + // [CONST1] and [CONST2]. + if (idx1->IsIntConstant() && idx2->IsIntConstant()) { + int64_t l1 = idx1->AsIntConstant()->GetValue(); + int64_t l2 = idx2->AsIntConstant()->GetValue(); + // To avoid any overflow in following CONST+vector_length calculation, + // use int64_t instead of int32_t. + int64_t h1 = l1 + (vector_length1 - 1); + int64_t h2 = l2 + (vector_length2 - 1); + return CanIntegerRangesOverlap(l1, h1, l2, h2); + } + + // [i+CONST] and [i]. + if (idx1->IsBinaryOperation() && + idx1->AsBinaryOperation()->GetConstantRight() != nullptr && + idx1->AsBinaryOperation()->GetLeastConstantLeft() == idx2) { + return CanBinaryOpAndIndexAlias(idx1->AsBinaryOperation(), + vector_length1, + idx2, + vector_length2); + } + + // [i] and [i+CONST]. 
+ if (idx2->IsBinaryOperation() && + idx2->AsBinaryOperation()->GetConstantRight() != nullptr && + idx2->AsBinaryOperation()->GetLeastConstantLeft() == idx1) { + return CanBinaryOpAndIndexAlias(idx2->AsBinaryOperation(), + vector_length2, + idx1, + vector_length1); + } + + // [i+CONST1] and [i+CONST2]. + if (idx1->IsBinaryOperation() && + idx1->AsBinaryOperation()->GetConstantRight() != nullptr && + idx2->IsBinaryOperation() && + idx2->AsBinaryOperation()->GetConstantRight() != nullptr) { + return CanBinaryOpsAlias(idx1->AsBinaryOperation(), + vector_length1, + idx2->AsBinaryOperation(), + vector_length2); } // By default, MAY alias. diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h index a2c17944f2..437e6be418 100644 --- a/compiler/optimizing/load_store_analysis.h +++ b/compiler/optimizing/load_store_analysis.h @@ -25,15 +25,14 @@ namespace art { // A ReferenceInfo contains additional info about a reference such as // whether it's a singleton, returned, etc. -class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { +class ReferenceInfo : public ArenaObject<kArenaAllocLSA> { public: ReferenceInfo(HInstruction* reference, size_t pos) : reference_(reference), position_(pos), is_singleton_(true), is_singleton_and_not_returned_(true), - is_singleton_and_not_deopt_visible_(true), - has_index_aliasing_(false) { + is_singleton_and_not_deopt_visible_(true) { CalculateEscape(reference_, nullptr, &is_singleton_, @@ -70,16 +69,6 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_); } - bool HasIndexAliasing() { - return has_index_aliasing_; - } - - void SetHasIndexAliasing(bool has_index_aliasing) { - // Only allow setting to true. - DCHECK(has_index_aliasing); - has_index_aliasing_ = has_index_aliasing; - } - private: HInstruction* const reference_; const size_t position_; // position in HeapLocationCollector's ref_info_array_. @@ -90,35 +79,36 @@ class ReferenceInfo : public ArenaObject<kArenaAllocMisc> { bool is_singleton_and_not_returned_; // Is singleton and not used as an environment local of HDeoptimize. bool is_singleton_and_not_deopt_visible_; - // Some heap locations with reference_ have array index aliasing, - // e.g. arr[i] and arr[j] may be the same location. - bool has_index_aliasing_; DISALLOW_COPY_AND_ASSIGN(ReferenceInfo); }; // A heap location is a reference-offset/index pair that a value can be loaded from // or stored to. -class HeapLocation : public ArenaObject<kArenaAllocMisc> { +class HeapLocation : public ArenaObject<kArenaAllocLSA> { public: static constexpr size_t kInvalidFieldOffset = -1; - + // Default value for heap locations which are not vector data. + static constexpr size_t kScalar = 1; // TODO: more fine-grained array types. 
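The aliasing rewrite in load_store_analysis.cc reduces every supported index pattern ([CONST1] vs [CONST2], [i+CONST] vs [i], [i+CONST1] vs [i+CONST2]) to a single inclusive-range overlap test over the constant parts, with the access width (vector_length) widening each range. A worked standalone example of that test, with illustrative helper names:

#include <algorithm>
#include <cstdint>
#include <iostream>

// Inclusive ranges [l1,h1] and [l2,h2] overlap iff max(lows) <= min(highs).
bool RangesOverlap(int64_t l1, int64_t h1, int64_t l2, int64_t h2) {
  return std::max(l1, l2) <= std::min(h1, h2);
}

// a[i + c1] touching 'len1' elements vs. a[i + c2] touching 'len2' elements:
// the common index 'i' cancels out, so the accesses may alias iff
// [c1, c1 + len1 - 1] overlaps [c2, c2 + len2 - 1].
bool MayAlias(int64_t c1, int64_t len1, int64_t c2, int64_t len2) {
  return RangesOverlap(c1, c1 + len1 - 1, c2, c2 + len2 - 1);
}

int main() {
  std::cout << MayAlias(1, 1, 0, 1) << "\n";  // 0: scalar a[i+1] vs a[i]
  std::cout << MayAlias(1, 4, 0, 4) << "\n";  // 1: 4-wide accesses at i+1 and i overlap
  std::cout << MayAlias(4, 4, 0, 4) << "\n";  // 0: 4-wide accesses at i+4 and i are disjoint
  return 0;
}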
static constexpr int16_t kDeclaringClassDefIndexForArrays = -1; HeapLocation(ReferenceInfo* ref_info, size_t offset, HInstruction* index, + size_t vector_length, int16_t declaring_class_def_index) : ref_info_(ref_info), offset_(offset), index_(index), + vector_length_(vector_length), declaring_class_def_index_(declaring_class_def_index), - value_killed_by_loop_side_effects_(true) { + value_killed_by_loop_side_effects_(true), + has_aliased_locations_(false) { DCHECK(ref_info != nullptr); DCHECK((offset == kInvalidFieldOffset && index != nullptr) || (offset != kInvalidFieldOffset && index == nullptr)); - if (ref_info->IsSingleton() && !IsArrayElement()) { + if (ref_info->IsSingleton() && !IsArray()) { // Assume this location's value cannot be killed by loop side effects // until proven otherwise. value_killed_by_loop_side_effects_ = false; @@ -128,6 +118,7 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { ReferenceInfo* GetReferenceInfo() const { return ref_info_; } size_t GetOffset() const { return offset_; } HInstruction* GetIndex() const { return index_; } + size_t GetVectorLength() const { return vector_length_; } // Returns the definition of declaring class' dex index. // It's kDeclaringClassDefIndexForArrays for an array element. @@ -135,7 +126,7 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { return declaring_class_def_index_; } - bool IsArrayElement() const { + bool IsArray() const { return index_ != nullptr; } @@ -147,16 +138,40 @@ class HeapLocation : public ArenaObject<kArenaAllocMisc> { value_killed_by_loop_side_effects_ = val; } + bool HasAliasedLocations() const { + return has_aliased_locations_; + } + + void SetHasAliasedLocations(bool val) { + has_aliased_locations_ = val; + } + private: - ReferenceInfo* const ref_info_; // reference for instance/static field or array access. - const size_t offset_; // offset of static/instance field. - HInstruction* const index_; // index of an array element. - const int16_t declaring_class_def_index_; // declaring class's def's dex index. - bool value_killed_by_loop_side_effects_; // value of this location may be killed by loop - // side effects because this location is stored - // into inside a loop. This gives - // better info on whether a singleton's location - // value may be killed by loop side effects. + // Reference for instance/static field, array element or vector data. + ReferenceInfo* const ref_info_; + // Offset of static/instance field. + // Invalid when this HeapLocation is not field. + const size_t offset_; + // Index of an array element or starting index of vector data. + // Invalid when this HeapLocation is not array. + HInstruction* const index_; + // Vector length of vector data. + // When this HeapLocation is not vector data, it's value is kScalar. + const size_t vector_length_; + // Declaring class's def's dex index. + // Invalid when this HeapLocation is not field access. + const int16_t declaring_class_def_index_; + + // Value of this location may be killed by loop side effects + // because this location is stored into inside a loop. + // This gives better info on whether a singleton's location + // value may be killed by loop side effects. + bool value_killed_by_loop_side_effects_; + + // Has aliased heap locations in the method, due to either the + // reference is aliased or the array element is aliased via different + // index names. 
+ bool has_aliased_locations_; DISALLOW_COPY_AND_ASSIGN(HeapLocation); }; @@ -172,12 +187,12 @@ class HeapLocationCollector : public HGraphVisitor { explicit HeapLocationCollector(HGraph* graph) : HGraphVisitor(graph), - ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)), - heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)), - aliasing_matrix_(graph->GetArena(), + ref_info_array_(graph->GetAllocator()->Adapter(kArenaAllocLSA)), + heap_locations_(graph->GetAllocator()->Adapter(kArenaAllocLSA)), + aliasing_matrix_(graph->GetAllocator(), kInitialAliasingMatrixBitVectorSize, true, - kArenaAllocLSE), + kArenaAllocLSA), has_heap_stores_(false), has_volatile_(false), has_monitor_operations_(false) {} @@ -196,8 +211,12 @@ class HeapLocationCollector : public HGraphVisitor { } HInstruction* HuntForOriginalReference(HInstruction* ref) const { + // An original reference can be transformed by instructions like: + // i0 NewArray + // i1 HInstruction(i0) <-- NullCheck, BoundType, IntermediateAddress. + // i2 ArrayGet(i1, index) DCHECK(ref != nullptr); - while (ref->IsNullCheck() || ref->IsBoundType()) { + while (ref->IsNullCheck() || ref->IsBoundType() || ref->IsIntermediateAddress()) { ref = ref->InputAt(0); } return ref; @@ -214,14 +233,26 @@ class HeapLocationCollector : public HGraphVisitor { return nullptr; } - size_t GetArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const { + size_t GetFieldHeapLocation(HInstruction* object, const FieldInfo* field) const { + DCHECK(object != nullptr); + DCHECK(field != nullptr); + return FindHeapLocationIndex(FindReferenceInfoOf(HuntForOriginalReference(object)), + field->GetFieldOffset().SizeValue(), + nullptr, + HeapLocation::kScalar, + field->GetDeclaringClassDefIndex()); + } + + size_t GetArrayHeapLocation(HInstruction* array, + HInstruction* index, + size_t vector_length = HeapLocation::kScalar) const { DCHECK(array != nullptr); DCHECK(index != nullptr); - HInstruction* original_ref = HuntForOriginalReference(array); - ReferenceInfo* ref_info = FindReferenceInfoOf(original_ref); - return FindHeapLocationIndex(ref_info, + DCHECK_GE(vector_length, HeapLocation::kScalar); + return FindHeapLocationIndex(FindReferenceInfoOf(HuntForOriginalReference(array)), HeapLocation::kInvalidFieldOffset, index, + vector_length, HeapLocation::kDeclaringClassDefIndexForArrays); } @@ -238,15 +269,26 @@ class HeapLocationCollector : public HGraphVisitor { } // Find and return the heap location index in heap_locations_. + // NOTE: When heap locations are created, potentially aliasing/overlapping + // accesses are given different indexes. This find function also + // doesn't take aliasing/overlapping into account. For example, + // this function returns three different indexes for: + // - ref_info=array, index=i, vector_length=kScalar; + // - ref_info=array, index=i, vector_length=2; + // - ref_info=array, index=i, vector_length=4; + // In later analysis, ComputeMayAlias() and MayAlias() compute and tell whether + // these indexes alias. 
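  // A short usage sketch (variable names here are illustrative only):
  //
  //   size_t scalar_loc = collector.GetArrayHeapLocation(array, i);
  //   size_t vector_loc = collector.GetArrayHeapLocation(array, i, /* vector_length= */ 4);
  //
  // scalar_loc and vector_loc are two distinct heap location indexes, yet
  // MayAlias(scalar_loc, vector_loc) is true because the scalar element lies
  // inside the 4-element range starting at the same index.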
size_t FindHeapLocationIndex(ReferenceInfo* ref_info, size_t offset, HInstruction* index, + size_t vector_length, int16_t declaring_class_def_index) const { for (size_t i = 0; i < heap_locations_.size(); i++) { HeapLocation* loc = heap_locations_[i]; if (loc->GetReferenceInfo() == ref_info && loc->GetOffset() == offset && loc->GetIndex() == index && + loc->GetVectorLength() == vector_length && loc->GetDeclaringClassDefIndex() == declaring_class_def_index) { return i; } @@ -311,7 +353,10 @@ class HeapLocationCollector : public HGraphVisitor { return true; } - bool CanArrayIndicesAlias(const HInstruction* i1, const HInstruction* i2) const; + bool CanArrayElementsAlias(const HInstruction* idx1, + const size_t vector_length1, + const HInstruction* idx2, + const size_t vector_length2) const; // `index1` and `index2` are indices in the array of collected heap locations. // Returns the position in the bit vector that tracks whether the two heap @@ -332,11 +377,12 @@ class HeapLocationCollector : public HGraphVisitor { // Compute if two locations may alias to each other. bool ComputeMayAlias(size_t index1, size_t index2) const { + DCHECK_NE(index1, index2); HeapLocation* loc1 = heap_locations_[index1]; HeapLocation* loc2 = heap_locations_[index2]; if (loc1->GetOffset() != loc2->GetOffset()) { // Either two different instance fields, or one is an instance - // field and the other is an array element. + // field and the other is an array data. return false; } if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) { @@ -346,15 +392,17 @@ class HeapLocationCollector : public HGraphVisitor { if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) { return false; } - if (loc1->IsArrayElement() && loc2->IsArrayElement()) { - HInstruction* array_index1 = loc1->GetIndex(); - HInstruction* array_index2 = loc2->GetIndex(); - if (!CanArrayIndicesAlias(array_index1, array_index2)) { + if (loc1->IsArray() && loc2->IsArray()) { + HInstruction* idx1 = loc1->GetIndex(); + HInstruction* idx2 = loc2->GetIndex(); + size_t vector_length1 = loc1->GetVectorLength(); + size_t vector_length2 = loc2->GetVectorLength(); + if (!CanArrayElementsAlias(idx1, vector_length1, idx2, vector_length2)) { return false; } - ReferenceInfo* ref_info = loc1->GetReferenceInfo(); - ref_info->SetHasIndexAliasing(true); } + loc1->SetHasAliasedLocations(true); + loc2->SetHasAliasedLocations(true); return true; } @@ -362,14 +410,14 @@ class HeapLocationCollector : public HGraphVisitor { ReferenceInfo* ref_info = FindReferenceInfoOf(instruction); if (ref_info == nullptr) { size_t pos = ref_info_array_.size(); - ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos); + ref_info = new (GetGraph()->GetAllocator()) ReferenceInfo(instruction, pos); ref_info_array_.push_back(ref_info); } return ref_info; } void CreateReferenceInfoForReferenceType(HInstruction* instruction) { - if (instruction->GetType() != Primitive::kPrimNot) { + if (instruction->GetType() != DataType::Type::kReference) { return; } DCHECK(FindReferenceInfoOf(instruction) == nullptr); @@ -379,14 +427,15 @@ class HeapLocationCollector : public HGraphVisitor { HeapLocation* GetOrCreateHeapLocation(HInstruction* ref, size_t offset, HInstruction* index, + size_t vector_length, int16_t declaring_class_def_index) { HInstruction* original_ref = HuntForOriginalReference(ref); ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref); size_t heap_location_idx = FindHeapLocationIndex( - ref_info, offset, index, 
declaring_class_def_index); + ref_info, offset, index, vector_length, declaring_class_def_index); if (heap_location_idx == kHeapLocationNotFound) { - HeapLocation* heap_loc = new (GetGraph()->GetArena()) - HeapLocation(ref_info, offset, index, declaring_class_def_index); + HeapLocation* heap_loc = new (GetGraph()->GetAllocator()) + HeapLocation(ref_info, offset, index, vector_length, declaring_class_def_index); heap_locations_.push_back(heap_loc); return heap_loc; } @@ -399,12 +448,19 @@ class HeapLocationCollector : public HGraphVisitor { } const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex(); const size_t offset = field_info.GetFieldOffset().SizeValue(); - return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index); + return GetOrCreateHeapLocation(ref, + offset, + nullptr, + HeapLocation::kScalar, + declaring_class_def_index); } - void VisitArrayAccess(HInstruction* array, HInstruction* index) { - GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset, - index, HeapLocation::kDeclaringClassDefIndexForArrays); + void VisitArrayAccess(HInstruction* array, HInstruction* index, size_t vector_length) { + GetOrCreateHeapLocation(array, + HeapLocation::kInvalidFieldOffset, + index, + vector_length, + HeapLocation::kDeclaringClassDefIndexForArrays); } void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE { @@ -452,58 +508,42 @@ class HeapLocationCollector : public HGraphVisitor { // since we cannot accurately track the fields. void VisitArrayGet(HArrayGet* instruction) OVERRIDE { - VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); + HInstruction* array = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + VisitArrayAccess(array, index, HeapLocation::kScalar); CreateReferenceInfoForReferenceType(instruction); } void VisitArraySet(HArraySet* instruction) OVERRIDE { - VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1)); + HInstruction* array = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + VisitArrayAccess(array, index, HeapLocation::kScalar); has_heap_stores_ = true; } - void VisitNewInstance(HNewInstance* new_instance) OVERRIDE { - // Any references appearing in the ref_info_array_ so far cannot alias with new_instance. - CreateReferenceInfoForReferenceType(new_instance); - } - - void VisitNewArray(HNewArray* new_array) OVERRIDE { - // Any references appearing in the ref_info_array_ so far cannot alias with new_array. 
- CreateReferenceInfoForReferenceType(new_array); - } - - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE { + void VisitVecLoad(HVecLoad* instruction) OVERRIDE { + HInstruction* array = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + VisitArrayAccess(array, index, instruction->GetVectorLength()); CreateReferenceInfoForReferenceType(instruction); } - void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokeUnresolved(HInvokeUnresolved* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitInvokePolymorphic(HInvokePolymorphic* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitLoadString(HLoadString* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitPhi(HPhi* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); - } - - void VisitParameterValue(HParameterValue* instruction) OVERRIDE { - CreateReferenceInfoForReferenceType(instruction); + void VisitVecStore(HVecStore* instruction) OVERRIDE { + HInstruction* array = instruction->InputAt(0); + HInstruction* index = instruction->InputAt(1); + VisitArrayAccess(array, index, instruction->GetVectorLength()); + has_heap_stores_ = true; } - void VisitSelect(HSelect* instruction) OVERRIDE { + void VisitInstruction(HInstruction* instruction) OVERRIDE { + // Any new-instance or new-array cannot alias with references that + // pre-exist the new-instance/new-array. We append entries into + // ref_info_array_ which keeps track of the order of creation + // of reference values since we visit the blocks in reverse post order. + // + // By default, VisitXXX() (including VisitPhi()) calls VisitInstruction(), + // unless VisitXXX() is overridden. VisitInstanceFieldGet() etc. above + // also call CreateReferenceInfoForReferenceType() explicitly. 
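    // For example, HNewInstance, HNewArray, the HInvoke variants, HLoadString,
    // HPhi, HParameterValue and HSelect (which previously each had a dedicated
    // visitor) now all reach this default visitor and get their ReferenceInfo here.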
CreateReferenceInfoForReferenceType(instruction); } @@ -524,8 +564,8 @@ class HeapLocationCollector : public HGraphVisitor { class LoadStoreAnalysis : public HOptimization { public: - explicit LoadStoreAnalysis(HGraph* graph) - : HOptimization(graph, kLoadStoreAnalysisPassName), + explicit LoadStoreAnalysis(HGraph* graph, const char* name = kLoadStoreAnalysisPassName) + : HOptimization(graph, name), heap_location_collector_(graph) {} const HeapLocationCollector& GetHeapLocationCollector() const { diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc index 81344b52f6..56361a8c90 100644 --- a/compiler/optimizing/load_store_analysis_test.cc +++ b/compiler/optimizing/load_store_analysis_test.cc @@ -22,19 +22,15 @@ namespace art { -class LoadStoreAnalysisTest : public CommonCompilerTest { +class LoadStoreAnalysisTest : public OptimizingUnitTest { public: - LoadStoreAnalysisTest() : pool_(), allocator_(&pool_) { - graph_ = CreateGraph(&allocator_); - } + LoadStoreAnalysisTest() : graph_(CreateGraph()) { } - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; }; TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); @@ -48,17 +44,19 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { // array_get2 ArrayGet [array, c2] // array_set1 ArraySet [array, c1, c3] // array_set2 ArraySet [array, index, c3] - HInstruction* array = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); - HInstruction* index = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); + HInstruction* array = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); + HInstruction* index = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kInt32); HInstruction* c1 = graph_->GetIntConstant(1); HInstruction* c2 = graph_->GetIntConstant(2); HInstruction* c3 = graph_->GetIntConstant(3); - HInstruction* array_get1 = new (&allocator_) HArrayGet(array, c1, Primitive::kPrimInt, 0); - HInstruction* array_get2 = new (&allocator_) HArrayGet(array, c2, Primitive::kPrimInt, 0); - HInstruction* array_set1 = new (&allocator_) HArraySet(array, c1, c3, Primitive::kPrimInt, 0); - HInstruction* array_set2 = new (&allocator_) HArraySet(array, index, c3, Primitive::kPrimInt, 0); + HInstruction* array_get1 = new (GetAllocator()) HArrayGet(array, c1, DataType::Type::kInt32, 0); + HInstruction* array_get2 = new (GetAllocator()) HArrayGet(array, c2, DataType::Type::kInt32, 0); + HInstruction* array_set1 = + new (GetAllocator()) HArraySet(array, c1, c3, DataType::Type::kInt32, 0); + HInstruction* array_set2 = + new (GetAllocator()) HArraySet(array, index, c3, DataType::Type::kInt32, 0); entry->AddInstruction(array); entry->AddInstruction(index); entry->AddInstruction(array_get1); @@ -80,11 +78,12 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { // Test queries on HeapLocationCollector's ref info and index records. 
ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array); - size_t field_off = HeapLocation::kInvalidFieldOffset; + size_t field = HeapLocation::kInvalidFieldOffset; + size_t vec = HeapLocation::kScalar; size_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays; - size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c1, class_def); - size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c2, class_def); - size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field_off, index, class_def); + size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field, c1, vec, class_def); + size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field, c2, vec, class_def); + size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field, index, vec, class_def); // must find this reference info for array in HeapLocationCollector. ASSERT_TRUE(ref != nullptr); // must find these heap locations; @@ -106,7 +105,7 @@ TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) { } TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); @@ -118,38 +117,38 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { // get_field20 InstanceFieldGet [object, 20] HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimNot); - HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object, - c1, - nullptr, - Primitive::kPrimInt, - MemberOffset(10), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph_->GetDexFile(), - 0); - HInstanceFieldGet* get_field10 = new (&allocator_) HInstanceFieldGet(object, - nullptr, - Primitive::kPrimInt, - MemberOffset(10), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph_->GetDexFile(), - 0); - HInstanceFieldGet* get_field20 = new (&allocator_) HInstanceFieldGet(object, - nullptr, - Primitive::kPrimInt, - MemberOffset(20), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph_->GetDexFile(), - 0); + HInstruction* object = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kReference); + HInstanceFieldSet* set_field10 = new (GetAllocator()) HInstanceFieldSet(object, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); + HInstanceFieldGet* get_field10 = new (GetAllocator()) HInstanceFieldGet(object, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); + HInstanceFieldGet* get_field20 = new (GetAllocator()) HInstanceFieldGet(object, + nullptr, + DataType::Type::kInt32, + MemberOffset(20), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); entry->AddInstruction(object); entry->AddInstruction(set_field10); entry->AddInstruction(get_field10); @@ -169,10 +168,8 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { // Test queries on HeapLocationCollector's ref info and index records. 
ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(object); - size_t loc1 = heap_location_collector.FindHeapLocationIndex( - ref, 10, nullptr, kUnknownClassDefIndex); - size_t loc2 = heap_location_collector.FindHeapLocationIndex( - ref, 20, nullptr, kUnknownClassDefIndex); + size_t loc1 = heap_location_collector.GetFieldHeapLocation(object, &get_field10->GetFieldInfo()); + size_t loc2 = heap_location_collector.GetFieldHeapLocation(object, &get_field20->GetFieldInfo()); // must find references info for object and in HeapLocationCollector. ASSERT_TRUE(ref != nullptr); // must find these heap locations. @@ -185,32 +182,38 @@ TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) { } TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); graph_->BuildDominatorTree(); - HInstruction* array = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); - HInstruction* index = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); + HInstruction* array = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); + HInstruction* index = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kInt32); HInstruction* c0 = graph_->GetIntConstant(0); HInstruction* c1 = graph_->GetIntConstant(1); HInstruction* c_neg1 = graph_->GetIntConstant(-1); - HInstruction* add0 = new (&allocator_) HAdd(Primitive::kPrimInt, index, c0); - HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, index, c1); - HInstruction* sub0 = new (&allocator_) HSub(Primitive::kPrimInt, index, c0); - HInstruction* sub1 = new (&allocator_) HSub(Primitive::kPrimInt, index, c1); - HInstruction* sub_neg1 = new (&allocator_) HSub(Primitive::kPrimInt, index, c_neg1); - HInstruction* rev_sub1 = new (&allocator_) HSub(Primitive::kPrimInt, c1, index); - HInstruction* arr_set1 = new (&allocator_) HArraySet(array, c0, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set2 = new (&allocator_) HArraySet(array, c1, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set3 = new (&allocator_) HArraySet(array, add0, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set4 = new (&allocator_) HArraySet(array, add1, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set5 = new (&allocator_) HArraySet(array, sub0, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set6 = new (&allocator_) HArraySet(array, sub1, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set7 = new (&allocator_) HArraySet(array, rev_sub1, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set8 = new (&allocator_) HArraySet(array, sub_neg1, c0, Primitive::kPrimInt, 0); + HInstruction* add0 = new (GetAllocator()) HAdd(DataType::Type::kInt32, index, c0); + HInstruction* add1 = new (GetAllocator()) HAdd(DataType::Type::kInt32, index, c1); + HInstruction* sub0 = new (GetAllocator()) HSub(DataType::Type::kInt32, index, c0); + HInstruction* sub1 = new (GetAllocator()) HSub(DataType::Type::kInt32, index, c1); + HInstruction* sub_neg1 = new (GetAllocator()) HSub(DataType::Type::kInt32, index, c_neg1); + HInstruction* rev_sub1 = new (GetAllocator()) HSub(DataType::Type::kInt32, c1, index); + HInstruction* arr_set1 = new (GetAllocator()) HArraySet(array, c0, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set2 = new 
(GetAllocator()) HArraySet(array, c1, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set3 = + new (GetAllocator()) HArraySet(array, add0, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set4 = + new (GetAllocator()) HArraySet(array, add1, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set5 = + new (GetAllocator()) HArraySet(array, sub0, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set6 = + new (GetAllocator()) HArraySet(array, sub1, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set7 = + new (GetAllocator()) HArraySet(array, rev_sub1, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set8 = + new (GetAllocator()) HArraySet(array, sub_neg1, c0, DataType::Type::kInt32, 0); entry->AddInstruction(array); entry->AddInstruction(index); @@ -243,41 +246,246 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexAliasingTest) { size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; // Test alias: array[0] and array[1] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, c0); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, c1); + loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); + loc2 = heap_location_collector.GetArrayHeapLocation(array, c1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0] and array[i-0] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, add0); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, sub0); + loc1 = heap_location_collector.GetArrayHeapLocation(array, add0); + loc2 = heap_location_collector.GetArrayHeapLocation(array, sub0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+1] and array[i-1] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, add1); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, sub1); + loc1 = heap_location_collector.GetArrayHeapLocation(array, add1); + loc2 = heap_location_collector.GetArrayHeapLocation(array, sub1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+1] and array[1-i] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, add1); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, rev_sub1); + loc1 = heap_location_collector.GetArrayHeapLocation(array, add1); + loc2 = heap_location_collector.GetArrayHeapLocation(array, rev_sub1); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+1] and array[i-(-1)] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, add1); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, sub_neg1); + loc1 = heap_location_collector.GetArrayHeapLocation(array, add1); + loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_neg1); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); } +TEST_F(LoadStoreAnalysisTest, ArrayAliasingTest) { + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + graph_->BuildDominatorTree(); + + HInstruction* array = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); + HInstruction* index = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kInt32); + HInstruction* c0 = graph_->GetIntConstant(0); + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* c6 = graph_->GetIntConstant(6); + HInstruction* c8 = graph_->GetIntConstant(8); + + HInstruction* arr_set_0 = new 
(GetAllocator()) HArraySet(array, + c0, + c0, + DataType::Type::kInt32, + 0); + HInstruction* arr_set_1 = new (GetAllocator()) HArraySet(array, + c1, + c0, + DataType::Type::kInt32, + 0); + HInstruction* arr_set_i = new (GetAllocator()) HArraySet(array, + index, + c0, + DataType::Type::kInt32, + 0); + + HVecOperation* v1 = new (GetAllocator()) HVecReplicateScalar(GetAllocator(), + c1, + DataType::Type::kInt32, + 4, + kNoDexPc); + HVecOperation* v2 = new (GetAllocator()) HVecReplicateScalar(GetAllocator(), + c1, + DataType::Type::kInt32, + 2, + kNoDexPc); + HInstruction* i_add6 = new (GetAllocator()) HAdd(DataType::Type::kInt32, index, c6); + HInstruction* i_add8 = new (GetAllocator()) HAdd(DataType::Type::kInt32, index, c8); + + HInstruction* vstore_0 = new (GetAllocator()) HVecStore( + GetAllocator(), + array, + c0, + v1, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); + HInstruction* vstore_1 = new (GetAllocator()) HVecStore( + GetAllocator(), + array, + c1, + v1, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); + HInstruction* vstore_8 = new (GetAllocator()) HVecStore( + GetAllocator(), + array, + c8, + v1, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); + HInstruction* vstore_i = new (GetAllocator()) HVecStore( + GetAllocator(), + array, + index, + v1, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); + HInstruction* vstore_i_add6 = new (GetAllocator()) HVecStore( + GetAllocator(), + array, + i_add6, + v1, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); + HInstruction* vstore_i_add8 = new (GetAllocator()) HVecStore( + GetAllocator(), + array, + i_add8, + v1, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); + HInstruction* vstore_i_add6_vlen2 = new (GetAllocator()) HVecStore( + GetAllocator(), + array, + i_add6, + v2, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 2, + kNoDexPc); + + entry->AddInstruction(array); + entry->AddInstruction(index); + + entry->AddInstruction(arr_set_0); + entry->AddInstruction(arr_set_1); + entry->AddInstruction(arr_set_i); + entry->AddInstruction(v1); + entry->AddInstruction(v2); + entry->AddInstruction(i_add6); + entry->AddInstruction(i_add8); + entry->AddInstruction(vstore_0); + entry->AddInstruction(vstore_1); + entry->AddInstruction(vstore_8); + entry->AddInstruction(vstore_i); + entry->AddInstruction(vstore_i_add6); + entry->AddInstruction(vstore_i_add8); + entry->AddInstruction(vstore_i_add6_vlen2); + + LoadStoreAnalysis lsa(graph_); + lsa.Run(); + const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector(); + + // LSA/HeapLocationCollector should see those instructions. + ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 10U); + ASSERT_TRUE(heap_location_collector.HasHeapStores()); + + // Test queries on HeapLocationCollector's aliasing matrix after load store analysis. 
+ size_t loc1, loc2; + + // Test alias: array[0] and array[0,1,2,3] + loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); + loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[0] and array[8,9,10,11] + loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); + loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[1] and array[8,9,10,11] + loc1 = heap_location_collector.GetArrayHeapLocation(array, c1); + loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[1] and array[0,1,2,3] + loc1 = heap_location_collector.GetArrayHeapLocation(array, c1); + loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[0,1,2,3] and array[8,9,10,11] + loc1 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + loc2 = heap_location_collector.GetArrayHeapLocation(array, c8, 4); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[0,1,2,3] and array[1,2,3,4] + loc1 = heap_location_collector.GetArrayHeapLocation(array, c1, 4); + loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[0] and array[i,i+1,i+2,i+3] + loc1 = heap_location_collector.GetArrayHeapLocation(array, c0); + loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[i] and array[0,1,2,3] + loc1 = heap_location_collector.GetArrayHeapLocation(array, index); + loc2 = heap_location_collector.GetArrayHeapLocation(array, c0, 4); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[i] and array[i,i+1,i+2,i+3] + loc1 = heap_location_collector.GetArrayHeapLocation(array, index); + loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[i] and array[i+8,i+9,i+10,i+11] + loc1 = heap_location_collector.GetArrayHeapLocation(array, index); + loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[i+6,i+7,i+8,i+9] and array[i+8,i+9,i+10,i+11] + // Test partial overlap. + loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 4); + loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4); + ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[i+6,i+7] and array[i,i+1,i+2,i+3] + // Test different vector lengths. 
+ loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 2); + loc2 = heap_location_collector.GetArrayHeapLocation(array, index, 4); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); + + // Test alias: array[i+6,i+7] and array[i+8,i+9,i+10,i+11] + loc1 = heap_location_collector.GetArrayHeapLocation(array, i_add6, 2); + loc2 = heap_location_collector.GetArrayHeapLocation(array, i_add8, 4); + ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); +} + TEST_F(LoadStoreAnalysisTest, ArrayIndexCalculationOverflowTest) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); graph_->BuildDominatorTree(); - HInstruction* array = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); - HInstruction* index = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); + HInstruction* array = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); + HInstruction* index = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kInt32); HInstruction* c0 = graph_->GetIntConstant(0); HInstruction* c_0x80000000 = graph_->GetIntConstant(0x80000000); @@ -287,34 +495,41 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexCalculationOverflowTest) { HInstruction* c_0x80000001 = graph_->GetIntConstant(0x80000001); // `index+0x80000000` and `index-0x80000000` array indices MAY alias. - HInstruction* add_0x80000000 = new (&allocator_) HAdd(Primitive::kPrimInt, index, c_0x80000000); - HInstruction* sub_0x80000000 = new (&allocator_) HSub(Primitive::kPrimInt, index, c_0x80000000); - HInstruction* arr_set_1 = new (&allocator_) HArraySet( - array, add_0x80000000, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_2 = new (&allocator_) HArraySet( - array, sub_0x80000000, c0, Primitive::kPrimInt, 0); + HInstruction* add_0x80000000 = new (GetAllocator()) HAdd( + DataType::Type::kInt32, index, c_0x80000000); + HInstruction* sub_0x80000000 = new (GetAllocator()) HSub( + DataType::Type::kInt32, index, c_0x80000000); + HInstruction* arr_set_1 = new (GetAllocator()) HArraySet( + array, add_0x80000000, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_2 = new (GetAllocator()) HArraySet( + array, sub_0x80000000, c0, DataType::Type::kInt32, 0); // `index+0x10` and `index-0xFFFFFFF0` array indices MAY alias. - HInstruction* add_0x10 = new (&allocator_) HAdd(Primitive::kPrimInt, index, c_0x10); - HInstruction* sub_0xFFFFFFF0 = new (&allocator_) HSub(Primitive::kPrimInt, index, c_0xFFFFFFF0); - HInstruction* arr_set_3 = new (&allocator_) HArraySet( - array, add_0x10, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_4 = new (&allocator_) HArraySet( - array, sub_0xFFFFFFF0, c0, Primitive::kPrimInt, 0); + HInstruction* add_0x10 = new (GetAllocator()) HAdd(DataType::Type::kInt32, index, c_0x10); + HInstruction* sub_0xFFFFFFF0 = new (GetAllocator()) HSub( + DataType::Type::kInt32, index, c_0xFFFFFFF0); + HInstruction* arr_set_3 = new (GetAllocator()) HArraySet( + array, add_0x10, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_4 = new (GetAllocator()) HArraySet( + array, sub_0xFFFFFFF0, c0, DataType::Type::kInt32, 0); // `index+0x7FFFFFFF` and `index-0x80000001` array indices MAY alias. 
- HInstruction* add_0x7FFFFFFF = new (&allocator_) HAdd(Primitive::kPrimInt, index, c_0x7FFFFFFF); - HInstruction* sub_0x80000001 = new (&allocator_) HSub(Primitive::kPrimInt, index, c_0x80000001); - HInstruction* arr_set_5 = new (&allocator_) HArraySet( - array, add_0x7FFFFFFF, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_6 = new (&allocator_) HArraySet( - array, sub_0x80000001, c0, Primitive::kPrimInt, 0); + HInstruction* add_0x7FFFFFFF = new (GetAllocator()) HAdd( + DataType::Type::kInt32, index, c_0x7FFFFFFF); + HInstruction* sub_0x80000001 = new (GetAllocator()) HSub( + DataType::Type::kInt32, index, c_0x80000001); + HInstruction* arr_set_5 = new (GetAllocator()) HArraySet( + array, add_0x7FFFFFFF, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_6 = new (GetAllocator()) HArraySet( + array, sub_0x80000001, c0, DataType::Type::kInt32, 0); // `index+0` and `index-0` array indices MAY alias. - HInstruction* add_0 = new (&allocator_) HAdd(Primitive::kPrimInt, index, c0); - HInstruction* sub_0 = new (&allocator_) HSub(Primitive::kPrimInt, index, c0); - HInstruction* arr_set_7 = new (&allocator_) HArraySet(array, add_0, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_8 = new (&allocator_) HArraySet(array, sub_0, c0, Primitive::kPrimInt, 0); + HInstruction* add_0 = new (GetAllocator()) HAdd(DataType::Type::kInt32, index, c0); + HInstruction* sub_0 = new (GetAllocator()) HSub(DataType::Type::kInt32, index, c0); + HInstruction* arr_set_7 = new (GetAllocator()) HArraySet( + array, add_0, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_8 = new (GetAllocator()) HArraySet( + array, sub_0, c0, DataType::Type::kInt32, 0); entry->AddInstruction(array); entry->AddInstruction(index); @@ -348,34 +563,98 @@ TEST_F(LoadStoreAnalysisTest, ArrayIndexCalculationOverflowTest) { size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; // Test alias: array[i+0x80000000] and array[i-0x80000000] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, add_0x80000000); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, sub_0x80000000); + loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x80000000); + loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0x10] and array[i-0xFFFFFFF0] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, add_0x10); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, sub_0xFFFFFFF0); + loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x10); + loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0xFFFFFFF0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0x7FFFFFFF] and array[i-0x80000001] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, add_0x7FFFFFFF); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, sub_0x80000001); + loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0x7FFFFFFF); + loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000001); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Test alias: array[i+0] and array[i-0] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, add_0); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, sub_0); + loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0); + loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0); 
ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); // Should not alias: - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, sub_0x80000000); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, sub_0x80000001); + loc1 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000); + loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000001); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); // Should not alias: - loc1 = heap_location_collector.GetArrayAccessHeapLocation(array, add_0); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(array, sub_0x80000000); + loc1 = heap_location_collector.GetArrayHeapLocation(array, add_0); + loc2 = heap_location_collector.GetArrayHeapLocation(array, sub_0x80000000); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); } +TEST_F(LoadStoreAnalysisTest, TestHuntOriginalRef) { + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(entry); + graph_->SetEntryBlock(entry); + + // Different ways where orignal array reference are transformed & passed to ArrayGet. + // ParameterValue --> ArrayGet + // ParameterValue --> BoundType --> ArrayGet + // ParameterValue --> BoundType --> NullCheck --> ArrayGet + // ParameterValue --> BoundType --> NullCheck --> IntermediateAddress --> ArrayGet + HInstruction* c1 = graph_->GetIntConstant(1); + HInstruction* array = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kReference); + HInstruction* array_get1 = new (GetAllocator()) HArrayGet(array, + c1, + DataType::Type::kInt32, + 0); + + HInstruction* bound_type = new (GetAllocator()) HBoundType(array); + HInstruction* array_get2 = new (GetAllocator()) HArrayGet(bound_type, + c1, + DataType::Type::kInt32, + 0); + + HInstruction* null_check = new (GetAllocator()) HNullCheck(bound_type, 0); + HInstruction* array_get3 = new (GetAllocator()) HArrayGet(null_check, + c1, + DataType::Type::kInt32, + 0); + + HInstruction* inter_addr = new (GetAllocator()) HIntermediateAddress(null_check, c1, 0); + HInstruction* array_get4 = new (GetAllocator()) HArrayGet(inter_addr, + c1, + DataType::Type::kInt32, + 0); + entry->AddInstruction(array); + entry->AddInstruction(array_get1); + entry->AddInstruction(bound_type); + entry->AddInstruction(array_get2); + entry->AddInstruction(null_check); + entry->AddInstruction(array_get3); + entry->AddInstruction(inter_addr); + entry->AddInstruction(array_get4); + + HeapLocationCollector heap_location_collector(graph_); + heap_location_collector.VisitBasicBlock(entry); + + // Test that the HeapLocationCollector should be able to tell + // that there is only ONE array location, no matter how many + // times the original reference has been transformed by BoundType, + // NullCheck, IntermediateAddress, etc. 
+ ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 1U); + size_t loc1 = heap_location_collector.GetArrayHeapLocation(array, c1); + size_t loc2 = heap_location_collector.GetArrayHeapLocation(bound_type, c1); + size_t loc3 = heap_location_collector.GetArrayHeapLocation(null_check, c1); + size_t loc4 = heap_location_collector.GetArrayHeapLocation(inter_addr, c1); + ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound); + ASSERT_EQ(loc1, loc2); + ASSERT_EQ(loc1, loc3); + ASSERT_EQ(loc1, loc4); +} + } // namespace art diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc index 211528b4bd..237ecd3c10 100644 --- a/compiler/optimizing/load_store_elimination.cc +++ b/compiler/optimizing/load_store_elimination.cc @@ -14,14 +14,62 @@ * limitations under the License. */ -#include "load_store_analysis.h" #include "load_store_elimination.h" +#include "base/array_ref.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "escape.h" +#include "load_store_analysis.h" #include "side_effects_analysis.h" #include <iostream> +/** + * The general algorithm of load-store elimination (LSE). + * Load-store analysis in the previous pass collects a list of heap locations + * and does alias analysis of those heap locations. + * LSE keeps track of a list of heap values corresponding to the heap + * locations. It visits basic blocks in reverse post order and for + * each basic block, visits instructions sequentially, and processes + * instructions as follows: + * - If the instruction is a load, and the heap location for that load has a + * valid heap value, the load can be eliminated. In order to maintain the + * validity of all heap locations during the optimization phase, the real + * elimination is delayed till the end of LSE. + * - If the instruction is a store, it updates the heap value for the heap + * location of the store with the store instruction. The real heap value + * can be fetched from the store instruction. Heap values are invalidated + * for heap locations that may alias with the store instruction's heap + * location. The store instruction can be eliminated unless the value stored + * is later needed e.g. by a load from the same/aliased heap location or + * the heap location persists at method return/deoptimization. + * The store instruction is also needed if it's not used to track the heap + * value anymore, e.g. when it fails to merge with the heap values from other + * predecessors. + * - A store that stores the same value as the heap value is eliminated. + * - The list of heap values are merged at basic block entry from the basic + * block's predecessors. The algorithm is single-pass, so loop side-effects is + * used as best effort to decide if a heap location is stored inside the loop. + * - A special type of objects called singletons are instantiated in the method + * and have a single name, i.e. no aliases. Singletons have exclusive heap + * locations since they have no aliases. Singletons are helpful in narrowing + * down the life span of a heap location such that they do not always + * need to participate in merging heap values. Allocation of a singleton + * can be eliminated if that singleton is not used and does not persist + * at method return/deoptimization. + * - For newly instantiated instances, their heap values are initialized to + * language defined default values. 
+ * - Some instructions such as invokes are treated as loading and invalidating + * all the heap values, depending on the instruction's side effects. + * - Finalizable objects are considered as persisting at method + * return/deoptimization. + * - Currently this LSE algorithm doesn't handle SIMD graph, e.g. with VecLoad + * and VecStore instructions. + * - Currently this LSE algorithm doesn't handle graph with try-catch, due to + * the special block merging structure. + */ + namespace art { // An unknown heap value. Loads with such a value in the heap location cannot be eliminated. @@ -36,25 +84,27 @@ static HInstruction* const kUnknownHeapValue = static HInstruction* const kDefaultHeapValue = reinterpret_cast<HInstruction*>(static_cast<uintptr_t>(-2)); -class LSEVisitor : public HGraphVisitor { +// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke(). +class LSEVisitor : public HGraphDelegateVisitor { public: LSEVisitor(HGraph* graph, const HeapLocationCollector& heap_locations_collector, - const SideEffectsAnalysis& side_effects) - : HGraphVisitor(graph), + const SideEffectsAnalysis& side_effects, + OptimizingCompilerStats* stats) + : HGraphDelegateVisitor(graph, stats), heap_location_collector_(heap_locations_collector), side_effects_(side_effects), + allocator_(graph->GetArenaStack()), heap_values_for_(graph->GetBlocks().size(), - ArenaVector<HInstruction*>(heap_locations_collector. - GetNumberOfHeapLocations(), - kUnknownHeapValue, - graph->GetArena()->Adapter(kArenaAllocLSE)), - graph->GetArena()->Adapter(kArenaAllocLSE)), - removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), - substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)), - possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)), - singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)), - singleton_new_arrays_(graph->GetArena()->Adapter(kArenaAllocLSE)) { + ScopedArenaVector<HInstruction*>(heap_locations_collector. + GetNumberOfHeapLocations(), + kUnknownHeapValue, + allocator_.Adapter(kArenaAllocLSE)), + allocator_.Adapter(kArenaAllocLSE)), + removed_loads_(allocator_.Adapter(kArenaAllocLSE)), + substitute_instructions_for_loads_(allocator_.Adapter(kArenaAllocLSE)), + possibly_removed_stores_(allocator_.Adapter(kArenaAllocLSE)), + singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)) { } void VisitBasicBlock(HBasicBlock* block) OVERRIDE { @@ -68,25 +118,123 @@ class LSEVisitor : public HGraphVisitor { HGraphVisitor::VisitBasicBlock(block); } + HTypeConversion* AddTypeConversionIfNecessary(HInstruction* instruction, + HInstruction* value, + DataType::Type expected_type) { + HTypeConversion* type_conversion = nullptr; + // Should never add type conversion into boolean value. + if (expected_type != DataType::Type::kBool && + !DataType::IsTypeConversionImplicit(value->GetType(), expected_type)) { + type_conversion = new (GetGraph()->GetAllocator()) HTypeConversion( + expected_type, value, instruction->GetDexPc()); + instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction); + } + return type_conversion; + } + + // Find an instruction's substitute if it's a removed load. + // Return the same instruction if it should not be removed. 
+ HInstruction* FindSubstitute(HInstruction* instruction) { + if (!IsLoad(instruction)) { + return instruction; + } + size_t size = removed_loads_.size(); + for (size_t i = 0; i < size; i++) { + if (removed_loads_[i] == instruction) { + HInstruction* substitute = substitute_instructions_for_loads_[i]; + // The substitute list is a flat hierarchy. + DCHECK_EQ(FindSubstitute(substitute), substitute); + return substitute; + } + } + return instruction; + } + + void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) { + DCHECK(IsLoad(load)); + DCHECK_EQ(FindSubstitute(heap_value), heap_value) << + "Unexpected heap_value that has a substitute " << heap_value->DebugName(); + removed_loads_.push_back(load); + substitute_instructions_for_loads_.push_back(heap_value); + } + + // Scan the list of removed loads to see if we can reuse `type_conversion`, if + // the other removed load has the same substitute and type and is dominated + // by `type_conversioni`. + void TryToReuseTypeConversion(HInstruction* type_conversion, size_t index) { + size_t size = removed_loads_.size(); + HInstruction* load = removed_loads_[index]; + HInstruction* substitute = substitute_instructions_for_loads_[index]; + for (size_t j = index + 1; j < size; j++) { + HInstruction* load2 = removed_loads_[j]; + HInstruction* substitute2 = substitute_instructions_for_loads_[j]; + if (load2 == nullptr) { + DCHECK(substitute2->IsTypeConversion()); + continue; + } + DCHECK(load2->IsInstanceFieldGet() || + load2->IsStaticFieldGet() || + load2->IsArrayGet()); + DCHECK(substitute2 != nullptr); + if (substitute2 == substitute && + load2->GetType() == load->GetType() && + type_conversion->GetBlock()->Dominates(load2->GetBlock()) && + // Don't share across irreducible loop headers. + // TODO: can be more fine-grained than this by testing each dominator. + (load2->GetBlock() == type_conversion->GetBlock() || + !GetGraph()->HasIrreducibleLoops())) { + // The removed_loads_ are added in reverse post order. + DCHECK(type_conversion->StrictlyDominates(load2)); + load2->ReplaceWith(type_conversion); + load2->GetBlock()->RemoveInstruction(load2); + removed_loads_[j] = nullptr; + substitute_instructions_for_loads_[j] = type_conversion; + } + } + } + // Remove recorded instructions that should be eliminated. void RemoveInstructions() { size_t size = removed_loads_.size(); DCHECK_EQ(size, substitute_instructions_for_loads_.size()); for (size_t i = 0; i < size; i++) { HInstruction* load = removed_loads_[i]; - DCHECK(load != nullptr); + if (load == nullptr) { + // The load has been handled in the scan for type conversion below. + DCHECK(substitute_instructions_for_loads_[i]->IsTypeConversion()); + continue; + } DCHECK(load->IsInstanceFieldGet() || load->IsStaticFieldGet() || load->IsArrayGet()); HInstruction* substitute = substitute_instructions_for_loads_[i]; DCHECK(substitute != nullptr); - // Keep tracing substitute till one that's not removed. - HInstruction* sub_sub = FindSubstitute(substitute); - while (sub_sub != substitute) { - substitute = sub_sub; - sub_sub = FindSubstitute(substitute); + // We proactively retrieve the substitute for a removed load, so + // a load that has a substitute should not be observed as a heap + // location value. + DCHECK_EQ(FindSubstitute(substitute), substitute); + + // The load expects to load the heap value as type load->GetType(). + // However the tracked heap value may not be of that type. An explicit + // type conversion may be needed. 
+ // There are actually three types involved here: + // (1) tracked heap value's type (type A) + // (2) heap location (field or element)'s type (type B) + // (3) load's type (type C) + // We guarantee that type A stored as type B and then fetched out as + // type C is the same as casting from type A to type C directly, since + // type B and type C will have the same size which is guarenteed in + // HInstanceFieldGet/HStaticFieldGet/HArrayGet's SetType(). + // So we only need one type conversion from type A to type C. + HTypeConversion* type_conversion = AddTypeConversionIfNecessary( + load, substitute, load->GetType()); + if (type_conversion != nullptr) { + TryToReuseTypeConversion(type_conversion, i); + load->ReplaceWith(type_conversion); + substitute_instructions_for_loads_[i] = type_conversion; + } else { + load->ReplaceWith(substitute); } - load->ReplaceWith(substitute); load->GetBlock()->RemoveInstruction(load); } @@ -100,32 +248,69 @@ class LSEVisitor : public HGraphVisitor { // * - Constructor fences (they never escape this thread). // * - Allocations (if they are unused). for (HInstruction* new_instance : singleton_new_instances_) { - HConstructorFence::RemoveConstructorFences(new_instance); + size_t removed = HConstructorFence::RemoveConstructorFences(new_instance); + MaybeRecordStat(stats_, + MethodCompilationStat::kConstructorFenceRemovedLSE, + removed); if (!new_instance->HasNonEnvironmentUses()) { new_instance->RemoveEnvironmentUsers(); new_instance->GetBlock()->RemoveInstruction(new_instance); } } - for (HInstruction* new_array : singleton_new_arrays_) { - HConstructorFence::RemoveConstructorFences(new_array); + } - if (!new_array->HasNonEnvironmentUses()) { - new_array->RemoveEnvironmentUsers(); - new_array->GetBlock()->RemoveInstruction(new_array); - } + private: + static bool IsLoad(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; } + // Unresolved load is not treated as a load. + return instruction->IsInstanceFieldGet() || + instruction->IsStaticFieldGet() || + instruction->IsArrayGet(); } - private: - // If heap_values[index] is an instance field store, need to keep the store. - // This is necessary if a heap value is killed due to merging, or loop side - // effects (which is essentially merging also), since a load later from the - // location won't be eliminated. + static bool IsStore(HInstruction* instruction) { + if (instruction == kUnknownHeapValue || instruction == kDefaultHeapValue) { + return false; + } + // Unresolved store is not treated as a store. + return instruction->IsInstanceFieldSet() || + instruction->IsArraySet() || + instruction->IsStaticFieldSet(); + } + + // Returns the real heap value by finding its substitute or by "peeling" + // a store instruction. + HInstruction* GetRealHeapValue(HInstruction* heap_value) { + if (IsLoad(heap_value)) { + return FindSubstitute(heap_value); + } + if (!IsStore(heap_value)) { + return heap_value; + } + + // We keep track of store instructions as the heap values which might be + // eliminated if the stores are later found not necessary. The real stored + // value needs to be fetched from the store instruction. 
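    // For instance (illustrative): if the tracked heap value for a[i] is the
    // HArraySet of `a[i] = v`, the real heap value is `v` itself, or v's
    // substitute in case v is a load that has already been eliminated.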
+ if (heap_value->IsInstanceFieldSet()) { + heap_value = heap_value->AsInstanceFieldSet()->GetValue(); + } else if (heap_value->IsStaticFieldSet()) { + heap_value = heap_value->AsStaticFieldSet()->GetValue(); + } else { + DCHECK(heap_value->IsArraySet()); + heap_value = heap_value->AsArraySet()->GetValue(); + } + // heap_value may already be a removed load. + return FindSubstitute(heap_value); + } + + // If heap_value is a store, need to keep the store. + // This is necessary if a heap value is killed or replaced by another value, + // so that the store is no longer used to track heap value. void KeepIfIsStore(HInstruction* heap_value) { - if (heap_value == kDefaultHeapValue || - heap_value == kUnknownHeapValue || - !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) { + if (!IsStore(heap_value)) { return; } auto idx = std::find(possibly_removed_stores_.begin(), @@ -136,26 +321,41 @@ class LSEVisitor : public HGraphVisitor { } } + // If a heap location X may alias with heap location at `loc_index` + // and heap_values of that heap location X holds a store, keep that store. + // It's needed for a dependent load that's not eliminated since any store + // that may put value into the load's heap location needs to be kept. + void KeepStoresIfAliasedToLocation(ScopedArenaVector<HInstruction*>& heap_values, + size_t loc_index) { + for (size_t i = 0; i < heap_values.size(); i++) { + if ((i == loc_index) || heap_location_collector_.MayAlias(i, loc_index)) { + KeepIfIsStore(heap_values[i]); + } + } + } + void HandleLoopSideEffects(HBasicBlock* block) { DCHECK(block->IsLoopHeader()); int block_id = block->GetBlockId(); - ArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block_id]; + HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); + ScopedArenaVector<HInstruction*>& pre_header_heap_values = + heap_values_for_[pre_header->GetBlockId()]; - // Don't eliminate loads in irreducible loops. This is safe for singletons, because - // they are always used by the non-eliminated loop-phi. + // Don't eliminate loads in irreducible loops. + // Also keep the stores before the loop. if (block->GetLoopInformation()->IsIrreducible()) { if (kIsDebugBuild) { for (size_t i = 0; i < heap_values.size(); i++) { DCHECK_EQ(heap_values[i], kUnknownHeapValue); } } + for (size_t i = 0; i < heap_values.size(); i++) { + KeepIfIsStore(pre_header_heap_values[i]); + } return; } - HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader(); - ArenaVector<HInstruction*>& pre_header_heap_values = - heap_values_for_[pre_header->GetBlockId()]; - // Inherit the values from pre-header. for (size_t i = 0; i < heap_values.size(); i++) { heap_values[i] = pre_header_heap_values[i]; @@ -167,80 +367,136 @@ class LSEVisitor : public HGraphVisitor { for (size_t i = 0; i < heap_values.size(); i++) { HeapLocation* location = heap_location_collector_.GetHeapLocation(i); ReferenceInfo* ref_info = location->GetReferenceInfo(); - if (ref_info->IsSingletonAndRemovable() && - !location->IsValueKilledByLoopSideEffects()) { - // A removable singleton's field that's not stored into inside a loop is + if (ref_info->IsSingleton() && !location->IsValueKilledByLoopSideEffects()) { + // A singleton's field that's not stored into inside a loop is // invariant throughout the loop. Nothing to do. 
- DCHECK(ref_info->IsSingletonAndRemovable()); } else { - // heap value is killed by loop side effects (stored into directly, or - // due to aliasing). Or the heap value may be needed after method return - // or deoptimization. + // heap value is killed by loop side effects. KeepIfIsStore(pre_header_heap_values[i]); heap_values[i] = kUnknownHeapValue; } } + } else { + // The loop doesn't kill any value. } } void MergePredecessorValues(HBasicBlock* block) { - const ArenaVector<HBasicBlock*>& predecessors = block->GetPredecessors(); + ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors()); if (predecessors.size() == 0) { return; } + if (block->IsExitBlock()) { + // Exit block doesn't really merge values since the control flow ends in + // its predecessors. Each predecessor needs to make sure stores are kept + // if necessary. + return; + } - ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; + ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { HInstruction* merged_value = nullptr; + // If we can merge the store itself from the predecessors, we keep + // the store as the heap value as long as possible. In case we cannot + // merge the store, we try to merge the values of the stores. + HInstruction* merged_store_value = nullptr; // Whether merged_value is a result that's merged from all predecessors. bool from_all_predecessors = true; ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); + HInstruction* ref = ref_info->GetReference(); HInstruction* singleton_ref = nullptr; if (ref_info->IsSingleton()) { - // We do more analysis of liveness when merging heap values for such - // cases since stores into such references may potentially be eliminated. - singleton_ref = ref_info->GetReference(); + // We do more analysis based on singleton's liveness when merging + // heap values for such cases. + singleton_ref = ref; } for (HBasicBlock* predecessor : predecessors) { HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i]; + if (!IsStore(pred_value)) { + pred_value = FindSubstitute(pred_value); + } + DCHECK(pred_value != nullptr); + HInstruction* pred_store_value = GetRealHeapValue(pred_value); if ((singleton_ref != nullptr) && !singleton_ref->GetBlock()->Dominates(predecessor)) { - // singleton_ref is not live in this predecessor. Skip this predecessor since - // it does not really have the location. + // singleton_ref is not live in this predecessor. No need to merge + // since singleton_ref is not live at the beginning of this block. DCHECK_EQ(pred_value, kUnknownHeapValue); from_all_predecessors = false; - continue; + break; } if (merged_value == nullptr) { // First seen heap value. + DCHECK(pred_value != nullptr); merged_value = pred_value; } else if (pred_value != merged_value) { // There are conflicting values. merged_value = kUnknownHeapValue; + // We may still be able to merge store values. + } + + // Conflicting stores may be storing the same value. We do another merge + // of real stored values. + if (merged_store_value == nullptr) { + // First seen store value. + DCHECK(pred_store_value != nullptr); + merged_store_value = pred_store_value; + } else if (pred_store_value != merged_store_value) { + // There are conflicting store values. + merged_store_value = kUnknownHeapValue; + // There must be conflicting stores also. + DCHECK_EQ(merged_value, kUnknownHeapValue); + // No need to merge anymore. 
break; } } - if (merged_value == kUnknownHeapValue || ref_info->IsSingletonAndNonRemovable()) { - // There are conflicting heap values from different predecessors, - // or the heap value may be needed after method return or deoptimization. - // Keep the last store in each predecessor since future loads cannot be eliminated. - for (HBasicBlock* predecessor : predecessors) { - ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()]; - KeepIfIsStore(pred_values[i]); + if (merged_value == nullptr) { + DCHECK(!from_all_predecessors); + DCHECK(singleton_ref != nullptr); + } + if (from_all_predecessors) { + if (ref_info->IsSingletonAndRemovable() && + block->IsSingleReturnOrReturnVoidAllowingPhis()) { + // Values in the singleton are not needed anymore. + } else if (!IsStore(merged_value)) { + // We don't track merged value as a store anymore. We have to + // hold the stores in predecessors live here. + for (HBasicBlock* predecessor : predecessors) { + ScopedArenaVector<HInstruction*>& pred_values = + heap_values_for_[predecessor->GetBlockId()]; + KeepIfIsStore(pred_values[i]); + } } + } else { + DCHECK(singleton_ref != nullptr); + // singleton_ref is non-existing at the beginning of the block. There is + // no need to keep the stores. } - if ((merged_value == nullptr) || !from_all_predecessors) { + if (!from_all_predecessors) { DCHECK(singleton_ref != nullptr); DCHECK((singleton_ref->GetBlock() == block) || - !singleton_ref->GetBlock()->Dominates(block)); + !singleton_ref->GetBlock()->Dominates(block)) + << "method: " << GetGraph()->GetMethodName(); // singleton_ref is not defined before block or defined only in some of its // predecessors, so block doesn't really have the location at its entry. heap_values[i] = kUnknownHeapValue; - } else { + } else if (predecessors.size() == 1) { + // Inherit heap value from the single predecessor. + DCHECK_EQ(heap_values_for_[predecessors[0]->GetBlockId()][i], merged_value); heap_values[i] = merged_value; + } else { + DCHECK(merged_value == kUnknownHeapValue || + merged_value == kDefaultHeapValue || + merged_value->GetBlock()->Dominates(block)); + if (merged_value != kUnknownHeapValue) { + heap_values[i] = merged_value; + } else { + // Stores in different predecessors may be storing the same value. 
+ heap_values[i] = merged_store_value; + } } } } @@ -264,21 +520,22 @@ class LSEVisitor : public HGraphVisitor { } } - HInstruction* GetDefaultValue(Primitive::Type type) { + HInstruction* GetDefaultValue(DataType::Type type) { switch (type) { - case Primitive::kPrimNot: + case DataType::Type::kReference: return GetGraph()->GetNullConstant(); - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: return GetGraph()->GetIntConstant(0); - case Primitive::kPrimLong: + case DataType::Type::kInt64: return GetGraph()->GetLongConstant(0); - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: return GetGraph()->GetFloatConstant(0); - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: return GetGraph()->GetDoubleConstant(0); default: UNREACHABLE(); @@ -289,40 +546,30 @@ class LSEVisitor : public HGraphVisitor { HInstruction* ref, size_t offset, HInstruction* index, + size_t vector_length, int16_t declaring_class_def_index) { HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); size_t idx = heap_location_collector_.FindHeapLocationIndex( - ref_info, offset, index, declaring_class_def_index); + ref_info, offset, index, vector_length, declaring_class_def_index); DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound); - ArenaVector<HInstruction*>& heap_values = + ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; if (heap_value == kDefaultHeapValue) { HInstruction* constant = GetDefaultValue(instruction->GetType()); - removed_loads_.push_back(instruction); - substitute_instructions_for_loads_.push_back(constant); + AddRemovedLoad(instruction, constant); heap_values[idx] = constant; return; } - if (heap_value != kUnknownHeapValue) { - if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) { - HInstruction* store = heap_value; - // This load must be from a singleton since it's from the same - // field/element that a "removed" store puts the value. That store - // must be to a singleton's field/element. - DCHECK(ref_info->IsSingleton()); - // Get the real heap value of the store. - heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2); - } - } + heap_value = GetRealHeapValue(heap_value); if (heap_value == kUnknownHeapValue) { // Load isn't eliminated. Put the load as the value into the HeapLocation. // This acts like GVN but with better aliasing analysis. 
heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); } else { - if (Primitive::PrimitiveKind(heap_value->GetType()) - != Primitive::PrimitiveKind(instruction->GetType())) { + if (DataType::Kind(heap_value->GetType()) != DataType::Kind(instruction->GetType())) { // The only situation where the same heap location has different type is when // we do an array get on an instruction that originates from the null constant // (the null could be behind a field access, an array access, a null check or @@ -333,21 +580,33 @@ class LSEVisitor : public HGraphVisitor { DCHECK(heap_value->IsArrayGet()) << heap_value->DebugName(); DCHECK(instruction->IsArrayGet()) << instruction->DebugName(); } + // Load isn't eliminated. Put the load as the value into the HeapLocation. + // This acts like GVN but with better aliasing analysis. + heap_values[idx] = instruction; + KeepStoresIfAliasedToLocation(heap_values, idx); return; } - removed_loads_.push_back(instruction); - substitute_instructions_for_loads_.push_back(heap_value); + AddRemovedLoad(instruction, heap_value); TryRemovingNullCheck(instruction); } } bool Equal(HInstruction* heap_value, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); + if (heap_value == kUnknownHeapValue) { + // Don't compare kUnknownHeapValue with other values. + return false; + } if (heap_value == value) { return true; } if (heap_value == kDefaultHeapValue && GetDefaultValue(value->GetType()) == value) { return true; } + HInstruction* real_heap_value = GetRealHeapValue(heap_value); + if (real_heap_value != heap_value) { + return Equal(real_heap_value, value); + } return false; } @@ -355,73 +614,61 @@ class LSEVisitor : public HGraphVisitor { HInstruction* ref, size_t offset, HInstruction* index, + size_t vector_length, int16_t declaring_class_def_index, HInstruction* value) { + DCHECK(!IsStore(value)) << value->DebugName(); + // value may already have a substitute. + value = FindSubstitute(value); HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref); ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref); size_t idx = heap_location_collector_.FindHeapLocationIndex( - ref_info, offset, index, declaring_class_def_index); + ref_info, offset, index, vector_length, declaring_class_def_index); DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound); - ArenaVector<HInstruction*>& heap_values = + ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; HInstruction* heap_value = heap_values[idx]; - bool same_value = false; bool possibly_redundant = false; + if (Equal(heap_value, value)) { // Store into the heap location with the same value. - same_value = true; - } else if (index != nullptr && ref_info->HasIndexAliasing()) { - // For array element, don't eliminate stores if the index can be aliased. - } else if (ref_info->IsSingleton()) { - // Store into a field of a singleton. The value cannot be killed due to - // aliasing/invocation. It can be redundant since future loads can - // directly get the value set by this instruction. The value can still be killed due to - // merging or loop side effects. Stores whose values are killed due to merging/loop side - // effects later will be removed from possibly_removed_stores_ when that is detected. - // Stores whose values may be needed after method return or deoptimization - // are also removed from possibly_removed_stores_ when that is detected. 
- possibly_redundant = true; - HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance(); - if (new_instance != nullptr && new_instance->IsFinalizable()) { - // Finalizable objects escape globally. Need to keep the store. - possibly_redundant = false; - } else { - HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); - if (loop_info != nullptr) { - // instruction is a store in the loop so the loop must does write. - DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); - - if (loop_info->IsDefinedOutOfTheLoop(original_ref)) { - DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader())); - // Keep the store since its value may be needed at the loop header. - possibly_redundant = false; - } else { - // The singleton is created inside the loop. Value stored to it isn't needed at - // the loop header. This is true for outer loops also. - } + // This store can be eliminated right away. + instruction->GetBlock()->RemoveInstruction(instruction); + return; + } else { + HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation(); + if (loop_info == nullptr) { + // Store is not in a loop. We try to precisely track the heap value by + // the store. + possibly_redundant = true; + } else if (!loop_info->IsIrreducible()) { + // instruction is a store in the loop so the loop must do write. + DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite()); + if (ref_info->IsSingleton() && !loop_info->IsDefinedOutOfTheLoop(original_ref)) { + // original_ref is created inside the loop. Value stored to it isn't needed at + // the loop header. This is true for outer loops also. + possibly_redundant = true; + } else { + // Keep the store since its value may be needed at the loop header. } + } else { + // Keep the store inside irreducible loops. } } - if (same_value || possibly_redundant) { + if (possibly_redundant) { possibly_removed_stores_.push_back(instruction); } - if (!same_value) { - if (possibly_redundant) { - DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet()); - // Put the store as the heap value. If the value is loaded from heap - // by a load later, this store isn't really redundant. - heap_values[idx] = instruction; - } else { - heap_values[idx] = value; - } - } + // Put the store as the heap value. If the value is loaded or needed after + // return/deoptimization later, this store isn't really redundant. + heap_values[idx] = instruction; + // This store may kill values in other heap locations due to aliasing. for (size_t i = 0; i < heap_values.size(); i++) { if (i == idx) { continue; } - if (heap_values[i] == value) { + if (Equal(heap_values[i], value)) { // Same value should be kept even if aliasing happens. continue; } @@ -430,7 +677,9 @@ class LSEVisitor : public HGraphVisitor { continue; } if (heap_location_collector_.MayAlias(i, idx)) { - // Kill heap locations that may alias. + // Kill heap locations that may alias and as a result if the heap value + // is a store, the store needs to be kept. 
+ KeepIfIsStore(heap_values[i]); heap_values[i] = kUnknownHeapValue; } } @@ -440,7 +689,12 @@ class LSEVisitor : public HGraphVisitor { HInstruction* obj = instruction->InputAt(0); size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); - VisitGetLocation(instruction, obj, offset, nullptr, declaring_class_def_index); + VisitGetLocation(instruction, + obj, + offset, + nullptr, + HeapLocation::kScalar, + declaring_class_def_index); } void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE { @@ -448,14 +702,25 @@ class LSEVisitor : public HGraphVisitor { size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); HInstruction* value = instruction->InputAt(1); - VisitSetLocation(instruction, obj, offset, nullptr, declaring_class_def_index, value); + VisitSetLocation(instruction, + obj, + offset, + nullptr, + HeapLocation::kScalar, + declaring_class_def_index, + value); } void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE { HInstruction* cls = instruction->InputAt(0); size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); - VisitGetLocation(instruction, cls, offset, nullptr, declaring_class_def_index); + VisitGetLocation(instruction, + cls, + offset, + nullptr, + HeapLocation::kScalar, + declaring_class_def_index); } void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE { @@ -463,7 +728,13 @@ class LSEVisitor : public HGraphVisitor { size_t offset = instruction->GetFieldInfo().GetFieldOffset().SizeValue(); int16_t declaring_class_def_index = instruction->GetFieldInfo().GetDeclaringClassDefIndex(); HInstruction* value = instruction->InputAt(1); - VisitSetLocation(instruction, cls, offset, nullptr, declaring_class_def_index, value); + VisitSetLocation(instruction, + cls, + offset, + nullptr, + HeapLocation::kScalar, + declaring_class_def_index, + value); } void VisitArrayGet(HArrayGet* instruction) OVERRIDE { @@ -473,6 +744,7 @@ class LSEVisitor : public HGraphVisitor { array, HeapLocation::kInvalidFieldOffset, index, + HeapLocation::kScalar, HeapLocation::kDeclaringClassDefIndexForArrays); } @@ -484,67 +756,97 @@ class LSEVisitor : public HGraphVisitor { array, HeapLocation::kInvalidFieldOffset, index, + HeapLocation::kScalar, HeapLocation::kDeclaringClassDefIndexForArrays, value); } void VisitDeoptimize(HDeoptimize* instruction) { - const ArenaVector<HInstruction*>& heap_values = + const ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[instruction->GetBlock()->GetBlockId()]; for (HInstruction* heap_value : heap_values) { - // Filter out fake instructions before checking instruction kind below. - if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) { - continue; - } // A store is kept as the heap value for possibly removed stores. - if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) { - // Check whether the reference for a store is used by an environment local of - // HDeoptimize. + // That value stored is generally observeable after deoptimization, except + // for singletons that don't escape after deoptimization. 
+ if (IsStore(heap_value)) { + if (heap_value->IsStaticFieldSet()) { + KeepIfIsStore(heap_value); + continue; + } HInstruction* reference = heap_value->InputAt(0); - DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()); - for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { - HEnvironment* user = use.GetUser(); - if (user->GetHolder() == instruction) { - // The singleton for the store is visible at this deoptimization - // point. Need to keep the store so that the heap value is - // seen by the interpreter. + if (heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton()) { + if (reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable()) { + // Finalizable objects always escape. KeepIfIsStore(heap_value); + continue; } + // Check whether the reference for a store is used by an environment local of + // HDeoptimize. If not, the singleton is not observed after + // deoptimization. + for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) { + HEnvironment* user = use.GetUser(); + if (user->GetHolder() == instruction) { + // The singleton for the store is visible at this deoptimization + // point. Need to keep the store so that the heap value is + // seen by the interpreter. + KeepIfIsStore(heap_value); + } + } + } else { + KeepIfIsStore(heap_value); + } } } } - void HandleInvoke(HInstruction* invoke) { - ArenaVector<HInstruction*>& heap_values = - heap_values_for_[invoke->GetBlock()->GetBlockId()]; + // Keep necessary stores before exiting a method via return/throw. + void HandleExit(HBasicBlock* block) { + const ScopedArenaVector<HInstruction*>& heap_values = + heap_values_for_[block->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { + HInstruction* heap_value = heap_values[i]; ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); - if (ref_info->IsSingleton()) { - // Singleton references cannot be seen by the callee. - } else { - heap_values[i] = kUnknownHeapValue; + if (!ref_info->IsSingletonAndRemovable()) { + KeepIfIsStore(heap_value); } } } - void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE { - HandleInvoke(invoke); + void VisitReturn(HReturn* instruction) OVERRIDE { + HandleExit(instruction->GetBlock()); } - void VisitInvokeVirtual(HInvokeVirtual* invoke) OVERRIDE { - HandleInvoke(invoke); + void VisitReturnVoid(HReturnVoid* return_void) OVERRIDE { + HandleExit(return_void->GetBlock()); } - void VisitInvokeInterface(HInvokeInterface* invoke) OVERRIDE { - HandleInvoke(invoke); + void VisitThrow(HThrow* throw_instruction) OVERRIDE { + HandleExit(throw_instruction->GetBlock()); } - void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE { - HandleInvoke(invoke); + void HandleInvoke(HInstruction* instruction) { + SideEffects side_effects = instruction->GetSideEffects(); + ScopedArenaVector<HInstruction*>& heap_values = + heap_values_for_[instruction->GetBlock()->GetBlockId()]; + for (size_t i = 0; i < heap_values.size(); i++) { + ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo(); + if (ref_info->IsSingleton()) { + // Singleton references cannot be seen by the callee. + } else { + if (side_effects.DoesAnyRead()) { + // Invocation may read the heap value. + KeepIfIsStore(heap_values[i]); + } + if (side_effects.DoesAnyWrite()) { + // Keep the store since it's not used to track the heap value anymore.
+ KeepIfIsStore(heap_values[i]); + heap_values[i] = kUnknownHeapValue; + } + } + } } - void VisitInvokePolymorphic(HInvokePolymorphic* invoke) OVERRIDE { + void VisitInvoke(HInvoke* invoke) OVERRIDE { HandleInvoke(invoke); } @@ -578,12 +880,12 @@ class LSEVisitor : public HGraphVisitor { // new_instance isn't used for field accesses. No need to process it. return; } - if (ref_info->IsSingletonAndRemovable() && - !new_instance->IsFinalizable() && - !new_instance->NeedsChecks()) { + if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) { + DCHECK(!new_instance->IsFinalizable()); + // new_instance can potentially be eliminated. singleton_new_instances_.push_back(new_instance); } - ArenaVector<HInstruction*>& heap_values = + ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[new_instance->GetBlock()->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { HInstruction* ref = @@ -603,9 +905,15 @@ class LSEVisitor : public HGraphVisitor { return; } if (ref_info->IsSingletonAndRemovable()) { - singleton_new_arrays_.push_back(new_array); + if (new_array->GetLength()->IsIntConstant() && + new_array->GetLength()->AsIntConstant()->GetValue() >= 0) { + // new_array can potentially be eliminated. + singleton_new_instances_.push_back(new_array); + } else { + // new_array may throw NegativeArraySizeException. Keep it. + } } - ArenaVector<HInstruction*>& heap_values = + ScopedArenaVector<HInstruction*>& heap_values = heap_values_for_[new_array->GetBlock()->GetBlockId()]; for (size_t i = 0; i < heap_values.size(); i++) { HeapLocation* location = heap_location_collector_.GetHeapLocation(i); @@ -617,35 +925,25 @@ class LSEVisitor : public HGraphVisitor { } } - // Find an instruction's substitute if it should be removed. - // Return the same instruction if it should not be removed. - HInstruction* FindSubstitute(HInstruction* instruction) { - size_t size = removed_loads_.size(); - for (size_t i = 0; i < size; i++) { - if (removed_loads_[i] == instruction) { - return substitute_instructions_for_loads_[i]; - } - } - return instruction; - } - const HeapLocationCollector& heap_location_collector_; const SideEffectsAnalysis& side_effects_; + // Use local allocator for allocating memory. + ScopedArenaAllocator allocator_; + // One array of heap values for each block. - ArenaVector<ArenaVector<HInstruction*>> heap_values_for_; + ScopedArenaVector<ScopedArenaVector<HInstruction*>> heap_values_for_; // We record the instructions that should be eliminated but may be // used by heap locations. They'll be removed in the end. - ArenaVector<HInstruction*> removed_loads_; - ArenaVector<HInstruction*> substitute_instructions_for_loads_; + ScopedArenaVector<HInstruction*> removed_loads_; + ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_; // Stores in this list may be removed from the list later when it's // found that the store cannot be eliminated. - ArenaVector<HInstruction*> possibly_removed_stores_; + ScopedArenaVector<HInstruction*> possibly_removed_stores_; - ArenaVector<HInstruction*> singleton_new_instances_; - ArenaVector<HInstruction*> singleton_new_arrays_; + ScopedArenaVector<HInstruction*> singleton_new_instances_; DISALLOW_COPY_AND_ASSIGN(LSEVisitor); }; @@ -663,7 +961,12 @@ void LoadStoreElimination::Run() { return; } - LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_); + // TODO: analyze VecLoad/VecStore better. 
+ if (graph_->HasSIMD()) { + return; + } + + LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_, stats_); for (HBasicBlock* block : graph_->GetReversePostOrder()) { lse_visitor.VisitBasicBlock(block); } diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h index efe71c733a..7153541baf 100644 --- a/compiler/optimizing/load_store_elimination.h +++ b/compiler/optimizing/load_store_elimination.h @@ -28,8 +28,10 @@ class LoadStoreElimination : public HOptimization { public: LoadStoreElimination(HGraph* graph, const SideEffectsAnalysis& side_effects, - const LoadStoreAnalysis& lsa) - : HOptimization(graph, kLoadStoreEliminationPassName), + const LoadStoreAnalysis& lsa, + OptimizingCompilerStats* stats, + const char* name = kLoadStoreEliminationPassName) + : HOptimization(graph, name, stats), side_effects_(side_effects), lsa_(lsa) {} diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc index a9fe209063..5879c6fa07 100644 --- a/compiler/optimizing/locations.cc +++ b/compiler/optimizing/locations.cc @@ -18,8 +18,8 @@ #include <type_traits> -#include "nodes.h" #include "code_generator.h" +#include "nodes.h" namespace art { @@ -28,10 +28,10 @@ static_assert(std::is_trivially_copyable<Location>::value, "Location should be t LocationSummary::LocationSummary(HInstruction* instruction, CallKind call_kind, - bool intrinsified) - : inputs_(instruction->InputCount(), - instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), - temps_(instruction->GetBlock()->GetGraph()->GetArena()->Adapter(kArenaAllocLocationSummary)), + bool intrinsified, + ArenaAllocator* allocator) + : inputs_(instruction->InputCount(), allocator->Adapter(kArenaAllocLocationSummary)), + temps_(allocator->Adapter(kArenaAllocLocationSummary)), call_kind_(call_kind), intrinsified_(intrinsified), has_custom_slow_path_calling_convention_(false), @@ -43,11 +43,17 @@ LocationSummary::LocationSummary(HInstruction* instruction, instruction->SetLocations(this); if (NeedsSafepoint()) { - ArenaAllocator* arena = instruction->GetBlock()->GetGraph()->GetArena(); - stack_mask_ = ArenaBitVector::Create(arena, 0, true, kArenaAllocLocationSummary); + stack_mask_ = ArenaBitVector::Create(allocator, 0, true, kArenaAllocLocationSummary); } } +LocationSummary::LocationSummary(HInstruction* instruction, + CallKind call_kind, + bool intrinsified) + : LocationSummary(instruction, + call_kind, + intrinsified, + instruction->GetBlock()->GetGraph()->GetAllocator()) {} Location Location::RegisterOrConstant(HInstruction* instruction) { return instruction->IsConstant() diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 6f0dbce2df..d56c151748 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -665,6 +665,11 @@ class LocationSummary : public ArenaObject<kArenaAllocLocationSummary> { } private: + LocationSummary(HInstruction* instruction, + CallKind call_kind, + bool intrinsified, + ArenaAllocator* allocator); + ArenaVector<Location> inputs_; ArenaVector<Location> temps_; const CallKind call_kind_; diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc index 422e58debb..9f278a9f4e 100644 --- a/compiler/optimizing/loop_optimization.cc +++ b/compiler/optimizing/loop_optimization.cc @@ -16,23 +16,41 @@ #include "loop_optimization.h" -#include "arch/instruction_set.h" #include "arch/arm/instruction_set_features_arm.h" #include 
"arch/arm64/instruction_set_features_arm64.h" +#include "arch/instruction_set.h" #include "arch/mips/instruction_set_features_mips.h" #include "arch/mips64/instruction_set_features_mips64.h" #include "arch/x86/instruction_set_features_x86.h" #include "arch/x86_64/instruction_set_features_x86_64.h" #include "driver/compiler_driver.h" #include "linear_order.h" +#include "mirror/array-inl.h" +#include "mirror/string.h" namespace art { // Enables vectorization (SIMDization) in the loop optimizer. static constexpr bool kEnableVectorization = true; -// All current SIMD targets want 16-byte alignment. -static constexpr size_t kAlignedBase = 16; +// No loop unrolling factor (just one copy of the loop-body). +static constexpr uint32_t kNoUnrollingFactor = 1; + +// +// Static helpers. +// + +// Base alignment for arrays/strings guaranteed by the Android runtime. +static uint32_t BaseAlignment() { + return kObjectAlignment; +} + +// Hidden offset for arrays/strings guaranteed by the Android runtime. +static uint32_t HiddenOffset(DataType::Type type, bool is_string_char_at) { + return is_string_char_at + ? mirror::String::ValueOffset().Uint32Value() + : mirror::Array::DataOffset(DataType::Size(type)).Uint32Value(); +} // Remove the instruction from the graph. A bit more elaborate than the usual // instruction removal, since there may be a cycle in the use structure. @@ -40,6 +58,8 @@ static void RemoveFromCycle(HInstruction* instruction) { instruction->RemoveAsUserOfAllInputs(); instruction->RemoveEnvironmentUsers(); instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false); + RemoveEnvironmentUses(instruction); + ResetEnvironmentInputRecords(instruction); } // Detect a goto block and sets succ to the single successor. @@ -66,27 +86,32 @@ static bool IsEarlyExit(HLoopInformation* loop_info) { return false; } -// Detect a sign extension from the given type. Returns the promoted operand on success. +// Forward declaration. +static bool IsZeroExtensionAndGet(HInstruction* instruction, + DataType::Type type, + /*out*/ HInstruction** operand); + +// Detect a sign extension in instruction from the given type. +// Returns the promoted operand on success. static bool IsSignExtensionAndGet(HInstruction* instruction, - Primitive::Type type, + DataType::Type type, /*out*/ HInstruction** operand) { // Accept any already wider constant that would be handled properly by sign // extension when represented in the *width* of the given narrower data type - // (the fact that char normally zero extends does not matter here). + // (the fact that Uint8/Uint16 normally zero extend does not matter here). 
int64_t value = 0; if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { - case Primitive::kPrimByte: - if (std::numeric_limits<int8_t>::min() <= value && - std::numeric_limits<int8_t>::max() >= value) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + if (IsInt<8>(value)) { *operand = instruction; return true; } return false; - case Primitive::kPrimChar: - case Primitive::kPrimShort: - if (std::numeric_limits<int16_t>::min() <= value && - std::numeric_limits<int16_t>::max() <= value) { + case DataType::Type::kUint16: + case DataType::Type::kInt16: + if (IsInt<16>(value)) { *operand = instruction; return true; } @@ -95,46 +120,63 @@ static bool IsSignExtensionAndGet(HInstruction* instruction, return false; } } - // An implicit widening conversion of a signed integer to an integral type sign-extends - // the two's-complement representation of the integer value to fill the wider format. - if (instruction->GetType() == type && (instruction->IsArrayGet() || - instruction->IsStaticFieldGet() || - instruction->IsInstanceFieldGet())) { + // An implicit widening conversion of any signed expression sign-extends. + if (instruction->GetType() == type) { switch (type) { - case Primitive::kPrimByte: - case Primitive::kPrimShort: + case DataType::Type::kInt8: + case DataType::Type::kInt16: *operand = instruction; return true; default: return false; } } - // TODO: perhaps explicit conversions later too? - // (this may return something different from instruction) + // An explicit widening conversion of a signed expression sign-extends. + if (instruction->IsTypeConversion()) { + HInstruction* conv = instruction->InputAt(0); + DataType::Type from = conv->GetType(); + switch (instruction->GetType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + if (type == from && (from == DataType::Type::kInt8 || + from == DataType::Type::kInt16 || + from == DataType::Type::kInt32)) { + *operand = conv; + return true; + } + return false; + case DataType::Type::kInt16: + return type == DataType::Type::kUint16 && + from == DataType::Type::kUint16 && + IsZeroExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand); + default: + return false; + } + } return false; } -// Detect a zero extension from the given type. Returns the promoted operand on success. +// Detect a zero extension in instruction from the given type. +// Returns the promoted operand on success. static bool IsZeroExtensionAndGet(HInstruction* instruction, - Primitive::Type type, + DataType::Type type, /*out*/ HInstruction** operand) { // Accept any already wider constant that would be handled properly by zero // extension when represented in the *width* of the given narrower data type - // (the fact that byte/short normally sign extend does not matter here). + // (the fact that Int8/Int16 normally sign extend does not matter here). 
int64_t value = 0; if (IsInt64AndGet(instruction, /*out*/ &value)) { switch (type) { - case Primitive::kPrimByte: - if (std::numeric_limits<uint8_t>::min() <= value && - std::numeric_limits<uint8_t>::max() >= value) { + case DataType::Type::kUint8: + case DataType::Type::kInt8: + if (IsUint<8>(value)) { *operand = instruction; return true; } return false; - case Primitive::kPrimChar: - case Primitive::kPrimShort: - if (std::numeric_limits<uint16_t>::min() <= value && - std::numeric_limits<uint16_t>::max() <= value) { + case DataType::Type::kUint16: + case DataType::Type::kInt16: + if (IsUint<16>(value)) { *operand = instruction; return true; } @@ -143,36 +185,37 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, return false; } } - // An implicit widening conversion of a char to an integral type zero-extends - // the representation of the char value to fill the wider format. - if (instruction->GetType() == type && (instruction->IsArrayGet() || - instruction->IsStaticFieldGet() || - instruction->IsInstanceFieldGet())) { - if (type == Primitive::kPrimChar) { - *operand = instruction; - return true; + // An implicit widening conversion of any unsigned expression zero-extends. + if (instruction->GetType() == type) { + switch (type) { + case DataType::Type::kUint8: + case DataType::Type::kUint16: + *operand = instruction; + return true; + default: + return false; } } - // A sign (or zero) extension followed by an explicit removal of just the - // higher sign bits is equivalent to a zero extension of the underlying operand. - if (instruction->IsAnd()) { - int64_t mask = 0; - HInstruction* a = instruction->InputAt(0); - HInstruction* b = instruction->InputAt(1); - // In (a & b) find (mask & b) or (a & mask) with sign or zero extension on the non-mask. - if ((IsInt64AndGet(a, /*out*/ &mask) && (IsSignExtensionAndGet(b, type, /*out*/ operand) || - IsZeroExtensionAndGet(b, type, /*out*/ operand))) || - (IsInt64AndGet(b, /*out*/ &mask) && (IsSignExtensionAndGet(a, type, /*out*/ operand) || - IsZeroExtensionAndGet(a, type, /*out*/ operand)))) { - switch ((*operand)->GetType()) { - case Primitive::kPrimByte: return mask == std::numeric_limits<uint8_t>::max(); - case Primitive::kPrimChar: - case Primitive::kPrimShort: return mask == std::numeric_limits<uint16_t>::max(); - default: return false; - } + // An explicit widening conversion of an unsigned expression zero-extends. + if (instruction->IsTypeConversion()) { + HInstruction* conv = instruction->InputAt(0); + DataType::Type from = conv->GetType(); + switch (instruction->GetType()) { + case DataType::Type::kInt32: + case DataType::Type::kInt64: + if (type == from && from == DataType::Type::kUint16) { + *operand = conv; + return true; + } + return false; + case DataType::Type::kUint16: + return type == DataType::Type::kInt16 && + from == DataType::Type::kInt16 && + IsSignExtensionAndGet(instruction->InputAt(0), type, /*out*/ operand); + default: + return false; } } - // TODO: perhaps explicit conversions later too? return false; } @@ -180,14 +223,19 @@ static bool IsZeroExtensionAndGet(HInstruction* instruction, // Returns true on success and sets is_unsigned accordingly. static bool IsNarrowerOperands(HInstruction* a, HInstruction* b, - Primitive::Type type, + DataType::Type type, /*out*/ HInstruction** r, /*out*/ HInstruction** s, /*out*/ bool* is_unsigned) { - if (IsSignExtensionAndGet(a, type, r) && IsSignExtensionAndGet(b, type, s)) { + // Look for a matching sign extension. 
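The constant cases in IsSignExtensionAndGet/IsZeroExtensionAndGet above come down to range checks: a constant may participate in sign extension only if it fits the signed range of the narrow type, and in zero extension only if it fits the unsigned range. A small standalone illustration (the helper names are local to this sketch, not ART utilities):

    #include <cassert>
    #include <cstdint>

    static bool FitsInt8(int64_t v) { return v >= INT8_MIN && v <= INT8_MAX; }
    static bool FitsUint8(int64_t v) { return v >= 0 && v <= UINT8_MAX; }

    int main() {
      assert(FitsInt8(-1) && !FitsUint8(-1));      // -1 sign-extends but cannot zero-extend
      assert(!FitsInt8(0xAB) && FitsUint8(0xAB));  // 0xAB zero-extends but cannot sign-extend
      assert(FitsInt8(0x7F) && FitsUint8(0x7F));   // small non-negative constants allow either
      return 0;
    }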
+ DataType::Type stype = HVecOperation::ToSignedType(type); + if (IsSignExtensionAndGet(a, stype, r) && IsSignExtensionAndGet(b, stype, s)) { *is_unsigned = false; return true; - } else if (IsZeroExtensionAndGet(a, type, r) && IsZeroExtensionAndGet(b, type, s)) { + } + // Look for a matching zero extension. + DataType::Type utype = HVecOperation::ToUnsignedType(type); + if (IsZeroExtensionAndGet(a, utype, r) && IsZeroExtensionAndGet(b, utype, s)) { *is_unsigned = true; return true; } @@ -196,19 +244,32 @@ static bool IsNarrowerOperands(HInstruction* a, // As above, single operand. static bool IsNarrowerOperand(HInstruction* a, - Primitive::Type type, + DataType::Type type, /*out*/ HInstruction** r, /*out*/ bool* is_unsigned) { - if (IsSignExtensionAndGet(a, type, r)) { + // Look for a matching sign extension. + DataType::Type stype = HVecOperation::ToSignedType(type); + if (IsSignExtensionAndGet(a, stype, r)) { *is_unsigned = false; return true; - } else if (IsZeroExtensionAndGet(a, type, r)) { + } + // Look for a matching zero extension. + DataType::Type utype = HVecOperation::ToUnsignedType(type); + if (IsZeroExtensionAndGet(a, utype, r)) { *is_unsigned = true; return true; } return false; } +// Compute relative vector length based on type difference. +static uint32_t GetOtherVL(DataType::Type other_type, DataType::Type vector_type, uint32_t vl) { + DCHECK(DataType::IsIntegralType(other_type)); + DCHECK(DataType::IsIntegralType(vector_type)); + DCHECK_GE(DataType::SizeShift(other_type), DataType::SizeShift(vector_type)); + return vl >> (DataType::SizeShift(other_type) - DataType::SizeShift(vector_type)); +} + // Detect up to two instructions a and b, and an acccumulated constant c. static bool IsAddConstHelper(HInstruction* instruction, /*out*/ HInstruction** a, @@ -254,6 +315,44 @@ static bool IsAddConst(HInstruction* instruction, return false; } +// Detect a + c for constant c. +static bool IsAddConst(HInstruction* instruction, + /*out*/ HInstruction** a, + /*out*/ int64_t* c) { + if (instruction->IsAdd()) { + if (IsInt64AndGet(instruction->InputAt(0), c)) { + *a = instruction->InputAt(1); + return true; + } else if (IsInt64AndGet(instruction->InputAt(1), c)) { + *a = instruction->InputAt(0); + return true; + } + } + return false; +} + +// Detect reductions of the following forms, +// x = x_phi + .. +// x = x_phi - .. +static bool HasReductionFormat(HInstruction* reduction, HInstruction* phi) { + if (reduction->IsAdd()) { + return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi) || + (reduction->InputAt(0) != phi && reduction->InputAt(1) == phi); + } else if (reduction->IsSub()) { + return (reduction->InputAt(0) == phi && reduction->InputAt(1) != phi); + } + return false; +} + +// Translates vector operation to reduction kind. +static HVecReduce::ReductionKind GetReductionKind(HVecOperation* reduction) { + if (reduction->IsVecAdd() || reduction->IsVecSub() || reduction->IsVecSADAccumulate()) { + return HVecReduce::kSum; + } + LOG(FATAL) << "Unsupported SIMD reduction " << reduction->GetId(); + UNREACHABLE(); +} + // Test vector restrictions. static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) { return (restrictions & tested) != 0; @@ -267,29 +366,52 @@ static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) { return instruction; } +// Check that instructions from the induction sets are fully removed: have no uses +// and no other instructions use them. 
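GetOtherVL above converts the vector length chosen for a narrow element type into the number of lanes available for a wider type in the same register, using the difference of the size shifts. A standalone sketch of that arithmetic, assuming 16-byte SIMD registers purely for illustration:

    #include <cassert>
    #include <cstdint>

    // vl lanes of size (1 << vector_shift) correspond to
    // vl >> (other_shift - vector_shift) lanes of size (1 << other_shift).
    static uint32_t OtherVL(uint32_t other_shift, uint32_t vector_shift, uint32_t vl) {
      assert(other_shift >= vector_shift);
      return vl >> (other_shift - vector_shift);
    }

    int main() {
      assert(OtherVL(/*int32*/ 2, /*int8*/ 0, 16) == 4);  // 16 byte lanes -> 4 int lanes
      assert(OtherVL(/*int16*/ 1, /*int8*/ 0, 16) == 8);  // 16 byte lanes -> 8 short lanes
      return 0;
    }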
+static bool CheckInductionSetFullyRemoved(ScopedArenaSet<HInstruction*>* iset) { + for (HInstruction* instr : *iset) { + if (instr->GetBlock() != nullptr || + !instr->GetUses().empty() || + !instr->GetEnvUses().empty() || + HasEnvironmentUsedByOthers(instr)) { + return false; + } + } + return true; +} + // -// Class methods. +// Public methods. // HLoopOptimization::HLoopOptimization(HGraph* graph, CompilerDriver* compiler_driver, - HInductionVarAnalysis* induction_analysis) - : HOptimization(graph, kLoopOptimizationPassName), + HInductionVarAnalysis* induction_analysis, + OptimizingCompilerStats* stats, + const char* name) + : HOptimization(graph, name, stats), compiler_driver_(compiler_driver), induction_range_(induction_analysis), loop_allocator_(nullptr), - global_allocator_(graph_->GetArena()), + global_allocator_(graph_->GetAllocator()), top_loop_(nullptr), last_loop_(nullptr), iset_(nullptr), - induction_simplication_count_(0), + reductions_(nullptr), simplified_(false), vector_length_(0), vector_refs_(nullptr), - vector_peeling_candidate_(nullptr), + vector_static_peeling_factor_(0), + vector_dynamic_peeling_candidate_(nullptr), vector_runtime_test_a_(nullptr), vector_runtime_test_b_(nullptr), - vector_map_(nullptr) { + vector_map_(nullptr), + vector_permanent_map_(nullptr), + vector_mode_(kSequential), + vector_preheader_(nullptr), + vector_header_(nullptr), + vector_body_(nullptr), + vector_index_(nullptr) { } void HLoopOptimization::Run() { @@ -299,10 +421,8 @@ void HLoopOptimization::Run() { return; } - // Phase-local allocator that draws from the global pool. Since the allocator - // itself resides on the stack, it is destructed on exiting Run(), which - // implies its underlying memory is released immediately. - ArenaAllocator allocator(global_allocator_->GetArenaPool()); + // Phase-local allocator. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); loop_allocator_ = &allocator; // Perform loop optimizations. @@ -316,11 +436,15 @@ void HLoopOptimization::Run() { last_loop_ = top_loop_ = nullptr; } +// +// Loop setup and traversal. +// + void HLoopOptimization::LocalRun() { // Build the linear order using the phase-local allocator. This step enables building // a loop hierarchy that properly reflects the outer-inner and previous-next relation. - ArenaVector<HBasicBlock*> linear_order(loop_allocator_->Adapter(kArenaAllocLinearOrder)); - LinearizeGraph(graph_, loop_allocator_, &linear_order); + ScopedArenaVector<HBasicBlock*> linear_order(loop_allocator_->Adapter(kArenaAllocLinearOrder)); + LinearizeGraph(graph_, &linear_order); // Build the loop hierarchy. for (HBasicBlock* block : linear_order) { @@ -333,20 +457,28 @@ void HLoopOptimization::LocalRun() { // temporary data structures using the phase-local allocator. All new HIR // should use the global allocator. 
if (top_loop_ != nullptr) { - ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); - ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); - ArenaSafeMap<HInstruction*, HInstruction*> map( + ScopedArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSafeMap<HInstruction*, HInstruction*> reds( + std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSafeMap<HInstruction*, HInstruction*> map( + std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); + ScopedArenaSafeMap<HInstruction*, HInstruction*> perm( std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization)); // Attach. iset_ = &iset; + reductions_ = &reds; vector_refs_ = &refs; vector_map_ = ↦ + vector_permanent_map_ = &perm; // Traverse. TraverseLoopsInnerToOuter(top_loop_); // Detach. iset_ = nullptr; + reductions_ = nullptr; vector_refs_ = nullptr; vector_map_ = nullptr; + vector_permanent_map_ = nullptr; } } @@ -397,16 +529,12 @@ void HLoopOptimization::RemoveLoop(LoopNode* node) { } } -void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { +bool HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { + bool changed = false; for ( ; node != nullptr; node = node->next) { - // Visit inner loops first. - uint32_t current_induction_simplification_count = induction_simplication_count_; - if (node->inner != nullptr) { - TraverseLoopsInnerToOuter(node->inner); - } - // Recompute induction information of this loop if the induction - // of any inner loop has been simplified. - if (current_induction_simplification_count != induction_simplication_count_) { + // Visit inner loops first. Recompute induction information for this + // loop if the induction of any inner loop has changed. + if (TraverseLoopsInnerToOuter(node->inner)) { induction_range_.ReVisit(node->loop_info); } // Repeat simplifications in the loop-body until no more changes occur. @@ -416,12 +544,14 @@ void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) { simplified_ = false; SimplifyInduction(node); SimplifyBlocks(node); + changed = simplified_ || changed; } while (simplified_); // Optimize inner loop. if (node->inner == nullptr) { - OptimizeInnerLoop(node); + changed = OptimizeInnerLoop(node) || changed; } } + return changed; } // @@ -438,17 +568,18 @@ void HLoopOptimization::SimplifyInduction(LoopNode* node) { // for (int i = 0; i < 10; i++, k++) { .... no k .... } return k; for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) { HPhi* phi = it.Current()->AsPhi(); - iset_->clear(); // prepare phi induction if (TrySetPhiInduction(phi, /*restrict_uses*/ true) && TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) { // Note that it's ok to have replaced uses after the loop with the last value, without // being able to remove the cycle. Environment uses (which are the reason we may not be - // able to remove the cycle) within the loop will still hold the right value. + // able to remove the cycle) within the loop will still hold the right value. We must + // have tried first, however, to replace outside uses. 
if (CanRemoveCycle()) { + simplified_ = true; for (HInstruction* i : *iset_) { RemoveFromCycle(i); } - simplified_ = true; + DCHECK(CheckInductionSetFullyRemoved(iset_)); } } } @@ -491,21 +622,20 @@ void HLoopOptimization::SimplifyBlocks(LoopNode* node) { } } -void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { +bool HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); // Ensure loop header logic is finite. int64_t trip_count = 0; if (!induction_range_.IsFinite(node->loop_info, &trip_count)) { - return; + return false; } - // Ensure there is only a single loop-body (besides the header). HBasicBlock* body = nullptr; for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) { if (it.Current() != header) { if (body != nullptr) { - return; + return false; } body = it.Current(); } @@ -513,27 +643,27 @@ void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { CHECK(body != nullptr); // Ensure there is only a single exit point. if (header->GetSuccessors().size() != 2) { - return; + return false; } HBasicBlock* exit = (header->GetSuccessors()[0] == body) ? header->GetSuccessors()[1] : header->GetSuccessors()[0]; // Ensure exit can only be reached by exiting loop. if (exit->GetPredecessors().size() != 1) { - return; + return false; } // Detect either an empty loop (no side effects other than plain iteration) or // a trivial loop (just iterating once). Replace subsequent index uses, if any, // with the last value and remove the loop, possibly after unrolling its body. - HInstruction* phi = header->GetFirstPhi(); - iset_->clear(); // prepare phi induction - if (TrySetSimpleLoopHeader(header)) { + HPhi* main_phi = nullptr; + if (TrySetSimpleLoopHeader(header, &main_phi)) { bool is_empty = IsEmptyBody(body); - if ((is_empty || trip_count == 1) && - TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) { + if (reductions_->empty() && // TODO: possible with some effort + (is_empty || trip_count == 1) && + TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { if (!is_empty) { // Unroll the loop-body, which sees initial value of the index. - phi->ReplaceWith(phi->InputAt(0)); + main_phi->ReplaceWith(main_phi->InputAt(0)); preheader->MergeInstructionsWith(body); } body->DisconnectAndDelete(); @@ -546,21 +676,20 @@ void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) { preheader->AddDominatedBlock(exit); exit->SetDominator(preheader); RemoveLoop(node); // update hierarchy - return; + return true; } } - // Vectorize loop, if possible and valid. 
- if (kEnableVectorization) { - iset_->clear(); // prepare phi induction - if (TrySetSimpleLoopHeader(header) && - ShouldVectorize(node, body, trip_count) && - TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) { - Vectorize(node, body, exit, trip_count); - graph_->SetHasSIMD(true); // flag SIMD usage - return; - } + if (kEnableVectorization && + TrySetSimpleLoopHeader(header, &main_phi) && + ShouldVectorize(node, body, trip_count) && + TryAssignLastValue(node->loop_info, main_phi, preheader, /*collect_loop_uses*/ true)) { + Vectorize(node, body, exit, trip_count); + graph_->SetHasSIMD(true); // flag SIMD usage + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorized); + return true; } + return false; } // @@ -573,9 +702,10 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 // Reset vector bookkeeping. vector_length_ = 0; vector_refs_->clear(); - vector_peeling_candidate_ = nullptr; + vector_static_peeling_factor_ = 0; + vector_dynamic_peeling_candidate_ = nullptr; vector_runtime_test_a_ = - vector_runtime_test_b_= nullptr; + vector_runtime_test_b_ = nullptr; // Phis in the loop-body prevent vectorization. if (!block->GetPhis().IsEmpty()) { @@ -590,10 +720,17 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 } } - // Does vectorization seem profitable? - if (!IsVectorizationProfitable(trip_count)) { - return false; - } + // Prepare alignment analysis: + // (1) find desired alignment (SIMD vector size in bytes). + // (2) initialize static loop peeling votes (peeling factor that will + // make one particular reference aligned), never to exceed (1). + // (3) variable to record how many references share same alignment. + // (4) variable to record suitable candidate for dynamic loop peeling. + uint32_t desired_alignment = GetVectorSizeInBytes(); + DCHECK_LE(desired_alignment, 16u); + uint32_t peeling_votes[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + uint32_t max_num_same_alignment = 0; + const ArrayReference* peeling_candidate = nullptr; // Data dependence analysis. Find each pair of references with same type, where // at least one is a write. Each such pair denotes a possible data dependence. @@ -601,7 +738,10 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 // aliased, as well as the property that references either point to the same // array or to two completely disjoint arrays, i.e., no partial aliasing. // Other than a few simply heuristics, no detailed subscript analysis is done. + // The scan over references also prepares finding a suitable alignment strategy. for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) { + uint32_t num_same_alignment = 0; + // Scan over all next references. for (auto j = i; ++j != vector_refs_->end(); ) { if (i->type == j->type && (i->lhs || j->lhs)) { // Found same-typed a[i+x] vs. b[i+y], where at least one is a write. @@ -615,6 +755,10 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 if (x != y) { return false; } + // Count the number of references that have the same alignment (since + // base and offset are the same) and where at least one is a write, so + // e.g. a[i] = a[i] + b[i] counts a[i] but not b[i]). + num_same_alignment++; } else { // Found a[i+x] vs. b[i+y]. Accept if x == y (at worst loop-independent data dependence). 
// Conservatively assume a potential loop-carried data dependence otherwise, avoided by @@ -633,10 +777,38 @@ bool HLoopOptimization::ShouldVectorize(LoopNode* node, HBasicBlock* block, int6 } } } - } + // Update information for finding suitable alignment strategy: + // (1) update votes for static loop peeling, + // (2) update suitable candidate for dynamic loop peeling. + Alignment alignment = ComputeAlignment(i->offset, i->type, i->is_string_char_at); + if (alignment.Base() >= desired_alignment) { + // If the array/string object has a known, sufficient alignment, use the + // initial offset to compute the static loop peeling vote (this always + // works, since elements have natural alignment). + uint32_t offset = alignment.Offset() & (desired_alignment - 1u); + uint32_t vote = (offset == 0) + ? 0 + : ((desired_alignment - offset) >> DataType::SizeShift(i->type)); + DCHECK_LT(vote, 16u); + ++peeling_votes[vote]; + } else if (BaseAlignment() >= desired_alignment && + num_same_alignment > max_num_same_alignment) { + // Otherwise, if the array/string object has a known, sufficient alignment + // for just the base but with an unknown offset, record the candidate with + // the most occurrences for dynamic loop peeling (again, the peeling always + // works, since elements have natural alignment). + max_num_same_alignment = num_same_alignment; + peeling_candidate = &(*i); + } + } // for i + + // Find a suitable alignment strategy. + SetAlignmentStrategy(peeling_votes, peeling_candidate); - // Consider dynamic loop peeling for alignment. - SetPeelingCandidate(trip_count); + // Does vectorization seem profitable? + if (!IsVectorizationProfitable(trip_count)) { + return false; + } // Success! return true; @@ -646,7 +818,6 @@ void HLoopOptimization::Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count) { - Primitive::Type induc_type = Primitive::kPrimInt; HBasicBlock* header = node->loop_info->GetHeader(); HBasicBlock* preheader = node->loop_info->GetPreHeader(); @@ -654,31 +825,59 @@ void HLoopOptimization::Vectorize(LoopNode* node, uint32_t unroll = GetUnrollingFactor(block, trip_count); uint32_t chunk = vector_length_ * unroll; + DCHECK(trip_count == 0 || (trip_count >= MaxNumberPeeled() + chunk)); + // A cleanup loop is needed, at least, for any unknown trip count or // for a known trip count with remainder iterations after vectorization. - bool needs_cleanup = trip_count == 0 || (trip_count % chunk) != 0; + bool needs_cleanup = trip_count == 0 || + ((trip_count - vector_static_peeling_factor_) % chunk) != 0; // Adjust vector bookkeeping. - iset_->clear(); // prepare phi induction - bool is_simple_loop_header = TrySetSimpleLoopHeader(header); // fills iset_ + HPhi* main_phi = nullptr; + bool is_simple_loop_header = TrySetSimpleLoopHeader(header, &main_phi); // refills sets DCHECK(is_simple_loop_header); vector_header_ = header; vector_body_ = block; - // Generate dynamic loop peeling trip count, if needed: - // ptc = <peeling-needed-for-candidate> + // Loop induction type. + DataType::Type induc_type = main_phi->GetType(); + DCHECK(induc_type == DataType::Type::kInt32 || induc_type == DataType::Type::kInt64) + << induc_type; + + // Generate the trip count for static or dynamic loop peeling, if needed: + // ptc = <peeling factor>; HInstruction* ptc = nullptr; - if (vector_peeling_candidate_ != nullptr) { - DCHECK_LT(vector_length_, trip_count) << "dynamic peeling currently requires known trip count"; - // - // TODO: Implement this. 
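// ---- Editor's note (illustrative sketch, not part of the patch) ----
// The static peeling vote computed for each sufficiently aligned reference has a simple
// closed form: the number of leading elements to peel so the reference becomes aligned.
#include <cstdint>

uint32_t PeelingVote(uint32_t desired_alignment,  // SIMD width in bytes, a power of two
                     uint32_t start_offset,       // byte offset of the first element accessed
                     uint32_t size_shift) {       // log2 of the element size in bytes
  uint32_t misalign = start_offset & (desired_alignment - 1u);
  return (misalign == 0u) ? 0u : ((desired_alignment - misalign) >> size_shift);
}

// Example: with a 16-byte SIMD width, an int32 reference starting 12 bytes past an aligned
// boundary yields PeelingVote(16, 12, 2) == 1, i.e. peel one iteration.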
Compute address of first access memory location and - // compute peeling factor to obtain kAlignedBase alignment. - // - needs_cleanup = true; + if (vector_static_peeling_factor_ != 0) { + // Static loop peeling for SIMD alignment (using the most suitable + // fixed peeling factor found during prior alignment analysis). + DCHECK(vector_dynamic_peeling_candidate_ == nullptr); + ptc = graph_->GetConstant(induc_type, vector_static_peeling_factor_); + } else if (vector_dynamic_peeling_candidate_ != nullptr) { + // Dynamic loop peeling for SIMD alignment (using the most suitable + // candidate found during prior alignment analysis): + // rem = offset % ALIGN; // adjusted as #elements + // ptc = rem == 0 ? 0 : (ALIGN - rem); + uint32_t shift = DataType::SizeShift(vector_dynamic_peeling_candidate_->type); + uint32_t align = GetVectorSizeInBytes() >> shift; + uint32_t hidden_offset = HiddenOffset(vector_dynamic_peeling_candidate_->type, + vector_dynamic_peeling_candidate_->is_string_char_at); + HInstruction* adjusted_offset = graph_->GetConstant(induc_type, hidden_offset >> shift); + HInstruction* offset = Insert(preheader, new (global_allocator_) HAdd( + induc_type, vector_dynamic_peeling_candidate_->offset, adjusted_offset)); + HInstruction* rem = Insert(preheader, new (global_allocator_) HAnd( + induc_type, offset, graph_->GetConstant(induc_type, align - 1u))); + HInstruction* sub = Insert(preheader, new (global_allocator_) HSub( + induc_type, graph_->GetConstant(induc_type, align), rem)); + HInstruction* cond = Insert(preheader, new (global_allocator_) HEqual( + rem, graph_->GetConstant(induc_type, 0))); + ptc = Insert(preheader, new (global_allocator_) HSelect( + cond, graph_->GetConstant(induc_type, 0), sub, kNoDexPc)); + needs_cleanup = true; // don't know the exact amount } // Generate loop control: // stc = <trip-count>; + // ptc = min(stc, ptc); // vtc = stc - (stc - ptc) % chunk; // i = 0; HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader); @@ -687,15 +886,19 @@ void HLoopOptimization::Vectorize(LoopNode* node, DCHECK(IsPowerOfTwo(chunk)); HInstruction* diff = stc; if (ptc != nullptr) { + if (trip_count == 0) { + HInstruction* cond = Insert(preheader, new (global_allocator_) HAboveOrEqual(stc, ptc)); + ptc = Insert(preheader, new (global_allocator_) HSelect(cond, ptc, stc, kNoDexPc)); + } diff = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, ptc)); } HInstruction* rem = Insert( preheader, new (global_allocator_) HAnd(induc_type, diff, - graph_->GetIntConstant(chunk - 1))); + graph_->GetConstant(induc_type, chunk - 1))); vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem)); } - vector_index_ = graph_->GetIntConstant(0); + vector_index_ = graph_->GetConstant(induc_type, 0); // Generate runtime disambiguation test: // vtc = a != b ? 
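// ---- Editor's note (illustrative sketch, not part of the patch) ----
// Scalar model of the trip counts materialized in the preheader, assuming chunk is a power
// of two as the DCHECK requires: ptc covers the peeled iterations, vtc the vector loop.
#include <cstdint>

struct TripCounts {
  uint32_t ptc;  // iterations peeled off for alignment
  uint32_t vtc;  // upper bound of the vector loop; (vtc - ptc) is a multiple of chunk
};

TripCounts ComputeTripCounts(uint32_t stc,      // full scalar trip count
                             uint32_t ptc,      // requested peeling factor (static or dynamic)
                             uint32_t chunk) {  // vector_length_ * unroll, a power of two
  if (ptc > stc) {
    ptc = stc;                                  // ptc = min(stc, ptc)
  }
  uint32_t rem = (stc - ptc) & (chunk - 1u);    // remainder left for the cleanup loop
  return { ptc, stc - rem };                    // vtc = stc - (stc - ptc) % chunk
}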
vtc : 0; @@ -704,13 +907,18 @@ void HLoopOptimization::Vectorize(LoopNode* node, preheader, new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_)); vtc = Insert(preheader, - new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc)); + new (global_allocator_) + HSelect(rt, vtc, graph_->GetConstant(induc_type, 0), kNoDexPc)); needs_cleanup = true; } - // Generate dynamic peeling loop for alignment, if needed: + // Generate alignment peeling loop, if needed: // for ( ; i < ptc; i += 1) // <loop-body> + // + // NOTE: The alignment forced by the peeling loop is preserved even if data is + // moved around during suspend checks, since all analysis was based on + // nothing more than the Android runtime alignment conventions. if (ptc != nullptr) { vector_mode_ = kSequential; GenerateNewLoop(node, @@ -718,8 +926,8 @@ void HLoopOptimization::Vectorize(LoopNode* node, graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), vector_index_, ptc, - graph_->GetIntConstant(1), - /*unroll*/ 1); + graph_->GetConstant(induc_type, 1), + kNoUnrollingFactor); } // Generate vector loop, possibly further unrolled: @@ -731,7 +939,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), vector_index_, vtc, - graph_->GetIntConstant(vector_length_), // increment per unroll + graph_->GetConstant(induc_type, vector_length_), // increment per unroll unroll); HLoopInformation* vloop = vector_header_->GetLoopInformation(); @@ -745,8 +953,21 @@ void HLoopOptimization::Vectorize(LoopNode* node, graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit), vector_index_, stc, - graph_->GetIntConstant(1), - /*unroll*/ 1); + graph_->GetConstant(induc_type, 1), + kNoUnrollingFactor); + } + + // Link reductions to their final uses. + for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { + if (i->first->IsPhi()) { + HInstruction* phi = i->first; + HInstruction* repl = ReduceAndExtractIfNeeded(i->second); + // Deal with regular uses. + for (const HUseListNode<HInstruction*>& use : phi->GetUses()) { + induction_range_.Replace(use.GetUser(), phi, repl); // update induction use + } + phi->ReplaceWith(repl); + } } // Remove the original loop by disconnecting the body block @@ -755,6 +976,7 @@ void HLoopOptimization::Vectorize(LoopNode* node, while (!header->GetFirstInstruction()->IsGoto()) { header->RemoveInstruction(header->GetFirstInstruction()); } + // Update loop hierarchy: the old header now resides in the same outer loop // as the old preheader. Note that we don't bother putting sequential // loops back in the hierarchy at this point. @@ -770,7 +992,7 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, HInstruction* step, uint32_t unroll) { DCHECK(unroll == 1 || vector_mode_ == kVector); - Primitive::Type induc_type = Primitive::kPrimInt; + DataType::Type induc_type = lo->GetType(); // Prepare new loop. vector_preheader_ = new_preheader, vector_header_ = vector_preheader_->GetSingleSuccessor(); @@ -787,21 +1009,10 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, vector_header_->AddInstruction(cond); vector_header_->AddInstruction(new (global_allocator_) HIf(cond)); vector_index_ = phi; + vector_permanent_map_->clear(); // preserved over unrolling for (uint32_t u = 0; u < unroll; u++) { - // Clear map, leaving loop invariants setup during unrolling. 
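// ---- Editor's note (illustrative sketch, not part of the patch) ----
// Overall shape of the code that Vectorize() emits for "for (i = 0; i < stc; i++) body(i)";
// the peeling and cleanup loops are only generated when needed.
#include <cstdint>

void VectorizedShape(uint32_t stc, uint32_t ptc, uint32_t vtc, uint32_t chunk) {
  uint32_t i = 0;
  for (; i < ptc; i += 1) {
    // body(i): sequential peeling loop, establishes SIMD alignment
  }
  for (; i < vtc; i += chunk) {
    // vector_body(i): main SIMD loop, possibly unrolled
  }
  for (; i < stc; i += 1) {
    // body(i): sequential cleanup loop for any remainder
  }
}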
- if (u == 0) { - vector_map_->clear(); - } else { - for (auto i = vector_map_->begin(); i != vector_map_->end(); ) { - if (i->second->IsVecReplicateScalar()) { - DCHECK(node->loop_info->IsDefinedOutOfTheLoop(i->first)); - ++i; - } else { - i = vector_map_->erase(i); - } - } - } // Generate instruction map. + vector_map_->clear(); for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true); DCHECK(vectorized_def); @@ -818,16 +1029,23 @@ void HLoopOptimization::GenerateNewLoop(LoopNode* node, } } } + // Generate the induction. vector_index_ = new (global_allocator_) HAdd(induc_type, vector_index_, step); Insert(vector_body_, vector_index_); } - // Finalize phi for the loop index. + // Finalize phi inputs for the reductions (if any). + for (auto i = reductions_->begin(); i != reductions_->end(); ++i) { + if (!i->first->IsPhi()) { + DCHECK(i->second->IsPhi()); + GenerateVecReductionPhiInputs(i->second->AsPhi(), i->first); + } + } + // Finalize phi inputs for the loop index. phi->AddInput(lo); phi->AddInput(vector_index_); vector_index_ = phi; } -// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc. bool HLoopOptimization::VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code) { @@ -838,7 +1056,7 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, // (4) vectorizable right-hand-side value. uint64_t restrictions = kNone; if (instruction->IsArraySet()) { - Primitive::Type type = instruction->AsArraySet()->GetComponentType(); + DataType::Type type = instruction->AsArraySet()->GetComponentType(); HInstruction* base = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); HInstruction* value = instruction->InputAt(2); @@ -857,6 +1075,25 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, } return false; } + // Accept a left-hand-side reduction for + // (1) supported vector type, + // (2) vectorizable right-hand-side value. + auto redit = reductions_->find(instruction); + if (redit != reductions_->end()) { + DataType::Type type = instruction->GetType(); + // Recognize SAD idiom or direct reduction. + if (VectorizeSADIdiom(node, instruction, generate_code, type, restrictions) || + (TrySetVectorType(type, &restrictions) && + VectorizeUse(node, instruction, generate_code, type, restrictions))) { + if (generate_code) { + HInstruction* new_red = vector_map_->Get(instruction); + vector_permanent_map_->Put(new_red, vector_map_->Get(redit->second)); + vector_permanent_map_->Overwrite(redit->second, new_red); + } + return true; + } + return false; + } // Branch back okay. if (instruction->IsGoto()) { return true; @@ -866,11 +1103,10 @@ bool HLoopOptimization::VectorizeDef(LoopNode* node, return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite(); } -// TODO: saturation arithmetic. bool HLoopOptimization::VectorizeUse(LoopNode* node, HInstruction* instruction, bool generate_code, - Primitive::Type type, + DataType::Type type, uint64_t restrictions) { // Accept anything for which code has already been generated. if (generate_code) { @@ -889,43 +1125,64 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } else if (instruction->IsArrayGet()) { // Deal with vector restrictions. 
- if (instruction->AsArrayGet()->IsStringCharAt() && - HasVectorRestrictions(restrictions, kNoStringCharAt)) { + bool is_string_char_at = instruction->AsArrayGet()->IsStringCharAt(); + if (is_string_char_at && HasVectorRestrictions(restrictions, kNoStringCharAt)) { return false; } // Accept a right-hand-side array base[index] for - // (1) exact matching vector type, + // (1) matching vector type (exact match or signed/unsigned integral type of the same size), // (2) loop-invariant base, // (3) unit stride index, // (4) vectorizable right-hand-side value. HInstruction* base = instruction->InputAt(0); HInstruction* index = instruction->InputAt(1); HInstruction* offset = nullptr; - if (type == instruction->GetType() && + if (HVecOperation::ToSignedType(type) == HVecOperation::ToSignedType(instruction->GetType()) && node->loop_info->IsDefinedOutOfTheLoop(base) && induction_range_.IsUnitStride(instruction, index, graph_, &offset)) { if (generate_code) { GenerateVecSub(index, offset); GenerateVecMem(instruction, vector_map_->Get(index), nullptr, offset, type); } else { - vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false)); + vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false, is_string_char_at)); } return true; } + } else if (instruction->IsPhi()) { + // Accept particular phi operations. + if (reductions_->find(instruction) != reductions_->end()) { + // Deal with vector restrictions. + if (HasVectorRestrictions(restrictions, kNoReduction)) { + return false; + } + // Accept a reduction. + if (generate_code) { + GenerateVecReductionPhi(instruction->AsPhi()); + } + return true; + } + // TODO: accept right-hand-side induction? + return false; } else if (instruction->IsTypeConversion()) { // Accept particular type conversions. HTypeConversion* conversion = instruction->AsTypeConversion(); HInstruction* opa = conversion->InputAt(0); - Primitive::Type from = conversion->GetInputType(); - Primitive::Type to = conversion->GetResultType(); - if ((to == Primitive::kPrimByte || - to == Primitive::kPrimChar || - to == Primitive::kPrimShort) && from == Primitive::kPrimInt) { - // Accept a "narrowing" type conversion from a "wider" computation for - // (1) conversion into final required type, - // (2) vectorizable operand, - // (3) "wider" operations cannot bring in higher order bits. - if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) { + DataType::Type from = conversion->GetInputType(); + DataType::Type to = conversion->GetResultType(); + if (DataType::IsIntegralType(from) && DataType::IsIntegralType(to)) { + uint32_t size_vec = DataType::Size(type); + uint32_t size_from = DataType::Size(from); + uint32_t size_to = DataType::Size(to); + // Accept an integral conversion + // (1a) narrowing into vector type, "wider" operations cannot bring in higher order bits, or + // (1b) widening from at least vector type, and + // (2) vectorizable operand. 
+ if ((size_to < size_from && + size_to == size_vec && + VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) || + (size_to >= size_from && + size_from >= size_vec && + VectorizeUse(node, opa, generate_code, type, restrictions))) { if (generate_code) { if (vector_mode_ == kVector) { vector_map_->Put(instruction, vector_map_->Get(opa)); // operand pass-through @@ -935,7 +1192,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } return true; } - } else if (to == Primitive::kPrimFloat && from == Primitive::kPrimInt) { + } else if (to == DataType::Type::kFloat32 && from == DataType::Type::kInt32) { DCHECK_EQ(to, type); // Accept int to float conversion for // (1) supported int, @@ -977,7 +1234,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return true; } } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) { - // Recognize vectorization idioms. + // Recognize halving add idiom. if (VectorizeHalvingAddIdiom(node, instruction, generate_code, type, restrictions)) { return true; } @@ -1010,7 +1267,7 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, if (VectorizeUse(node, r, generate_code, type, restrictions) && IsInt64AndGet(opb, /*out*/ &distance)) { // Restrict shift distance to packed data type width. - int64_t max_distance = Primitive::ComponentSize(type) * 8; + int64_t max_distance = DataType::Size(type) * 8; if (0 <= distance && distance < max_distance) { if (generate_code) { GenerateVecOp(instruction, vector_map_->Get(r), opb, type); @@ -1043,43 +1300,10 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, } if (VectorizeUse(node, r, generate_code, type, restrictions)) { if (generate_code) { - GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type); - } - return true; - } - return false; - } - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - // Deal with vector restrictions. - HInstruction* opa = instruction->InputAt(0); - HInstruction* opb = instruction->InputAt(1); - HInstruction* r = opa; - HInstruction* s = opb; - bool is_unsigned = false; - if (HasVectorRestrictions(restrictions, kNoMinMax)) { - return false; - } else if (HasVectorRestrictions(restrictions, kNoHiBits) && - !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) { - return false; // reject, unless all operands are same-extension narrower - } - // Accept MIN/MAX(x, y) for vectorizable operands. 
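// ---- Editor's note (illustrative sketch, not part of the patch) ----
// The size-based filter for integral conversions, restated as a standalone predicate
// (sizes in bytes); the narrowing case additionally propagates kNoHiBits to the operand.
#include <cstdint>

bool AcceptIntegralConversion(uint32_t size_vec,    // size of the packed (vector) type
                              uint32_t size_from,   // size of the conversion's input type
                              uint32_t size_to) {   // size of the conversion's result type
  bool narrowing_into_vector = (size_to < size_from) && (size_to == size_vec);
  bool widening_from_vector = (size_to >= size_from) && (size_from >= size_vec);
  return narrowing_into_vector || widening_from_vector;
}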
- DCHECK(r != nullptr && s != nullptr); - if (generate_code && vector_mode_ != kVector) { // de-idiom - r = opa; - s = opb; - } - if (VectorizeUse(node, r, generate_code, type, restrictions) && - VectorizeUse(node, s, generate_code, type, restrictions)) { - if (generate_code) { - GenerateVecOp( - instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned); + GenerateVecOp(instruction, + vector_map_->Get(r), + nullptr, + HVecOperation::ToProperType(type, is_unsigned)); } return true; } @@ -1092,134 +1316,152 @@ bool HLoopOptimization::VectorizeUse(LoopNode* node, return false; } -bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restrictions) { +uint32_t HLoopOptimization::GetVectorSizeInBytes() { + switch (compiler_driver_->GetInstructionSet()) { + case InstructionSet::kArm: + case InstructionSet::kThumb2: + return 8; // 64-bit SIMD + default: + return 16; // 128-bit SIMD + } +} + +bool HLoopOptimization::TrySetVectorType(DataType::Type type, uint64_t* restrictions) { const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures(); switch (compiler_driver_->GetInstructionSet()) { - case kArm: - case kThumb2: + case InstructionSet::kArm: + case InstructionSet::kThumb2: // Allow vectorization for all ARM devices, because Android assumes that - // ARM 32-bit always supports advanced SIMD. + // ARM 32-bit always supports advanced SIMD (64-bit SIMD). switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - *restrictions |= kNoDiv; + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + *restrictions |= kNoDiv | kNoReduction; return TrySetVectorLength(8); - case Primitive::kPrimChar: - case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoStringCharAt; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + *restrictions |= kNoDiv | kNoStringCharAt | kNoReduction; return TrySetVectorLength(4); - case Primitive::kPrimInt: - *restrictions |= kNoDiv; + case DataType::Type::kInt32: + *restrictions |= kNoDiv | kNoWideSAD; return TrySetVectorLength(2); default: break; } return false; - case kArm64: + case InstructionSet::kArm64: // Allow vectorization for all ARM devices, because Android assumes that - // ARMv8 AArch64 always supports advanced SIMD. + // ARMv8 AArch64 always supports advanced SIMD (128-bit SIMD). switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: *restrictions |= kNoDiv; return TrySetVectorLength(16); - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: *restrictions |= kNoDiv; return TrySetVectorLength(8); - case Primitive::kPrimInt: + case DataType::Type::kInt32: *restrictions |= kNoDiv; return TrySetVectorLength(4); - case Primitive::kPrimLong: - *restrictions |= kNoDiv | kNoMul | kNoMinMax; + case DataType::Type::kInt64: + *restrictions |= kNoDiv | kNoMul; return TrySetVectorLength(2); - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: + *restrictions |= kNoReduction; return TrySetVectorLength(4); - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: + *restrictions |= kNoReduction; return TrySetVectorLength(2); default: return false; } - case kX86: - case kX86_64: - // Allow vectorization for SSE4-enabled X86 devices only (128-bit vectors). 
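// ---- Editor's note (illustrative sketch, not part of the patch) ----
// The lane counts passed to TrySetVectorLength follow from the SIMD width reported by
// GetVectorSizeInBytes() (8 bytes on ARM/Thumb2, 16 bytes elsewhere) divided by the
// element size, as a couple of compile-time checks illustrate.
#include <cstdint>

constexpr uint32_t Lanes(uint32_t simd_bytes, uint32_t element_bytes) {
  return simd_bytes / element_bytes;
}

static_assert(Lanes(8, 4) == 2, "ARM/Thumb2 64-bit SIMD: two int32 lanes");
static_assert(Lanes(16, 2) == 8, "ARM64 128-bit SIMD: eight int16/uint16 lanes");
static_assert(Lanes(16, 8) == 2, "ARM64 128-bit SIMD: two int64/double lanes");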
+ case InstructionSet::kX86: + case InstructionSet::kX86_64: + // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit SIMD). if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + *restrictions |= + kNoMul | kNoDiv | kNoShift | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; return TrySetVectorLength(16); - case Primitive::kPrimChar: - case Primitive::kPrimShort: - *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd; + case DataType::Type::kUint16: + case DataType::Type::kInt16: + *restrictions |= kNoDiv | kNoAbs | kNoSignedHAdd | kNoUnroundedHAdd | kNoSAD; return TrySetVectorLength(8); - case Primitive::kPrimInt: - *restrictions |= kNoDiv; + case DataType::Type::kInt32: + *restrictions |= kNoDiv | kNoSAD; return TrySetVectorLength(4); - case Primitive::kPrimLong: - *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoMinMax; + case DataType::Type::kInt64: + *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs | kNoSAD; return TrySetVectorLength(2); - case Primitive::kPrimFloat: - *restrictions |= kNoMinMax; // -0.0 vs +0.0 + case DataType::Type::kFloat32: + *restrictions |= kNoReduction; return TrySetVectorLength(4); - case Primitive::kPrimDouble: - *restrictions |= kNoMinMax; // -0.0 vs +0.0 + case DataType::Type::kFloat64: + *restrictions |= kNoReduction; return TrySetVectorLength(2); default: break; } // switch type } return false; - case kMips: + case InstructionSet::kMips: if (features->AsMipsInstructionSetFeatures()->HasMsa()) { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: *restrictions |= kNoDiv; return TrySetVectorLength(16); - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: *restrictions |= kNoDiv | kNoStringCharAt; return TrySetVectorLength(8); - case Primitive::kPrimInt: + case DataType::Type::kInt32: *restrictions |= kNoDiv; return TrySetVectorLength(4); - case Primitive::kPrimLong: + case DataType::Type::kInt64: *restrictions |= kNoDiv; return TrySetVectorLength(2); - case Primitive::kPrimFloat: - *restrictions |= kNoMinMax; // min/max(x, NaN) + case DataType::Type::kFloat32: + *restrictions |= kNoReduction; return TrySetVectorLength(4); - case Primitive::kPrimDouble: - *restrictions |= kNoMinMax; // min/max(x, NaN) + case DataType::Type::kFloat64: + *restrictions |= kNoReduction; return TrySetVectorLength(2); default: break; } // switch type } return false; - case kMips64: + case InstructionSet::kMips64: if (features->AsMips64InstructionSetFeatures()->HasMsa()) { switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: *restrictions |= kNoDiv; return TrySetVectorLength(16); - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint16: + case DataType::Type::kInt16: *restrictions |= kNoDiv | kNoStringCharAt; return TrySetVectorLength(8); - case Primitive::kPrimInt: + case DataType::Type::kInt32: *restrictions |= kNoDiv; return TrySetVectorLength(4); - case Primitive::kPrimLong: + case DataType::Type::kInt64: *restrictions |= kNoDiv; return 
TrySetVectorLength(2); - case Primitive::kPrimFloat: - *restrictions |= kNoMinMax; // min/max(x, NaN) + case DataType::Type::kFloat32: + *restrictions |= kNoReduction; return TrySetVectorLength(4); - case Primitive::kPrimDouble: - *restrictions |= kNoMinMax; // min/max(x, NaN) + case DataType::Type::kFloat64: + *restrictions |= kNoReduction; return TrySetVectorLength(2); default: break; @@ -1243,7 +1485,7 @@ bool HLoopOptimization::TrySetVectorLength(uint32_t length) { return vector_length_ == length; } -void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) { +void HLoopOptimization::GenerateVecInv(HInstruction* org, DataType::Type type) { if (vector_map_->find(org) == vector_map_->end()) { // In scalar code, just use a self pass-through for scalar invariants // (viz. expression remains itself). @@ -1252,9 +1494,24 @@ void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) return; } // In vector code, explicit scalar expansion is needed. - HInstruction* vector = new (global_allocator_) HVecReplicateScalar( - global_allocator_, org, type, vector_length_); - vector_map_->Put(org, Insert(vector_preheader_, vector)); + HInstruction* vector = nullptr; + auto it = vector_permanent_map_->find(org); + if (it != vector_permanent_map_->end()) { + vector = it->second; // reuse during unrolling + } else { + // Generates ReplicateScalar( (optional_type_conv) org ). + HInstruction* input = org; + DataType::Type input_type = input->GetType(); + if (type != input_type && (type == DataType::Type::kInt64 || + input_type == DataType::Type::kInt64)) { + input = Insert(vector_preheader_, + new (global_allocator_) HTypeConversion(type, input, kNoDexPc)); + } + vector = new (global_allocator_) + HVecReplicateScalar(global_allocator_, input, type, vector_length_, kNoDexPc); + vector_permanent_map_->Put(org, Insert(vector_preheader_, vector)); + } + vector_map_->Put(org, vector); } } @@ -1263,7 +1520,7 @@ void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) HInstruction* subscript = vector_index_; int64_t value = 0; if (!IsInt64AndGet(offset, &value) || value != 0) { - subscript = new (global_allocator_) HAdd(Primitive::kPrimInt, subscript, offset); + subscript = new (global_allocator_) HAdd(DataType::Type::kInt32, subscript, offset); if (org->IsPhi()) { Insert(vector_body_, subscript); // lacks layout placeholder } @@ -1276,41 +1533,147 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, HInstruction* opa, HInstruction* opb, HInstruction* offset, - Primitive::Type type) { + DataType::Type type) { + uint32_t dex_pc = org->GetDexPc(); HInstruction* vector = nullptr; if (vector_mode_ == kVector) { // Vector store or load. + bool is_string_char_at = false; HInstruction* base = org->InputAt(0); if (opb != nullptr) { vector = new (global_allocator_) HVecStore( - global_allocator_, base, opa, opb, type, vector_length_); + global_allocator_, base, opa, opb, type, org->GetSideEffects(), vector_length_, dex_pc); } else { - bool is_string_char_at = org->AsArrayGet()->IsStringCharAt(); - vector = new (global_allocator_) HVecLoad( - global_allocator_, base, opa, type, vector_length_, is_string_char_at); + is_string_char_at = org->AsArrayGet()->IsStringCharAt(); + vector = new (global_allocator_) HVecLoad(global_allocator_, + base, + opa, + type, + org->GetSideEffects(), + vector_length_, + is_string_char_at, + dex_pc); } - // Known dynamically enforced alignment? - // TODO: detect offset + constant differences. 
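// ---- Editor's note (illustrative sketch, not part of the patch) ----
// Explicit scalar expansion in GenerateVecInv broadcasts a loop invariant into all lanes
// and hoists the result into the preheader (cached in vector_permanent_map_ so unrolled
// copies reuse it). A scalar model with a fixed, hypothetical lane count:
#include <array>
#include <cstddef>

template <typename T, std::size_t kLanes = 4>
std::array<T, kLanes> ReplicateScalar(T value) {  // model of HVecReplicateScalar
  std::array<T, kLanes> lanes;
  lanes.fill(value);  // every lane carries the loop-invariant value
  return lanes;
}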
- // TODO: long run, static alignment analysis? - if (vector_peeling_candidate_ != nullptr && - vector_peeling_candidate_->base == base && - vector_peeling_candidate_->offset == offset) { - vector->AsVecMemoryOperation()->SetAlignment(Alignment(kAlignedBase, 0)); + // Known (forced/adjusted/original) alignment? + if (vector_dynamic_peeling_candidate_ != nullptr) { + if (vector_dynamic_peeling_candidate_->offset == offset && // TODO: diffs too? + DataType::Size(vector_dynamic_peeling_candidate_->type) == DataType::Size(type) && + vector_dynamic_peeling_candidate_->is_string_char_at == is_string_char_at) { + vector->AsVecMemoryOperation()->SetAlignment( // forced + Alignment(GetVectorSizeInBytes(), 0)); + } + } else { + vector->AsVecMemoryOperation()->SetAlignment( // adjusted/original + ComputeAlignment(offset, type, is_string_char_at, vector_static_peeling_factor_)); } } else { // Scalar store or load. DCHECK(vector_mode_ == kSequential); if (opb != nullptr) { - vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc); + DataType::Type component_type = org->AsArraySet()->GetComponentType(); + vector = new (global_allocator_) HArraySet( + org->InputAt(0), opa, opb, component_type, org->GetSideEffects(), dex_pc); } else { bool is_string_char_at = org->AsArrayGet()->IsStringCharAt(); vector = new (global_allocator_) HArrayGet( - org->InputAt(0), opa, type, kNoDexPc, is_string_char_at); + org->InputAt(0), opa, org->GetType(), org->GetSideEffects(), dex_pc, is_string_char_at); } } vector_map_->Put(org, vector); } +void HLoopOptimization::GenerateVecReductionPhi(HPhi* phi) { + DCHECK(reductions_->find(phi) != reductions_->end()); + DCHECK(reductions_->Get(phi->InputAt(1)) == phi); + HInstruction* vector = nullptr; + if (vector_mode_ == kSequential) { + HPhi* new_phi = new (global_allocator_) HPhi( + global_allocator_, kNoRegNumber, 0, phi->GetType()); + vector_header_->AddPhi(new_phi); + vector = new_phi; + } else { + // Link vector reduction back to prior unrolled update, or a first phi. + auto it = vector_permanent_map_->find(phi); + if (it != vector_permanent_map_->end()) { + vector = it->second; + } else { + HPhi* new_phi = new (global_allocator_) HPhi( + global_allocator_, kNoRegNumber, 0, HVecOperation::kSIMDType); + vector_header_->AddPhi(new_phi); + vector = new_phi; + } + } + vector_map_->Put(phi, vector); +} + +void HLoopOptimization::GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction) { + HInstruction* new_phi = vector_map_->Get(phi); + HInstruction* new_init = reductions_->Get(phi); + HInstruction* new_red = vector_map_->Get(reduction); + // Link unrolled vector loop back to new phi. + for (; !new_phi->IsPhi(); new_phi = vector_permanent_map_->Get(new_phi)) { + DCHECK(new_phi->IsVecOperation()); + } + // Prepare the new initialization. + if (vector_mode_ == kVector) { + // Generate a [initial, 0, .., 0] vector for add or + // a [initial, initial, .., initial] vector for min/max. 
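// ---- Editor's note (illustrative sketch, not part of the patch) ----
// The "adjusted/original" alignment attached to vector memory operations comes from
// ComputeAlignment (defined further below). For a known constant start index it reduces to
// plain byte arithmetic over the runtime's guaranteed base alignment and hidden data offset.
#include <cstdint>

uint32_t StartMisalignment(uint32_t hidden_offset,     // offset of array/string data in the object
                           uint32_t element_size,      // element size in bytes
                           int64_t start_index,        // constant start index, plus any peeling
                           uint32_t base_alignment) {  // alignment guaranteed for the object
  uint32_t start_offset =
      hidden_offset + static_cast<uint32_t>(start_index) * element_size;
  return start_offset & (base_alignment - 1u);         // offset within the base alignment
}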
+ HVecOperation* red_vector = new_red->AsVecOperation(); + HVecReduce::ReductionKind kind = GetReductionKind(red_vector); + uint32_t vector_length = red_vector->GetVectorLength(); + DataType::Type type = red_vector->GetPackedType(); + if (kind == HVecReduce::ReductionKind::kSum) { + new_init = Insert(vector_preheader_, + new (global_allocator_) HVecSetScalars(global_allocator_, + &new_init, + type, + vector_length, + 1, + kNoDexPc)); + } else { + new_init = Insert(vector_preheader_, + new (global_allocator_) HVecReplicateScalar(global_allocator_, + new_init, + type, + vector_length, + kNoDexPc)); + } + } else { + new_init = ReduceAndExtractIfNeeded(new_init); + } + // Set the phi inputs. + DCHECK(new_phi->IsPhi()); + new_phi->AsPhi()->AddInput(new_init); + new_phi->AsPhi()->AddInput(new_red); + // New feed value for next phi (safe mutation in iteration). + reductions_->find(phi)->second = new_phi; +} + +HInstruction* HLoopOptimization::ReduceAndExtractIfNeeded(HInstruction* instruction) { + if (instruction->IsPhi()) { + HInstruction* input = instruction->InputAt(1); + if (HVecOperation::ReturnsSIMDValue(input)) { + DCHECK(!input->IsPhi()); + HVecOperation* input_vector = input->AsVecOperation(); + uint32_t vector_length = input_vector->GetVectorLength(); + DataType::Type type = input_vector->GetPackedType(); + HVecReduce::ReductionKind kind = GetReductionKind(input_vector); + HBasicBlock* exit = instruction->GetBlock()->GetSuccessors()[0]; + // Generate a vector reduction and scalar extract + // x = REDUCE( [x_1, .., x_n] ) + // y = x_1 + // along the exit of the defining loop. + HInstruction* reduce = new (global_allocator_) HVecReduce( + global_allocator_, instruction, type, vector_length, kind, kNoDexPc); + exit->InsertInstructionBefore(reduce, exit->GetFirstInstruction()); + instruction = new (global_allocator_) HVecExtractScalar( + global_allocator_, reduce, type, vector_length, 0, kNoDexPc); + exit->InsertInstructionAfter(instruction, reduce); + } + } + return instruction; +} + #define GENERATE_VEC(x, y) \ if (vector_mode_ == kVector) { \ vector = (x); \ @@ -1323,79 +1686,71 @@ void HLoopOptimization::GenerateVecMem(HInstruction* org, void HLoopOptimization::GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, - Primitive::Type type, - bool is_unsigned) { - if (vector_mode_ == kSequential) { - // Non-converting scalar code follows implicit integral promotion. 
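// ---- Editor's note (illustrative sketch, not part of the patch) ----
// Scalar model of why a sum reduction is seeded with [initial, 0, .., 0] and folded at the
// loop exit by HVecReduce + HVecExtractScalar (four lanes assumed for illustration).
#include <array>
#include <cstddef>
#include <cstdint>
#include <numeric>

constexpr std::size_t kNumLanes = 4;

std::array<int32_t, kNumLanes> SeedSum(int32_t initial) {
  return {initial, 0, 0, 0};  // folding the lanes later re-adds the initial value exactly once
}

int32_t ReduceSum(const std::array<int32_t, kNumLanes>& accumulator) {
  return std::accumulate(accumulator.begin(), accumulator.end(), 0);
}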
- if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean || - type == Primitive::kPrimByte || - type == Primitive::kPrimChar || - type == Primitive::kPrimShort)) { - type = Primitive::kPrimInt; - } - } + DataType::Type type) { + uint32_t dex_pc = org->GetDexPc(); HInstruction* vector = nullptr; + DataType::Type org_type = org->GetType(); switch (org->GetKind()) { case HInstruction::kNeg: DCHECK(opb == nullptr); GENERATE_VEC( - new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_), - new (global_allocator_) HNeg(type, opa)); + new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HNeg(org_type, opa, dex_pc)); case HInstruction::kNot: DCHECK(opb == nullptr); GENERATE_VEC( - new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_), - new (global_allocator_) HNot(type, opa)); + new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HNot(org_type, opa, dex_pc)); case HInstruction::kBooleanNot: DCHECK(opb == nullptr); GENERATE_VEC( - new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_), - new (global_allocator_) HBooleanNot(opa)); + new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HBooleanNot(opa, dex_pc)); case HInstruction::kTypeConversion: DCHECK(opb == nullptr); GENERATE_VEC( - new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_), - new (global_allocator_) HTypeConversion(type, opa, kNoDexPc)); + new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_, dex_pc), + new (global_allocator_) HTypeConversion(org_type, opa, dex_pc)); case HInstruction::kAdd: GENERATE_VEC( - new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HAdd(type, opa, opb)); + new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HAdd(org_type, opa, opb, dex_pc)); case HInstruction::kSub: GENERATE_VEC( - new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HSub(type, opa, opb)); + new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HSub(org_type, opa, opb, dex_pc)); case HInstruction::kMul: GENERATE_VEC( - new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HMul(type, opa, opb)); + new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HMul(org_type, opa, opb, dex_pc)); case HInstruction::kDiv: GENERATE_VEC( - new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HDiv(type, opa, opb, kNoDexPc)); + new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HDiv(org_type, opa, opb, dex_pc)); case HInstruction::kAnd: GENERATE_VEC( - new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HAnd(type, opa, opb)); + new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HAnd(org_type, opa, opb, dex_pc)); case HInstruction::kOr: GENERATE_VEC( - new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_), - new 
(global_allocator_) HOr(type, opa, opb)); + new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HOr(org_type, opa, opb, dex_pc)); case HInstruction::kXor: GENERATE_VEC( - new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HXor(type, opa, opb)); + new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HXor(org_type, opa, opb, dex_pc)); case HInstruction::kShl: GENERATE_VEC( - new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HShl(type, opa, opb)); + new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HShl(org_type, opa, opb, dex_pc)); case HInstruction::kShr: GENERATE_VEC( - new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HShr(type, opa, opb)); + new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HShr(org_type, opa, opb, dex_pc)); case HInstruction::kUShr: GENERATE_VEC( - new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_), - new (global_allocator_) HUShr(type, opa, opb)); + new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_, dex_pc), + new (global_allocator_) HUShr(org_type, opa, opb, dex_pc)); case HInstruction::kInvokeStaticOrDirect: { HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect(); if (vector_mode_ == kVector) { @@ -1405,26 +1760,11 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, case Intrinsics::kMathAbsFloat: case Intrinsics::kMathAbsDouble: DCHECK(opb == nullptr); - vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_); - break; - case Intrinsics::kMathMinIntInt: - case Intrinsics::kMathMinLongLong: - case Intrinsics::kMathMinFloatFloat: - case Intrinsics::kMathMinDoubleDouble: { vector = new (global_allocator_) - HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned); + HVecAbs(global_allocator_, opa, type, vector_length_, dex_pc); break; - } - case Intrinsics::kMathMaxIntInt: - case Intrinsics::kMathMaxLongLong: - case Intrinsics::kMathMaxFloatFloat: - case Intrinsics::kMathMaxDoubleDouble: { - vector = new (global_allocator_) - HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned); - break; - } default: - LOG(FATAL) << "Unsupported SIMD intrinsic"; + LOG(FATAL) << "Unsupported SIMD intrinsic " << org->GetId(); UNREACHABLE(); } // switch invoke } else { @@ -1476,8 +1816,8 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, // // Method recognizes the following idioms: -// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b -// regular halving add (a + b) >> 1 for unsigned/signed operands a, b +// rounding halving add (a + b + 1) >> 1 for unsigned/signed operands a, b +// truncated halving add (a + b) >> 1 for unsigned/signed operands a, b // Provided that the operands are promoted to a wider form to do the arithmetic and // then cast back to narrower form, the idioms can be mapped into efficient SIMD // implementation that operates directly in narrower form (plus one extra bit). 
@@ -1486,15 +1826,14 @@ void HLoopOptimization::GenerateVecOp(HInstruction* org, bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* instruction, bool generate_code, - Primitive::Type type, + DataType::Type type, uint64_t restrictions) { // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1 // (note whether the sign bit in wider precision is shifted in has no effect // on the narrow precision computed by the idiom). - int64_t distance = 0; if ((instruction->IsShr() || instruction->IsUShr()) && - IsInt64AndGet(instruction->InputAt(1), /*out*/ &distance) && distance == 1) { + IsInt64Value(instruction->InputAt(1), 1)) { // Test for (a + b + c) >> 1 for optional constant c. HInstruction* a = nullptr; HInstruction* b = nullptr; @@ -1522,7 +1861,8 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, } // Accept recognized halving add for vectorizable operands. Vectorized code uses the // shorthand idiomatic operation. Sequential code uses the original scalar expressions. - DCHECK(r != nullptr && s != nullptr); + DCHECK(r != nullptr); + DCHECK(s != nullptr); if (generate_code && vector_mode_ != kVector) { // de-idiom r = instruction->InputAt(0); s = instruction->InputAt(1); @@ -1535,10 +1875,11 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, global_allocator_, vector_map_->Get(r), vector_map_->Get(s), - type, + HVecOperation::ToProperType(type, is_unsigned), vector_length_, - is_unsigned, - is_rounded)); + is_rounded, + kNoDexPc)); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); } else { GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type); } @@ -1550,44 +1891,195 @@ bool HLoopOptimization::VectorizeHalvingAddIdiom(LoopNode* node, return false; } +// Method recognizes the following idiom: +// q += ABS(a - b) for signed operands a, b +// Provided that the operands have the same type or are promoted to a wider form. +// Since this may involve a vector length change, the idiom is handled by going directly +// to a sad-accumulate node (rather than relying combining finer grained nodes later). +// TODO: unsigned SAD too? +bool HLoopOptimization::VectorizeSADIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type reduction_type, + uint64_t restrictions) { + // Filter integral "q += ABS(a - b);" reduction, where ABS and SUB + // are done in the same precision (either int or long). + if (!instruction->IsAdd() || + (reduction_type != DataType::Type::kInt32 && reduction_type != DataType::Type::kInt64)) { + return false; + } + HInstruction* q = instruction->InputAt(0); + HInstruction* v = instruction->InputAt(1); + HInstruction* a = nullptr; + HInstruction* b = nullptr; + if (v->IsInvokeStaticOrDirect() && + (v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsInt || + v->AsInvokeStaticOrDirect()->GetIntrinsic() == Intrinsics::kMathAbsLong)) { + HInstruction* x = v->InputAt(0); + if (x->GetType() == reduction_type) { + int64_t c = 0; + if (x->IsSub()) { + a = x->InputAt(0); + b = x->InputAt(1); + } else if (IsAddConst(x, /*out*/ &a, /*out*/ &c)) { + b = graph_->GetConstant(reduction_type, -c); // hidden SUB! + } + } + } + if (a == nullptr || b == nullptr) { + return false; + } + // Accept same-type or consistent sign extension for narrower-type on operands a and b. + // The same-type or narrower operands are called r (a or lower) and s (b or lower). + // We inspect the operands carefully to pick the most suited type. 
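// ---- Editor's note (illustrative sketch, not part of the patch) ----
// The two recognized halving-add forms written out for unsigned bytes: the operands are
// implicitly promoted to int, the arithmetic is done in the wider form, and the result is
// narrowed back, which is exactly the pattern mapped to a single HVecHalvingAdd.
#include <cstdint>

uint8_t RoundingHalvingAdd(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((a + b + 1) >> 1);  // rounding halving add
}

uint8_t TruncatedHalvingAdd(uint8_t a, uint8_t b) {
  return static_cast<uint8_t>((a + b) >> 1);      // truncated halving add
}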
+ HInstruction* r = a; + HInstruction* s = b; + bool is_unsigned = false; + DataType::Type sub_type = a->GetType(); + if (DataType::Size(b->GetType()) < DataType::Size(sub_type)) { + sub_type = b->GetType(); + } + if (a->IsTypeConversion() && + DataType::Size(a->InputAt(0)->GetType()) < DataType::Size(sub_type)) { + sub_type = a->InputAt(0)->GetType(); + } + if (b->IsTypeConversion() && + DataType::Size(b->InputAt(0)->GetType()) < DataType::Size(sub_type)) { + sub_type = b->InputAt(0)->GetType(); + } + if (reduction_type != sub_type && + (!IsNarrowerOperands(a, b, sub_type, &r, &s, &is_unsigned) || is_unsigned)) { + return false; + } + // Try same/narrower type and deal with vector restrictions. + if (!TrySetVectorType(sub_type, &restrictions) || + HasVectorRestrictions(restrictions, kNoSAD) || + (reduction_type != sub_type && HasVectorRestrictions(restrictions, kNoWideSAD))) { + return false; + } + // Accept SAD idiom for vectorizable operands. Vectorized code uses the shorthand + // idiomatic operation. Sequential code uses the original scalar expressions. + DCHECK(r != nullptr); + DCHECK(s != nullptr); + if (generate_code && vector_mode_ != kVector) { // de-idiom + r = s = v->InputAt(0); + } + if (VectorizeUse(node, q, generate_code, sub_type, restrictions) && + VectorizeUse(node, r, generate_code, sub_type, restrictions) && + VectorizeUse(node, s, generate_code, sub_type, restrictions)) { + if (generate_code) { + reduction_type = HVecOperation::ToProperType(reduction_type, is_unsigned); + if (vector_mode_ == kVector) { + vector_map_->Put(instruction, new (global_allocator_) HVecSADAccumulate( + global_allocator_, + vector_map_->Get(q), + vector_map_->Get(r), + vector_map_->Get(s), + reduction_type, + GetOtherVL(reduction_type, sub_type, vector_length_), + kNoDexPc)); + MaybeRecordStat(stats_, MethodCompilationStat::kLoopVectorizedIdiom); + } else { + GenerateVecOp(v, vector_map_->Get(r), nullptr, reduction_type); + GenerateVecOp(instruction, vector_map_->Get(q), vector_map_->Get(v), reduction_type); + } + } + return true; + } + return false; +} + // // Vectorization heuristics. // +Alignment HLoopOptimization::ComputeAlignment(HInstruction* offset, + DataType::Type type, + bool is_string_char_at, + uint32_t peeling) { + // Combine the alignment and hidden offset that is guaranteed by + // the Android runtime with a known starting index adjusted as bytes. + int64_t value = 0; + if (IsInt64AndGet(offset, /*out*/ &value)) { + uint32_t start_offset = + HiddenOffset(type, is_string_char_at) + (value + peeling) * DataType::Size(type); + return Alignment(BaseAlignment(), start_offset & (BaseAlignment() - 1u)); + } + // Otherwise, the Android runtime guarantees at least natural alignment. + return Alignment(DataType::Size(type), 0); +} + +void HLoopOptimization::SetAlignmentStrategy(uint32_t peeling_votes[], + const ArrayReference* peeling_candidate) { + // Current heuristic: pick the best static loop peeling factor, if any, + // or otherwise use dynamic loop peeling on suggested peeling candidate. 
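// ---- Editor's note (illustrative sketch, not part of the patch) ----
// The SAD idiom accepted by VectorizeSADIdiom, at the source level: narrower operands
// (here int8) are widened for the subtraction while the accumulation stays in int, which
// HVecSADAccumulate performs in one step before the final reduction at the loop exit.
#include <cstdint>
#include <cstdlib>

int32_t SumOfAbsoluteDifferences(const int8_t* a, const int8_t* b, int n) {
  int32_t q = 0;
  for (int i = 0; i < n; i++) {
    q += std::abs(a[i] - b[i]);  // q += ABS(a - b); the subtraction happens in int
  }
  return q;                      // reduction value, used only after the loop
}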
+ uint32_t max_vote = 0; + for (int32_t i = 0; i < 16; i++) { + if (peeling_votes[i] > max_vote) { + max_vote = peeling_votes[i]; + vector_static_peeling_factor_ = i; + } + } + if (max_vote == 0) { + vector_dynamic_peeling_candidate_ = peeling_candidate; + } +} + +uint32_t HLoopOptimization::MaxNumberPeeled() { + if (vector_dynamic_peeling_candidate_ != nullptr) { + return vector_length_ - 1u; // worst-case + } + return vector_static_peeling_factor_; // known exactly +} + bool HLoopOptimization::IsVectorizationProfitable(int64_t trip_count) { - // Current heuristic: non-empty body with sufficient number - // of iterations (if known). + // Current heuristic: non-empty body with sufficient number of iterations (if known). // TODO: refine by looking at e.g. operation count, alignment, etc. + // TODO: trip count is really unsigned entity, provided the guarding test + // is satisfied; deal with this more carefully later + uint32_t max_peel = MaxNumberPeeled(); if (vector_length_ == 0) { return false; // nothing found - } else if (0 < trip_count && trip_count < vector_length_) { + } else if (trip_count < 0) { + return false; // guard against non-taken/large + } else if ((0 < trip_count) && (trip_count < (vector_length_ + max_peel))) { return false; // insufficient iterations } return true; } -void HLoopOptimization::SetPeelingCandidate(int64_t trip_count ATTRIBUTE_UNUSED) { - // Current heuristic: none. - // TODO: implement -} +static constexpr uint32_t ARM64_SIMD_MAXIMUM_UNROLL_FACTOR = 8; +static constexpr uint32_t ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE = 50; uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_count) { - // Current heuristic: unroll by 2 on ARM64/X86 for large known trip - // counts and small loop bodies. - // TODO: refine with operation count, remaining iterations, etc. - // Artem had some really cool ideas for this already. + uint32_t max_peel = MaxNumberPeeled(); switch (compiler_driver_->GetInstructionSet()) { - case kArm64: - case kX86: - case kX86_64: { - size_t num_instructions = block->GetInstructions().CountSize(); - if (num_instructions <= 10 && trip_count >= 4 * vector_length_) { - return 2; + case InstructionSet::kArm64: { + // Don't unroll with insufficient iterations. + // TODO: Unroll loops with unknown trip count. + DCHECK_NE(vector_length_, 0u); + if (trip_count < (2 * vector_length_ + max_peel)) { + return kNoUnrollingFactor; + } + // Don't unroll for large loop body size. + uint32_t instruction_count = block->GetInstructions().CountSize(); + if (instruction_count >= ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE) { + return kNoUnrollingFactor; } - return 1; + // Find a beneficial unroll factor with the following restrictions: + // - At least one iteration of the transformed loop should be executed. + // - The loop body shouldn't be "too big" (heuristic). + uint32_t uf1 = ARM64_SIMD_HEURISTIC_MAX_BODY_SIZE / instruction_count; + uint32_t uf2 = (trip_count - max_peel) / vector_length_; + uint32_t unroll_factor = + TruncToPowerOfTwo(std::min({uf1, uf2, ARM64_SIMD_MAXIMUM_UNROLL_FACTOR})); + DCHECK_GE(unroll_factor, 1u); + return unroll_factor; } + case InstructionSet::kX86: + case InstructionSet::kX86_64: default: - return 1; + return kNoUnrollingFactor; } } @@ -1596,13 +2088,17 @@ uint32_t HLoopOptimization::GetUnrollingFactor(HBasicBlock* block, int64_t trip_ // bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) { + // Start with empty phi induction. 
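// ---- Editor's note (illustrative sketch, not part of the patch) ----
// The ARM64 unrolling heuristic restated as plain arithmetic; TruncToPowerOfTwoSketch is a
// hypothetical stand-in for the utility the pass uses, and 1 plays the role of
// kNoUnrollingFactor.
#include <algorithm>
#include <cstdint>

uint32_t TruncToPowerOfTwoSketch(uint32_t x) {
  uint32_t p = 1u;
  while (p * 2u <= x) {
    p *= 2u;  // round down to a power of two
  }
  return p;
}

uint32_t Arm64SimdUnrollFactor(uint32_t body_size, int64_t trip_count,
                               uint32_t vector_length, uint32_t max_peel) {
  if (trip_count < 2 * vector_length + max_peel) {
    return 1u;  // insufficient iterations for even one unrolled vector iteration
  }
  if (body_size >= 50u) {
    return 1u;  // loop body too large to unroll
  }
  uint32_t uf1 = 50u / body_size;                              // instruction-count budget
  uint32_t uf2 = static_cast<uint32_t>((trip_count - max_peel) / vector_length);
  return TruncToPowerOfTwoSketch(std::min({uf1, uf2, 8u}));    // capped at a factor of 8
}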
+ iset_->clear(); + // Special case Phis that have equivalent in a debuggable setup. Our graph checker isn't // smart enough to follow strongly connected components (and it's probably not worth // it to make it so). See b/33775412. if (graph_->IsDebuggable() && phi->HasEquivalentPhi()) { return false; } - DCHECK(iset_->empty()); + + // Lookup phi induction cycle. ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi); if (set != nullptr) { for (HInstruction* i : *set) { @@ -1614,6 +2110,7 @@ bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) { } else if (!i->IsRemovable()) { return false; } else if (i != phi && restrict_uses) { + // Deal with regular uses. for (const HUseListNode<HInstruction*>& use : i->GetUses()) { if (set->find(use.GetUser()) == set->end()) { return false; @@ -1627,17 +2124,65 @@ bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) { return false; } -// Find: phi: Phi(init, addsub) -// s: SuspendCheck -// c: Condition(phi, bound) -// i: If(c) -// TODO: Find a less pattern matching approach? -bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) { +bool HLoopOptimization::TrySetPhiReduction(HPhi* phi) { DCHECK(iset_->empty()); - HInstruction* phi = block->GetFirstPhi(); - if (phi != nullptr && - phi->GetNext() == nullptr && - TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) { + // Only unclassified phi cycles are candidates for reductions. + if (induction_range_.IsClassified(phi)) { + return false; + } + // Accept operations like x = x + .., provided that the phi and the reduction are + // used exactly once inside the loop, and by each other. + HInputsRef inputs = phi->GetInputs(); + if (inputs.size() == 2) { + HInstruction* reduction = inputs[1]; + if (HasReductionFormat(reduction, phi)) { + HLoopInformation* loop_info = phi->GetBlock()->GetLoopInformation(); + uint32_t use_count = 0; + bool single_use_inside_loop = + // Reduction update only used by phi. + reduction->GetUses().HasExactlyOneElement() && + !reduction->HasEnvironmentUses() && + // Reduction update is only use of phi inside the loop. + IsOnlyUsedAfterLoop(loop_info, phi, /*collect_loop_uses*/ true, &use_count) && + iset_->size() == 1; + iset_->clear(); // leave the way you found it + if (single_use_inside_loop) { + // Link reduction back, and start recording feed value. + reductions_->Put(reduction, phi); + reductions_->Put(phi, phi->InputAt(0)); + return true; + } + } + } + return false; +} + +bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi) { + // Start with empty phi induction and reductions. + iset_->clear(); + reductions_->clear(); + + // Scan the phis to find the following (the induction structure has already + // been optimized, so we don't need to worry about trivial cases): + // (1) optional reductions in loop, + // (2) the main induction, used in loop control. + HPhi* phi = nullptr; + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + if (TrySetPhiReduction(it.Current()->AsPhi())) { + continue; + } else if (phi == nullptr) { + // Found the first candidate for main induction. 
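// ---- Editor's note (illustrative sketch, not part of the patch) ----
// The phi cycles accepted by TrySetPhiReduction correspond to source-level reductions of the
// form x = x op .., where the update and the phi only use each other inside the loop and the
// value escapes only after the loop, as in this hypothetical example:
#include <cstdint>

int32_t SumArray(const int32_t* a, int n) {
  int32_t sum = 0;        // the phi's initial feed value, recorded in reductions_
  for (int i = 0; i < n; i++) {
    sum += a[i];          // the single reduction update, used only by the phi
  }
  return sum;             // the only use of the reduction value, after the loop
}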
+ phi = it.Current()->AsPhi(); + } else { + return false; + } + } + + // Then test for a typical loopheader: + // s: SuspendCheck + // c: Condition(phi, bound) + // i: If(c) + if (phi != nullptr && TrySetPhiInduction(phi, /*restrict_uses*/ false)) { HInstruction* s = block->GetFirstInstruction(); if (s != nullptr && s->IsSuspendCheck()) { HInstruction* c = s->GetNext(); @@ -1649,6 +2194,7 @@ bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) { if (i != nullptr && i->IsIf() && i->InputAt(0) == c) { iset_->insert(c); iset_->insert(s); + *main_phi = phi; return true; } } @@ -1672,6 +2218,7 @@ bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) { bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info, HInstruction* instruction) { + // Deal with regular uses. for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) { return true; @@ -1683,7 +2230,8 @@ bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info, bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info, HInstruction* instruction, bool collect_loop_uses, - /*out*/ int32_t* use_count) { + /*out*/ uint32_t* use_count) { + // Deal with regular uses. for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { HInstruction* user = use.GetUser(); if (iset_->find(user) == iset_->end()) { // not excluded? @@ -1709,6 +2257,7 @@ bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info, // Try to replace outside uses with the last value. if (induction_range_.CanGenerateLastValue(instruction)) { HInstruction* replacement = induction_range_.GenerateLastValue(instruction, graph_, block); + // Deal with regular uses. const HUseList<HInstruction*>& uses = instruction->GetUses(); for (auto it = uses.begin(), end = uses.end(); it != end;) { HInstruction* user = it->GetUser(); @@ -1724,6 +2273,7 @@ bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info, induction_range_.Replace(user, instruction, replacement); // update induction } } + // Deal with environment uses. const HUseList<HEnvironment*>& env_uses = instruction->GetEnvUses(); for (auto it = env_uses.begin(), end = env_uses.end(); it != end;) { HEnvironment* user = it->GetUser(); @@ -1739,7 +2289,6 @@ bool HLoopOptimization::TryReplaceWithLastValue(HLoopInformation* loop_info, } } } - induction_simplication_count_++; return true; } return false; @@ -1752,7 +2301,7 @@ bool HLoopOptimization::TryAssignLastValue(HLoopInformation* loop_info, // Assigning the last value is always successful if there are no uses. // Otherwise, it succeeds in a no early-exit loop by generating the // proper last value assignment. 
- int32_t use_count = 0; + uint32_t use_count = 0; return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) && (use_count == 0 || (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(loop_info, instruction, block))); diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h index de4bd85fc8..d70751037b 100644 --- a/compiler/optimizing/loop_optimization.h +++ b/compiler/optimizing/loop_optimization.h @@ -17,6 +17,8 @@ #ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ #define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_ +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "induction_var_range.h" #include "nodes.h" #include "optimization.h" @@ -34,7 +36,9 @@ class HLoopOptimization : public HOptimization { public: HLoopOptimization(HGraph* graph, CompilerDriver* compiler_driver, - HInductionVarAnalysis* induction_analysis); + HInductionVarAnalysis* induction_analysis, + OptimizingCompilerStats* stats, + const char* name = kLoopOptimizationPassName); void Run() OVERRIDE; @@ -62,17 +66,19 @@ class HLoopOptimization : public HOptimization { * Vectorization restrictions (bit mask). */ enum VectorRestrictions { - kNone = 0, // no restrictions - kNoMul = 1, // no multiplication - kNoDiv = 2, // no division - kNoShift = 4, // no shift - kNoShr = 8, // no arithmetic shift right - kNoHiBits = 16, // "wider" operations cannot bring in higher order bits - kNoSignedHAdd = 32, // no signed halving add - kNoUnroundedHAdd = 64, // no unrounded halving add - kNoAbs = 128, // no absolute value - kNoMinMax = 256, // no min/max - kNoStringCharAt = 512, // no StringCharAt + kNone = 0, // no restrictions + kNoMul = 1 << 0, // no multiplication + kNoDiv = 1 << 1, // no division + kNoShift = 1 << 2, // no shift + kNoShr = 1 << 3, // no arithmetic shift right + kNoHiBits = 1 << 4, // "wider" operations cannot bring in higher order bits + kNoSignedHAdd = 1 << 5, // no signed halving add + kNoUnroundedHAdd = 1 << 6, // no unrounded halving add + kNoAbs = 1 << 7, // no absolute value + kNoStringCharAt = 1 << 8, // no StringCharAt + kNoReduction = 1 << 9, // no reduction + kNoSAD = 1 << 10, // no sum of absolute differences (SAD) + kNoWideSAD = 1 << 11, // no sum of absolute differences (SAD) with operand widening }; /* @@ -88,34 +94,53 @@ class HLoopOptimization : public HOptimization { * Representation of a unit-stride array reference. */ struct ArrayReference { - ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l) - : base(b), offset(o), type(t), lhs(l) { } + ArrayReference(HInstruction* b, HInstruction* o, DataType::Type t, bool l, bool c = false) + : base(b), offset(o), type(t), lhs(l), is_string_char_at(c) { } bool operator<(const ArrayReference& other) const { return (base < other.base) || (base == other.base && (offset < other.offset || (offset == other.offset && (type < other.type || - (type == other.type && lhs < other.lhs))))); + (type == other.type && + (lhs < other.lhs || + (lhs == other.lhs && + is_string_char_at < other.is_string_char_at))))))); } - HInstruction* base; // base address - HInstruction* offset; // offset + i - Primitive::Type type; // component type - bool lhs; // def/use + HInstruction* base; // base address + HInstruction* offset; // offset + i + DataType::Type type; // component type + bool lhs; // def/use + bool is_string_char_at; // compressed string read }; + // // Loop setup and traversal. 
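
The nested comparisons in ArrayReference::operator< above implement a lexicographic order over (base, offset, type, lhs, is_string_char_at). The same ordering can be expressed with std::tie; a sketch with placeholder field types, not the actual ART types:

#include <tuple>

struct ToyArrayReference {
  const void* base;
  const void* offset;
  int type;
  bool lhs;
  bool is_string_char_at;

  // Lexicographic comparison over all fields, equivalent to the hand-written
  // chain of nested comparisons.
  bool operator<(const ToyArrayReference& other) const {
    return std::tie(base, offset, type, lhs, is_string_char_at) <
           std::tie(other.base, other.offset, other.type, other.lhs, other.is_string_char_at);
  }
};
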
+ // + void LocalRun(); void AddLoop(HLoopInformation* loop_info); void RemoveLoop(LoopNode* node); - void TraverseLoopsInnerToOuter(LoopNode* node); + // Traverses all loops inner to outer to perform simplifications and optimizations. + // Returns true if loops nested inside current loop (node) have changed. + bool TraverseLoopsInnerToOuter(LoopNode* node); + + // // Optimization. + // + void SimplifyInduction(LoopNode* node); void SimplifyBlocks(LoopNode* node); - void OptimizeInnerLoop(LoopNode* node); + // Performs optimizations specific to inner loop (empty loop removal, + // unrolling, vectorization). Returns true if anything changed. + bool OptimizeInnerLoop(LoopNode* node); + + // // Vectorization analysis and synthesis. + // + bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count); void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count); void GenerateNewLoop(LoopNode* node, @@ -129,43 +154,65 @@ class HLoopOptimization : public HOptimization { bool VectorizeUse(LoopNode* node, HInstruction* instruction, bool generate_code, - Primitive::Type type, + DataType::Type type, uint64_t restrictions); - bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions); + uint32_t GetVectorSizeInBytes(); + bool TrySetVectorType(DataType::Type type, /*out*/ uint64_t* restrictions); bool TrySetVectorLength(uint32_t length); - void GenerateVecInv(HInstruction* org, Primitive::Type type); + void GenerateVecInv(HInstruction* org, DataType::Type type); void GenerateVecSub(HInstruction* org, HInstruction* offset); void GenerateVecMem(HInstruction* org, HInstruction* opa, HInstruction* opb, HInstruction* offset, - Primitive::Type type); + DataType::Type type); + void GenerateVecReductionPhi(HPhi* phi); + void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction); + HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction); void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, - Primitive::Type type, - bool is_unsigned = false); + DataType::Type type); // Vectorization idioms. bool VectorizeHalvingAddIdiom(LoopNode* node, HInstruction* instruction, bool generate_code, - Primitive::Type type, + DataType::Type type, uint64_t restrictions); + bool VectorizeSADIdiom(LoopNode* node, + HInstruction* instruction, + bool generate_code, + DataType::Type type, + uint64_t restrictions); // Vectorization heuristics. + Alignment ComputeAlignment(HInstruction* offset, + DataType::Type type, + bool is_string_char_at, + uint32_t peeling = 0); + void SetAlignmentStrategy(uint32_t peeling_votes[], + const ArrayReference* peeling_candidate); + uint32_t MaxNumberPeeled(); bool IsVectorizationProfitable(int64_t trip_count); - void SetPeelingCandidate(int64_t trip_count); uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count); + // // Helpers. + // + bool TrySetPhiInduction(HPhi* phi, bool restrict_uses); - bool TrySetSimpleLoopHeader(HBasicBlock* block); + bool TrySetPhiReduction(HPhi* phi); + + // Detects loop header with a single induction (returned in main_phi), possibly + // other phis for reductions, but no other side effects. Returns true on success. 
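
VectorizeSADIdiom, declared above, targets the sum-of-absolute-differences pattern in which narrow elements are widened before accumulation. Its scalar shape is roughly the following reference sketch (not ART code; the element types are illustrative):

#include <cstddef>
#include <cstdint>
#include <cstdlib>

// Scalar SAD with operand widening: int8 inputs, int32 accumulator.
int32_t SadReference(const int8_t* a, const int8_t* b, size_t n) {
  int32_t acc = 0;
  for (size_t i = 0; i < n; ++i) {
    acc += std::abs(static_cast<int32_t>(a[i]) - static_cast<int32_t>(b[i]));
  }
  return acc;
}
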
+ bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi); + bool IsEmptyBody(HBasicBlock* block); bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info, HInstruction* instruction, bool collect_loop_uses, - /*out*/ int32_t* use_count); + /*out*/ uint32_t* use_count); bool IsUsedOutsideLoop(HLoopInformation* loop_info, HInstruction* instruction); bool TryReplaceWithLastValue(HLoopInformation* loop_info, @@ -186,7 +233,7 @@ class HLoopOptimization : public HOptimization { // Phase-local heap memory allocator for the loop optimizer. Storage obtained // through this allocator is immediately released when the loop optimizer is done. - ArenaAllocator* loop_allocator_; + ScopedArenaAllocator* loop_allocator_; // Global heap memory allocator. Used to build HIR. ArenaAllocator* global_allocator_; @@ -198,12 +245,14 @@ class HLoopOptimization : public HOptimization { // Temporary bookkeeping of a set of instructions. // Contents reside in phase-local heap memory. - ArenaSet<HInstruction*>* iset_; + ScopedArenaSet<HInstruction*>* iset_; - // Counter that tracks how many induction cycles have been simplified. Useful - // to trigger incremental updates of induction variable analysis of outer loops - // when the induction of inner loops has changed. - uint32_t induction_simplication_count_; + // Temporary bookkeeping of reduction instructions. Mapping is two-fold: + // (1) reductions in the loop-body are mapped back to their phi definition, + // (2) phi definitions are mapped to their initial value (updated during + // code generation to feed the proper values into the new chain). + // Contents reside in phase-local heap memory. + ScopedArenaSafeMap<HInstruction*, HInstruction*>* reductions_; // Flag that tracks if any simplifications have occurred. bool simplified_; @@ -213,10 +262,11 @@ class HLoopOptimization : public HOptimization { // Set of array references in the vector loop. // Contents reside in phase-local heap memory. - ArenaSet<ArrayReference>* vector_refs_; + ScopedArenaSet<ArrayReference>* vector_refs_; - // Dynamic loop peeling candidate for alignment. - const ArrayReference* vector_peeling_candidate_; + // Static or dynamic loop peeling for alignment. + uint32_t vector_static_peeling_factor_; + const ArrayReference* vector_dynamic_peeling_candidate_; // Dynamic data dependence test of the form a != b. HInstruction* vector_runtime_test_a_; @@ -226,7 +276,11 @@ class HLoopOptimization : public HOptimization { // loop (mode is kSequential) and the actual vector loop (mode is kVector). The data // structure maps original instructions into the new instructions. // Contents reside in phase-local heap memory. - ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_; + ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_map_; + + // Permanent mapping used during vectorization synthesis. + // Contents reside in phase-local heap memory. + ScopedArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_; // Temporary vectorization bookkeeping. VectorMode vector_mode_; // synthesis mode diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc index 5b9350689e..db8368986c 100644 --- a/compiler/optimizing/loop_optimization_test.cc +++ b/compiler/optimizing/loop_optimization_test.cc @@ -24,14 +24,12 @@ namespace art { * constructing the loop hierarchy. Actual optimizations are tested * through the checker tests. 
*/ -class LoopOptimizationTest : public CommonCompilerTest { +class LoopOptimizationTest : public OptimizingUnitTest { public: LoopOptimizationTest() - : pool_(), - allocator_(&pool_), - graph_(CreateGraph(&allocator_)), - iva_(new (&allocator_) HInductionVarAnalysis(graph_)), - loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) { + : graph_(CreateGraph()), + iva_(new (GetAllocator()) HInductionVarAnalysis(graph_)), + loop_opt_(new (GetAllocator()) HLoopOptimization(graph_, nullptr, iva_, nullptr)) { BuildGraph(); } @@ -40,38 +38,38 @@ class LoopOptimizationTest : public CommonCompilerTest { /** Constructs bare minimum graph. */ void BuildGraph() { graph_->SetNumberOfVRegs(1); - entry_block_ = new (&allocator_) HBasicBlock(graph_); - return_block_ = new (&allocator_) HBasicBlock(graph_); - exit_block_ = new (&allocator_) HBasicBlock(graph_); + entry_block_ = new (GetAllocator()) HBasicBlock(graph_); + return_block_ = new (GetAllocator()) HBasicBlock(graph_); + exit_block_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry_block_); graph_->AddBlock(return_block_); graph_->AddBlock(exit_block_); graph_->SetEntryBlock(entry_block_); graph_->SetExitBlock(exit_block_); - parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimInt); + parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); entry_block_->AddInstruction(parameter_); - return_block_->AddInstruction(new (&allocator_) HReturnVoid()); - exit_block_->AddInstruction(new (&allocator_) HExit()); + return_block_->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_block_->AddInstruction(new (GetAllocator()) HExit()); entry_block_->AddSuccessor(return_block_); return_block_->AddSuccessor(exit_block_); } /** Adds a loop nest at given position before successor. */ HBasicBlock* AddLoop(HBasicBlock* position, HBasicBlock* successor) { - HBasicBlock* header = new (&allocator_) HBasicBlock(graph_); - HBasicBlock* body = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(header); graph_->AddBlock(body); // Control flow. position->ReplaceSuccessor(successor, header); header->AddSuccessor(body); header->AddSuccessor(successor); - header->AddInstruction(new (&allocator_) HIf(parameter_)); + header->AddInstruction(new (GetAllocator()) HIf(parameter_)); body->AddSuccessor(header); - body->AddInstruction(new (&allocator_) HGoto()); + body->AddInstruction(new (GetAllocator()) HGoto()); return header; } @@ -80,7 +78,8 @@ class LoopOptimizationTest : public CommonCompilerTest { graph_->BuildDominatorTree(); iva_->Run(); // Do not release the loop hierarchy. - loop_opt_->loop_allocator_ = &allocator_; + ScopedArenaAllocator loop_allocator(GetArenaStack()); + loop_opt_->loop_allocator_ = &loop_allocator; loop_opt_->LocalRun(); } @@ -101,8 +100,6 @@ class LoopOptimizationTest : public CommonCompilerTest { } // General building fields. - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; HInductionVarAnalysis* iva_; HLoopOptimization* loop_opt_; @@ -195,4 +192,125 @@ TEST_F(LoopOptimizationTest, LoopNestWithSequence) { EXPECT_EQ("[[[[[[[[[[][][][][][][][][][]]]]]]]]]]", LoopStructure()); } +// Check that SimplifyLoop() doesn't invalidate data flow when ordering loop headers' +// predecessors. 
+// +// This is a test for nodes.cc functionality - HGraph::SimplifyLoop. +TEST_F(LoopOptimizationTest, SimplifyLoopReoderPredecessors) { + // Can't use AddLoop as we want special order for blocks predecessors. + HBasicBlock* header = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* body = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(header); + graph_->AddBlock(body); + + // Control flow: make a loop back edge first in the list of predecessors. + entry_block_->RemoveSuccessor(return_block_); + body->AddSuccessor(header); + entry_block_->AddSuccessor(header); + header->AddSuccessor(body); + header->AddSuccessor(return_block_); + DCHECK(header->GetSuccessors()[1] == return_block_); + + // Data flow. + header->AddInstruction(new (GetAllocator()) HIf(parameter_)); + body->AddInstruction(new (GetAllocator()) HGoto()); + + HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); + HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, parameter_); + header->AddPhi(phi); + body->AddInstruction(add); + + phi->AddInput(add); + phi->AddInput(parameter_); + + graph_->ClearLoopInformation(); + graph_->ClearDominanceInformation(); + graph_->BuildDominatorTree(); + + // Check that after optimizations in BuildDominatorTree()/SimplifyCFG() phi inputs + // are still mapped correctly to the block predecessors. + for (size_t i = 0, e = phi->InputCount(); i < e; i++) { + HInstruction* input = phi->InputAt(i); + ASSERT_TRUE(input->GetBlock()->Dominates(header->GetPredecessors()[i])); + } +} + +// Test that SimplifyLoop() processes the multiple-preheaders loops correctly. +// +// This is a test for nodes.cc functionality - HGraph::SimplifyLoop. +TEST_F(LoopOptimizationTest, SimplifyLoopSinglePreheader) { + HBasicBlock* header = AddLoop(entry_block_, return_block_); + + header->InsertInstructionBefore( + new (GetAllocator()) HSuspendCheck(), header->GetLastInstruction()); + + // Insert an if construct before the loop so it will have two preheaders. + HBasicBlock* if_block = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* preheader0 = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* preheader1 = new (GetAllocator()) HBasicBlock(graph_); + + graph_->AddBlock(if_block); + graph_->AddBlock(preheader0); + graph_->AddBlock(preheader1); + + // Fix successors/predecessors. + entry_block_->ReplaceSuccessor(header, if_block); + if_block->AddSuccessor(preheader0); + if_block->AddSuccessor(preheader1); + preheader0->AddSuccessor(header); + preheader1->AddSuccessor(header); + + if_block->AddInstruction(new (GetAllocator()) HIf(parameter_)); + preheader0->AddInstruction(new (GetAllocator()) HGoto()); + preheader1->AddInstruction(new (GetAllocator()) HGoto()); + + HBasicBlock* body = header->GetSuccessors()[0]; + DCHECK(body != return_block_); + + // Add some data flow. 
+ HIntConstant* const_0 = graph_->GetIntConstant(0); + HIntConstant* const_1 = graph_->GetIntConstant(1); + HIntConstant* const_2 = graph_->GetIntConstant(2); + + HAdd* preheader0_add = new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter_, const_0); + preheader0->AddInstruction(preheader0_add); + HAdd* preheader1_add = new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter_, const_1); + preheader1->AddInstruction(preheader1_add); + + HPhi* header_phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); + header->AddPhi(header_phi); + + HAdd* body_add = new (GetAllocator()) HAdd(DataType::Type::kInt32, parameter_, const_2); + body->AddInstruction(body_add); + + DCHECK(header->GetPredecessors()[0] == body); + DCHECK(header->GetPredecessors()[1] == preheader0); + DCHECK(header->GetPredecessors()[2] == preheader1); + + header_phi->AddInput(body_add); + header_phi->AddInput(preheader0_add); + header_phi->AddInput(preheader1_add); + + graph_->ClearLoopInformation(); + graph_->ClearDominanceInformation(); + graph_->BuildDominatorTree(); + + EXPECT_EQ(header->GetPredecessors().size(), 2u); + EXPECT_EQ(header->GetPredecessors()[1], body); + + HBasicBlock* new_preheader = header->GetLoopInformation()->GetPreHeader(); + EXPECT_EQ(preheader0->GetSingleSuccessor(), new_preheader); + EXPECT_EQ(preheader1->GetSingleSuccessor(), new_preheader); + + EXPECT_EQ(new_preheader->GetPhis().CountSize(), 1u); + HPhi* new_preheader_phi = new_preheader->GetFirstPhi()->AsPhi(); + EXPECT_EQ(new_preheader_phi->InputCount(), 2u); + EXPECT_EQ(new_preheader_phi->InputAt(0), preheader0_add); + EXPECT_EQ(new_preheader_phi->InputAt(1), preheader1_add); + + EXPECT_EQ(header_phi->InputCount(), 2u); + EXPECT_EQ(header_phi->InputAt(0), new_preheader_phi); + EXPECT_EQ(header_phi->InputAt(1), body_add); +} + } // namespace art diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 4ca833707b..f6ba19f22a 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -18,16 +18,16 @@ #include <cfloat> #include "art_method-inl.h" +#include "base/bit_utils.h" +#include "base/bit_vector-inl.h" +#include "base/stl_util.h" #include "class_linker-inl.h" #include "code_generator.h" #include "common_dominator.h" -#include "ssa_builder.h" -#include "base/bit_vector-inl.h" -#include "base/bit_utils.h" -#include "base/stl_util.h" #include "intrinsics.h" #include "mirror/class-inl.h" #include "scoped_thread_state_change-inl.h" +#include "ssa_builder.h" namespace art { @@ -55,14 +55,18 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) { // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks. DCHECK_EQ(visited->GetHighestBitSet(), -1); + // Allocate memory from local ScopedArenaAllocator. + ScopedArenaAllocator allocator(GetArenaStack()); // Nodes that we're currently visiting, indexed by block id. - ArenaBitVector visiting(arena_, blocks_.size(), false, kArenaAllocGraphBuilder); + ArenaBitVector visiting( + &allocator, blocks_.size(), /* expandable */ false, kArenaAllocGraphBuilder); + visiting.ClearAllBits(); // Number of successors visited from a given node, indexed by block id. - ArenaVector<size_t> successors_visited(blocks_.size(), - 0u, - arena_->Adapter(kArenaAllocGraphBuilder)); + ScopedArenaVector<size_t> successors_visited(blocks_.size(), + 0u, + allocator.Adapter(kArenaAllocGraphBuilder)); // Stack of nodes that we're currently visiting (same as marked in "visiting" above). 
- ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder)); + ScopedArenaVector<HBasicBlock*> worklist(allocator.Adapter(kArenaAllocGraphBuilder)); constexpr size_t kDefaultWorklistSize = 8; worklist.reserve(kDefaultWorklistSize); visited->SetBit(entry_block_->GetBlockId()); @@ -90,7 +94,8 @@ void HGraph::FindBackEdges(ArenaBitVector* visited) { } } -static void RemoveEnvironmentUses(HInstruction* instruction) { +// Remove the environment use records of the instruction for users. +void RemoveEnvironmentUses(HInstruction* instruction) { for (HEnvironment* environment = instruction->GetEnvironment(); environment != nullptr; environment = environment->GetParent()) { @@ -102,6 +107,35 @@ static void RemoveEnvironmentUses(HInstruction* instruction) { } } +// Return whether the instruction has an environment and it's used by others. +bool HasEnvironmentUsedByOthers(HInstruction* instruction) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + HInstruction* user = environment->GetInstructionAt(i); + if (user != nullptr) { + return true; + } + } + } + return false; +} + +// Reset environment records of the instruction itself. +void ResetEnvironmentInputRecords(HInstruction* instruction) { + for (HEnvironment* environment = instruction->GetEnvironment(); + environment != nullptr; + environment = environment->GetParent()) { + for (size_t i = 0, e = environment->Size(); i < e; ++i) { + DCHECK(environment->GetHolder() == instruction); + if (environment->GetInstructionAt(i) != nullptr) { + environment->SetRawEnvAt(i, nullptr); + } + } + } +} + static void RemoveAsUser(HInstruction* instruction) { instruction->RemoveAsUserOfAllInputs(); RemoveEnvironmentUses(instruction); @@ -143,7 +177,11 @@ void HGraph::RemoveDeadBlocks(const ArenaBitVector& visited) { } GraphAnalysisResult HGraph::BuildDominatorTree() { - ArenaBitVector visited(arena_, blocks_.size(), false, kArenaAllocGraphBuilder); + // Allocate memory from local ScopedArenaAllocator. + ScopedArenaAllocator allocator(GetArenaStack()); + + ArenaBitVector visited(&allocator, blocks_.size(), false, kArenaAllocGraphBuilder); + visited.ClearAllBits(); // (1) Find the back edges in the graph doing a DFS traversal. FindBackEdges(&visited); @@ -228,14 +266,16 @@ void HGraph::ComputeDominanceInformation() { reverse_post_order_.reserve(blocks_.size()); reverse_post_order_.push_back(entry_block_); + // Allocate memory from local ScopedArenaAllocator. + ScopedArenaAllocator allocator(GetArenaStack()); // Number of visits of a given node, indexed by block id. - ArenaVector<size_t> visits(blocks_.size(), 0u, arena_->Adapter(kArenaAllocGraphBuilder)); + ScopedArenaVector<size_t> visits(blocks_.size(), 0u, allocator.Adapter(kArenaAllocGraphBuilder)); // Number of successors visited from a given node, indexed by block id. - ArenaVector<size_t> successors_visited(blocks_.size(), - 0u, - arena_->Adapter(kArenaAllocGraphBuilder)); + ScopedArenaVector<size_t> successors_visited(blocks_.size(), + 0u, + allocator.Adapter(kArenaAllocGraphBuilder)); // Nodes for which we need to visit successors. 
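
FindBackEdges and ComputeDominanceInformation above both use the same iteration shape: an explicit worklist plus a per-block counter of visited successors, instead of recursion. A compact standalone sketch of that shape computing a post-order over adjacency lists (hypothetical helper, not ART code):

#include <cstddef>
#include <vector>

// Iterative post-order DFS: block ids index into `successors`.
std::vector<size_t> PostOrder(const std::vector<std::vector<size_t>>& successors, size_t entry) {
  std::vector<bool> visited(successors.size(), false);
  std::vector<size_t> successors_visited(successors.size(), 0u);
  std::vector<size_t> worklist;
  std::vector<size_t> post_order;
  visited[entry] = true;
  worklist.push_back(entry);
  while (!worklist.empty()) {
    size_t current = worklist.back();
    if (successors_visited[current] == successors[current].size()) {
      post_order.push_back(current);  // all successors handled, retire the block
      worklist.pop_back();
    } else {
      size_t successor = successors[current][successors_visited[current]++];
      if (!visited[successor]) {
        visited[successor] = true;
        worklist.push_back(successor);
      }
    }
  }
  return post_order;
}
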
- ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder)); + ScopedArenaVector<HBasicBlock*> worklist(allocator.Adapter(kArenaAllocGraphBuilder)); constexpr size_t kDefaultWorklistSize = 8; worklist.reserve(kDefaultWorklistSize); worklist.push_back(entry_block_); @@ -305,7 +345,7 @@ void HGraph::ComputeDominanceInformation() { } HBasicBlock* HGraph::SplitEdge(HBasicBlock* block, HBasicBlock* successor) { - HBasicBlock* new_block = new (arena_) HBasicBlock(this, successor->GetDexPc()); + HBasicBlock* new_block = new (allocator_) HBasicBlock(this, successor->GetDexPc()); AddBlock(new_block); // Use `InsertBetween` to ensure the predecessor index and successor index of // `block` and `successor` are preserved. @@ -317,7 +357,7 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { // Insert a new node between `block` and `successor` to split the // critical edge. HBasicBlock* new_block = SplitEdge(block, successor); - new_block->AddInstruction(new (arena_) HGoto(successor->GetDexPc())); + new_block->AddInstruction(new (allocator_) HGoto(successor->GetDexPc())); if (successor->IsLoopHeader()) { // If we split at a back edge boundary, make the new block the back edge. HLoopInformation* info = successor->GetLoopInformation(); @@ -328,30 +368,21 @@ void HGraph::SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor) { } } -void HGraph::SimplifyLoop(HBasicBlock* header) { - HLoopInformation* info = header->GetLoopInformation(); - - // Make sure the loop has only one pre header. This simplifies SSA building by having - // to just look at the pre header to know which locals are initialized at entry of the - // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining - // this graph. - size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges(); - if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) { - HBasicBlock* pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); - AddBlock(pre_header); - pre_header->AddInstruction(new (arena_) HGoto(header->GetDexPc())); - - for (size_t pred = 0; pred < header->GetPredecessors().size(); ++pred) { - HBasicBlock* predecessor = header->GetPredecessors()[pred]; - if (!info->IsBackEdge(*predecessor)) { - predecessor->ReplaceSuccessor(header, pre_header); - pred--; - } - } - pre_header->AddSuccessor(header); +// Reorder phi inputs to match reordering of the block's predecessors. +static void FixPhisAfterPredecessorsReodering(HBasicBlock* block, size_t first, size_t second) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* phi = it.Current()->AsPhi(); + HInstruction* first_instr = phi->InputAt(first); + HInstruction* second_instr = phi->InputAt(second); + phi->ReplaceInput(first_instr, second); + phi->ReplaceInput(second_instr, first); } +} - // Make sure the first predecessor of a loop header is the incoming block. +// Make sure that the first predecessor of a loop header is the incoming block. 
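
OrderLoopHeaderPredecessors relies on the invariant that phi input i flows in from predecessor i, which is why FixPhisAfterPredecessorsReodering above swaps phi inputs whenever two predecessors are swapped. A toy illustration of keeping the two in lockstep (simplified data structures, not ART's):

#include <cstddef>
#include <utility>
#include <vector>

struct ToyBlock {
  std::vector<int> predecessor_ids;          // predecessor i ...
  std::vector<std::vector<int>> phi_inputs;  // ... must match input i of every phi
};

void SwapPredecessors(ToyBlock* block, size_t first, size_t second) {
  std::swap(block->predecessor_ids[first], block->predecessor_ids[second]);
  for (std::vector<int>& inputs : block->phi_inputs) {
    std::swap(inputs[first], inputs[second]);  // keep phis consistent with the new order
  }
}
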
+void HGraph::OrderLoopHeaderPredecessors(HBasicBlock* header) { + DCHECK(header->IsLoopHeader()); + HLoopInformation* info = header->GetLoopInformation(); if (info->IsBackEdge(*header->GetPredecessors()[0])) { HBasicBlock* to_swap = header->GetPredecessors()[0]; for (size_t pred = 1, e = header->GetPredecessors().size(); pred < e; ++pred) { @@ -359,10 +390,137 @@ void HGraph::SimplifyLoop(HBasicBlock* header) { if (!info->IsBackEdge(*predecessor)) { header->predecessors_[pred] = to_swap; header->predecessors_[0] = predecessor; + FixPhisAfterPredecessorsReodering(header, 0, pred); break; } } } +} + +// Transform control flow of the loop to a single preheader format (don't touch the data flow). +// New_preheader can be already among the header predecessors - this situation will be correctly +// processed. +static void FixControlForNewSinglePreheader(HBasicBlock* header, HBasicBlock* new_preheader) { + HLoopInformation* loop_info = header->GetLoopInformation(); + for (size_t pred = 0; pred < header->GetPredecessors().size(); ++pred) { + HBasicBlock* predecessor = header->GetPredecessors()[pred]; + if (!loop_info->IsBackEdge(*predecessor) && predecessor != new_preheader) { + predecessor->ReplaceSuccessor(header, new_preheader); + pred--; + } + } +} + +// == Before == == After == +// _________ _________ _________ _________ +// | B0 | | B1 | (old preheaders) | B0 | | B1 | +// |=========| |=========| |=========| |=========| +// | i0 = .. | | i1 = .. | | i0 = .. | | i1 = .. | +// |_________| |_________| |_________| |_________| +// \ / \ / +// \ / ___v____________v___ +// \ / (new preheader) | B20 <- B0, B1 | +// | | |====================| +// | | | i20 = phi(i0, i1) | +// | | |____________________| +// | | | +// /\ | | /\ /\ | /\ +// / v_______v_________v_______v \ / v___________v_____________v \ +// | | B10 <- B0, B1, B2, B3 | | | | B10 <- B20, B2, B3 | | +// | |===========================| | (header) | |===========================| | +// | | i10 = phi(i0, i1, i2, i3) | | | | i10 = phi(i20, i2, i3) | | +// | |___________________________| | | |___________________________| | +// | / \ | | / \ | +// | ... ... | | ... ... | +// | _________ _________ | | _________ _________ | +// | | B2 | | B3 | | | | B2 | | B3 | | +// | |=========| |=========| | (back edges) | |=========| |=========| | +// | | i2 = .. | | i3 = .. | | | | i2 = .. | | i3 = .. | | +// | |_________| |_________| | | |_________| |_________| | +// \ / \ / \ / \ / +// \___/ \___/ \___/ \___/ +// +void HGraph::TransformLoopToSinglePreheaderFormat(HBasicBlock* header) { + HLoopInformation* loop_info = header->GetLoopInformation(); + + HBasicBlock* preheader = new (allocator_) HBasicBlock(this, header->GetDexPc()); + AddBlock(preheader); + preheader->AddInstruction(new (allocator_) HGoto(header->GetDexPc())); + + // If the old header has no Phis then we only need to fix the control flow. + if (header->GetPhis().IsEmpty()) { + FixControlForNewSinglePreheader(header, preheader); + preheader->AddSuccessor(header); + return; + } + + // Find the first non-back edge block in the header's predecessors list. + size_t first_nonbackedge_pred_pos = 0; + bool found = false; + for (size_t pred = 0; pred < header->GetPredecessors().size(); ++pred) { + HBasicBlock* predecessor = header->GetPredecessors()[pred]; + if (!loop_info->IsBackEdge(*predecessor)) { + first_nonbackedge_pred_pos = pred; + found = true; + break; + } + } + + DCHECK(found); + + // Fix the data-flow. 
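
The per-phi work sketched in the diagram above splits each header phi: inputs arriving from outside the loop move into a new phi in the single preheader, and the header phi keeps that merged value plus its back-edge inputs. A simplified model of that split over plain value ids (hypothetical helper, not the pass itself):

#include <cstddef>
#include <vector>

struct PhiSplit {
  std::vector<int> preheader_inputs;  // one entry per old incoming block
  std::vector<int> header_inputs;     // merged value first, back-edge values after
};

PhiSplit SplitHeaderPhi(const std::vector<int>& old_header_inputs,
                        const std::vector<bool>& input_is_back_edge,
                        int merged_value_id) {
  PhiSplit result;
  result.header_inputs.push_back(merged_value_id);  // value of the new preheader phi
  for (size_t i = 0; i < old_header_inputs.size(); ++i) {
    if (input_is_back_edge[i]) {
      result.header_inputs.push_back(old_header_inputs[i]);
    } else {
      result.preheader_inputs.push_back(old_header_inputs[i]);
    }
  }
  return result;
}
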
+ for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) { + HPhi* header_phi = it.Current()->AsPhi(); + + HPhi* preheader_phi = new (GetAllocator()) HPhi(GetAllocator(), + header_phi->GetRegNumber(), + 0, + header_phi->GetType()); + if (header_phi->GetType() == DataType::Type::kReference) { + preheader_phi->SetReferenceTypeInfo(header_phi->GetReferenceTypeInfo()); + } + preheader->AddPhi(preheader_phi); + + HInstruction* orig_input = header_phi->InputAt(first_nonbackedge_pred_pos); + header_phi->ReplaceInput(preheader_phi, first_nonbackedge_pred_pos); + preheader_phi->AddInput(orig_input); + + for (size_t input_pos = first_nonbackedge_pred_pos + 1; + input_pos < header_phi->InputCount(); + input_pos++) { + HInstruction* input = header_phi->InputAt(input_pos); + HBasicBlock* pred_block = header->GetPredecessors()[input_pos]; + + if (loop_info->Contains(*pred_block)) { + DCHECK(loop_info->IsBackEdge(*pred_block)); + } else { + preheader_phi->AddInput(input); + header_phi->RemoveInputAt(input_pos); + input_pos--; + } + } + } + + // Fix the control-flow. + HBasicBlock* first_pred = header->GetPredecessors()[first_nonbackedge_pred_pos]; + preheader->InsertBetween(first_pred, header); + + FixControlForNewSinglePreheader(header, preheader); +} + +void HGraph::SimplifyLoop(HBasicBlock* header) { + HLoopInformation* info = header->GetLoopInformation(); + + // Make sure the loop has only one pre header. This simplifies SSA building by having + // to just look at the pre header to know which locals are initialized at entry of the + // loop. Also, don't allow the entry block to be a pre header: this simplifies inlining + // this graph. + size_t number_of_incomings = header->GetPredecessors().size() - info->NumberOfBackEdges(); + if (number_of_incomings != 1 || (GetEntryBlock()->GetSingleSuccessor() == header)) { + TransformLoopToSinglePreheaderFormat(header); + } + + OrderLoopHeaderPredecessors(header); HInstruction* first_instruction = header->GetFirstInstruction(); if (first_instruction != nullptr && first_instruction->IsSuspendCheck()) { @@ -392,7 +550,7 @@ void HGraph::ComputeTryBlockInformation() { try_entry != &block->GetTryCatchInformation()->GetTryEntry())) { // We are either setting try block membership for the first time or it // has changed. - block->SetTryCatchInformation(new (arena_) TryCatchInformation(*try_entry)); + block->SetTryCatchInformation(new (allocator_) TryCatchInformation(*try_entry)); } } } @@ -449,6 +607,7 @@ GraphAnalysisResult HGraph::AnalyzeLoops() const { if (block->IsCatchBlock()) { // TODO: Dealing with exceptional back edges could be tricky because // they only approximate the real control flow. Bail out for now. + VLOG(compiler) << "Not compiled: Exceptional back edges"; return kAnalysisFailThrowCatchLoop; } block->GetLoopInformation()->Populate(); @@ -499,7 +658,7 @@ HNullConstant* HGraph::GetNullConstant(uint32_t dex_pc) { // not null and not in a block. Otherwise, we need to clear the instruction // id and/or any invariants the graph is assuming when adding new instructions. if ((cached_null_constant_ == nullptr) || (cached_null_constant_->GetBlock() == nullptr)) { - cached_null_constant_ = new (arena_) HNullConstant(dex_pc); + cached_null_constant_ = new (allocator_) HNullConstant(dex_pc); cached_null_constant_->SetReferenceTypeInfo(inexact_object_rti_); InsertConstant(cached_null_constant_); } @@ -515,8 +674,8 @@ HCurrentMethod* HGraph::GetCurrentMethod() { // not null and not in a block. 
Otherwise, we need to clear the instruction // id and/or any invariants the graph is assuming when adding new instructions. if ((cached_current_method_ == nullptr) || (cached_current_method_->GetBlock() == nullptr)) { - cached_current_method_ = new (arena_) HCurrentMethod( - Is64BitInstructionSet(instruction_set_) ? Primitive::kPrimLong : Primitive::kPrimInt, + cached_current_method_ = new (allocator_) HCurrentMethod( + Is64BitInstructionSet(instruction_set_) ? DataType::Type::kInt64 : DataType::Type::kInt32, entry_block_->GetDexPc()); if (entry_block_->GetFirstInstruction() == nullptr) { entry_block_->AddInstruction(cached_current_method_); @@ -537,19 +696,20 @@ std::string HGraph::PrettyMethod(bool with_signature) const { return dex_file_.PrettyMethod(method_idx_, with_signature); } -HConstant* HGraph::GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc) { +HConstant* HGraph::GetConstant(DataType::Type type, int64_t value, uint32_t dex_pc) { switch (type) { - case Primitive::Type::kPrimBoolean: + case DataType::Type::kBool: DCHECK(IsUint<1>(value)); FALLTHROUGH_INTENDED; - case Primitive::Type::kPrimByte: - case Primitive::Type::kPrimChar: - case Primitive::Type::kPrimShort: - case Primitive::Type::kPrimInt: - DCHECK(IsInt(Primitive::ComponentSize(type) * kBitsPerByte, value)); + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: + DCHECK(IsInt(DataType::Size(type) * kBitsPerByte, value)); return GetIntConstant(static_cast<int32_t>(value), dex_pc); - case Primitive::Type::kPrimLong: + case DataType::Type::kInt64: return GetLongConstant(value, dex_pc); default: @@ -661,10 +821,13 @@ void HLoopInformation::Populate() { bool is_irreducible_loop = HasBackEdgeNotDominatedByHeader(); if (is_irreducible_loop) { - ArenaBitVector visited(graph->GetArena(), + // Allocate memory from local ScopedArenaAllocator. + ScopedArenaAllocator allocator(graph->GetArenaStack()); + ArenaBitVector visited(&allocator, graph->GetBlocks().size(), /* expandable */ false, kArenaAllocGraphBuilder); + visited.ClearAllBits(); // Stop marking blocks at the loop header. visited.SetBit(header_->GetBlockId()); @@ -702,6 +865,15 @@ void HLoopInformation::Populate() { graph->SetHasLoops(true); } +void HLoopInformation::PopulateInnerLoopUpwards(HLoopInformation* inner_loop) { + DCHECK(inner_loop->GetPreHeader()->GetLoopInformation() == this); + blocks_.Union(&inner_loop->blocks_); + HLoopInformation* outer_loop = GetPreHeader()->GetLoopInformation(); + if (outer_loop != nullptr) { + outer_loop->PopulateInnerLoopUpwards(this); + } +} + HBasicBlock* HLoopInformation::GetPreHeader() const { HBasicBlock* block = header_->GetPredecessors()[0]; DCHECK(irreducible_ || (block == header_->GetDominator())); @@ -783,6 +955,13 @@ static void UpdateInputsUsers(HInstruction* instruction) { DCHECK(!instruction->HasEnvironment()); } +void HBasicBlock::ReplaceAndRemovePhiWith(HPhi* initial, HPhi* replacement) { + DCHECK(initial->GetBlock() == this); + InsertPhiAfter(replacement, initial); + initial->ReplaceWith(replacement); + RemovePhi(initial); +} + void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement) { DCHECK(initial->GetBlock() == this); @@ -790,9 +969,9 @@ void HBasicBlock::ReplaceAndRemoveInstructionWith(HInstruction* initial, // We can only replace a control flow instruction with another control flow instruction. 
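
PopulateInnerLoopUpwards above folds an inner loop's block set into every enclosing loop by walking outward through the preheaders' loop information, bottoming out at the outermost loop. A toy version of that recursion over integer block ids (assumed stand-in types, not HLoopInformation):

#include <set>

struct ToyLoop {
  std::set<int> blocks;
  ToyLoop* outer = nullptr;  // enclosing loop, nullptr at the outermost level
};

void PopulateUpwards(ToyLoop* loop, const std::set<int>& inner_blocks) {
  loop->blocks.insert(inner_blocks.begin(), inner_blocks.end());
  if (loop->outer != nullptr) {
    PopulateUpwards(loop->outer, loop->blocks);  // propagate the enlarged set outward
  }
}
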
DCHECK(replacement->IsControlFlow()); DCHECK_EQ(replacement->GetId(), -1); - DCHECK_EQ(replacement->GetType(), Primitive::kPrimVoid); + DCHECK_EQ(replacement->GetType(), DataType::Type::kVoid); DCHECK_EQ(initial->GetBlock(), this); - DCHECK_EQ(initial->GetType(), Primitive::kPrimVoid); + DCHECK_EQ(initial->GetType(), DataType::Type::kVoid); DCHECK(initial->GetUses().empty()); DCHECK(initial->GetEnvUses().empty()); replacement->SetBlock(this); @@ -893,7 +1072,7 @@ void HBasicBlock::RemoveInstructionOrPhi(HInstruction* instruction, bool ensure_ } } -void HEnvironment::CopyFrom(const ArenaVector<HInstruction*>& locals) { +void HEnvironment::CopyFrom(ArrayRef<HInstruction* const> locals) { for (size_t i = 0; i < locals.size(); i++) { HInstruction* instruction = locals[i]; SetRawEnvAt(i, instruction); @@ -942,10 +1121,6 @@ void HEnvironment::RemoveAsUserOfInput(size_t index) const { user->FixUpUserRecordsAfterEnvUseRemoval(before_env_use_node); } -HInstruction::InstructionKind HInstruction::GetKind() const { - return GetKindInternal(); -} - HInstruction* HInstruction::GetNextDisregardingMoves() const { HInstruction* next = GetNext(); while (next != nullptr && next->IsParallelMove()) { @@ -1168,10 +1343,13 @@ void HVariableInputSizeInstruction::RemoveAllInputs() { DCHECK_EQ(0u, InputCount()); } -void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { +size_t HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { DCHECK(instruction->GetBlock() != nullptr); // Removing constructor fences only makes sense for instructions with an object return type. - DCHECK_EQ(Primitive::kPrimNot, instruction->GetType()); + DCHECK_EQ(DataType::Type::kReference, instruction->GetType()); + + // Return how many instructions were removed for statistic purposes. + size_t remove_count = 0; // Efficient implementation that simultaneously (in one pass): // * Scans the uses list for all constructor fences. @@ -1220,6 +1398,7 @@ void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { // is removed. if (ctor_fence->InputCount() == 0u) { ctor_fence->GetBlock()->RemoveInstruction(ctor_fence); + ++remove_count; } } } @@ -1233,20 +1412,63 @@ void HConstructorFence::RemoveConstructorFences(HInstruction* instruction) { } CHECK(instruction->GetBlock() != nullptr); } + + return remove_count; +} + +void HConstructorFence::Merge(HConstructorFence* other) { + // Do not delete yourself from the graph. + DCHECK(this != other); + // Don't try to merge with an instruction not associated with a block. + DCHECK(other->GetBlock() != nullptr); + // A constructor fence's return type is "kPrimVoid" + // and therefore it cannot have any environment uses. + DCHECK(!other->HasEnvironmentUses()); + + auto has_input = [](HInstruction* haystack, HInstruction* needle) { + // Check if `haystack` has `needle` as any of its inputs. + for (size_t input_count = 0; input_count < haystack->InputCount(); ++input_count) { + if (haystack->InputAt(input_count) == needle) { + return true; + } + } + return false; + }; + + // Add any inputs from `other` into `this` if it wasn't already an input. 
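
HConstructorFence::Merge above appends the other fence's inputs only when they are not already present, using the local has_input lambda. The same duplicate-free append in isolation, with opaque pointers standing in for HInstruction*:

#include <algorithm>
#include <vector>

void MergeInputs(std::vector<const void*>* inputs, const std::vector<const void*>& other_inputs) {
  for (const void* input : other_inputs) {
    if (std::find(inputs->begin(), inputs->end(), input) == inputs->end()) {
      inputs->push_back(input);  // only add inputs we do not already have
    }
  }
}
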
+ for (size_t input_count = 0; input_count < other->InputCount(); ++input_count) { + HInstruction* other_input = other->InputAt(input_count); + if (!has_input(this, other_input)) { + AddInput(other_input); + } + } + + other->GetBlock()->RemoveInstruction(other); } -HInstruction* HConstructorFence::GetAssociatedAllocation() { +HInstruction* HConstructorFence::GetAssociatedAllocation(bool ignore_inputs) { HInstruction* new_instance_inst = GetPrevious(); // Check if the immediately preceding instruction is a new-instance/new-array. // Otherwise this fence is for protecting final fields. if (new_instance_inst != nullptr && (new_instance_inst->IsNewInstance() || new_instance_inst->IsNewArray())) { - // TODO: Need to update this code to handle multiple inputs. - DCHECK_EQ(InputCount(), 1u); - return new_instance_inst; - } else { - return nullptr; + if (ignore_inputs) { + // If inputs are ignored, simply check if the predecessor is + // *any* HNewInstance/HNewArray. + // + // Inputs are normally only ignored for prepare_for_register_allocation, + // at which point *any* prior HNewInstance/Array can be considered + // associated. + return new_instance_inst; + } else { + // Normal case: There must be exactly 1 input and the previous instruction + // must be that input. + if (InputCount() == 1u && InputAt(0) == new_instance_inst) { + return new_instance_inst; + } + } } + return nullptr; } #define DEFINE_ACCEPT(name, super) \ @@ -1287,11 +1509,19 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { if (GetInput()->IsIntConstant()) { int32_t value = GetInput()->AsIntConstant()->GetValue(); switch (GetResultType()) { - case Primitive::kPrimLong: + case DataType::Type::kInt8: + return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc()); + case DataType::Type::kUint8: + return graph->GetIntConstant(static_cast<uint8_t>(value), GetDexPc()); + case DataType::Type::kInt16: + return graph->GetIntConstant(static_cast<int16_t>(value), GetDexPc()); + case DataType::Type::kUint16: + return graph->GetIntConstant(static_cast<uint16_t>(value), GetDexPc()); + case DataType::Type::kInt64: return graph->GetLongConstant(static_cast<int64_t>(value), GetDexPc()); - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: return graph->GetFloatConstant(static_cast<float>(value), GetDexPc()); - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: return graph->GetDoubleConstant(static_cast<double>(value), GetDexPc()); default: return nullptr; @@ -1299,11 +1529,19 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { } else if (GetInput()->IsLongConstant()) { int64_t value = GetInput()->AsLongConstant()->GetValue(); switch (GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt8: + return graph->GetIntConstant(static_cast<int8_t>(value), GetDexPc()); + case DataType::Type::kUint8: + return graph->GetIntConstant(static_cast<uint8_t>(value), GetDexPc()); + case DataType::Type::kInt16: + return graph->GetIntConstant(static_cast<int16_t>(value), GetDexPc()); + case DataType::Type::kUint16: + return graph->GetIntConstant(static_cast<uint16_t>(value), GetDexPc()); + case DataType::Type::kInt32: return graph->GetIntConstant(static_cast<int32_t>(value), GetDexPc()); - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: return graph->GetFloatConstant(static_cast<float>(value), GetDexPc()); - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: return graph->GetDoubleConstant(static_cast<double>(value), GetDexPc()); default: return nullptr; @@ -1311,7 +1549,7 
@@ HConstant* HTypeConversion::TryStaticEvaluation() const { } else if (GetInput()->IsFloatConstant()) { float value = GetInput()->AsFloatConstant()->GetValue(); switch (GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: if (std::isnan(value)) return graph->GetIntConstant(0, GetDexPc()); if (value >= kPrimIntMax) @@ -1319,7 +1557,7 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { if (value <= kPrimIntMin) return graph->GetIntConstant(kPrimIntMin, GetDexPc()); return graph->GetIntConstant(static_cast<int32_t>(value), GetDexPc()); - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (std::isnan(value)) return graph->GetLongConstant(0, GetDexPc()); if (value >= kPrimLongMax) @@ -1327,7 +1565,7 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { if (value <= kPrimLongMin) return graph->GetLongConstant(kPrimLongMin, GetDexPc()); return graph->GetLongConstant(static_cast<int64_t>(value), GetDexPc()); - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: return graph->GetDoubleConstant(static_cast<double>(value), GetDexPc()); default: return nullptr; @@ -1335,7 +1573,7 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { } else if (GetInput()->IsDoubleConstant()) { double value = GetInput()->AsDoubleConstant()->GetValue(); switch (GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: if (std::isnan(value)) return graph->GetIntConstant(0, GetDexPc()); if (value >= kPrimIntMax) @@ -1343,7 +1581,7 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { if (value <= kPrimLongMin) return graph->GetIntConstant(kPrimIntMin, GetDexPc()); return graph->GetIntConstant(static_cast<int32_t>(value), GetDexPc()); - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (std::isnan(value)) return graph->GetLongConstant(0, GetDexPc()); if (value >= kPrimLongMax) @@ -1351,7 +1589,7 @@ HConstant* HTypeConversion::TryStaticEvaluation() const { if (value <= kPrimLongMin) return graph->GetLongConstant(kPrimLongMin, GetDexPc()); return graph->GetLongConstant(static_cast<int64_t>(value), GetDexPc()); - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: return graph->GetFloatConstant(static_cast<float>(value), GetDexPc()); default: return nullptr; @@ -1559,8 +1797,8 @@ HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK_EQ(cursor->GetBlock(), this); - HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), - cursor->GetDexPc()); + HBasicBlock* new_block = + new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph(), cursor->GetDexPc()); new_block->instructions_.first_instruction_ = cursor; new_block->instructions_.last_instruction_ = instructions_.last_instruction_; instructions_.last_instruction_ = cursor->previous_; @@ -1572,7 +1810,7 @@ HBasicBlock* HBasicBlock::SplitBefore(HInstruction* cursor) { } new_block->instructions_.SetBlockOfInstructions(new_block); - AddInstruction(new (GetGraph()->GetArena()) HGoto(new_block->GetDexPc())); + AddInstruction(new (GetGraph()->GetAllocator()) HGoto(new_block->GetDexPc())); for (HBasicBlock* successor : GetSuccessors()) { successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block; @@ -1589,7 +1827,7 @@ HBasicBlock* HBasicBlock::CreateImmediateDominator() { DCHECK(!graph_->IsInSsaForm()) << "Support for SSA form not implemented."; DCHECK(!IsCatchBlock()) << "Support for updating try/catch information not implemented."; - HBasicBlock* 
new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc()); + HBasicBlock* new_block = new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph(), GetDexPc()); for (HBasicBlock* predecessor : GetPredecessors()) { predecessor->successors_[predecessor->GetSuccessorIndexOf(this)] = new_block; @@ -1605,8 +1843,8 @@ HBasicBlock* HBasicBlock::CreateImmediateDominator() { HBasicBlock* HBasicBlock::SplitBeforeForInlining(HInstruction* cursor) { DCHECK_EQ(cursor->GetBlock(), this); - HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), - cursor->GetDexPc()); + HBasicBlock* new_block = + new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph(), cursor->GetDexPc()); new_block->instructions_.first_instruction_ = cursor; new_block->instructions_.last_instruction_ = instructions_.last_instruction_; instructions_.last_instruction_ = cursor->previous_; @@ -1638,7 +1876,7 @@ HBasicBlock* HBasicBlock::SplitAfterForInlining(HInstruction* cursor) { DCHECK_NE(instructions_.last_instruction_, cursor); DCHECK_EQ(cursor->GetBlock(), this); - HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc()); + HBasicBlock* new_block = new (GetGraph()->GetAllocator()) HBasicBlock(GetGraph(), GetDexPc()); new_block->instructions_.first_instruction_ = cursor->GetNext(); new_block->instructions_.last_instruction_ = instructions_.last_instruction_; cursor->next_->previous_ = nullptr; @@ -1697,6 +1935,15 @@ bool HBasicBlock::IsSingleGoto() const { return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsGoto(); } +bool HBasicBlock::IsSingleReturn() const { + return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsReturn(); +} + +bool HBasicBlock::IsSingleReturnOrReturnVoidAllowingPhis() const { + return (GetFirstInstruction() == GetLastInstruction()) && + (GetLastInstruction()->IsReturn() || GetLastInstruction()->IsReturnVoid()); +} + bool HBasicBlock::IsSingleTryBoundary() const { return HasOnlyOneInstruction(*this) && GetLastInstruction()->IsTryBoundary(); } @@ -1930,7 +2177,7 @@ void HBasicBlock::DisconnectAndDelete() { last_instruction->IsPackedSwitch() || (last_instruction->IsTryBoundary() && IsCatchBlock())); predecessor->RemoveInstruction(last_instruction); - predecessor->AddInstruction(new (graph_->GetArena()) HGoto(last_instruction->GetDexPc())); + predecessor->AddInstruction(new (graph_->GetAllocator()) HGoto(last_instruction->GetDexPc())); } else if (num_pred_successors == 0u) { // The predecessor has no remaining successors and therefore must be dead. // We deliberately leave it without a control-flow instruction so that the @@ -2141,7 +2388,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { if (current->NeedsEnvironment()) { DCHECK(current->HasEnvironment()); current->GetEnvironment()->SetAndCopyParentChain( - outer_graph->GetArena(), invoke->GetEnvironment()); + outer_graph->GetAllocator(), invoke->GetEnvironment()); } } } @@ -2194,7 +2441,7 @@ HInstruction* HGraph::InlineInto(HGraph* outer_graph, HInvoke* invoke) { // into two blocks, merge the first block of the inlined graph into // the first half, and replace the exit block of the inlined graph // with the second half. - ArenaAllocator* allocator = outer_graph->GetArena(); + ArenaAllocator* allocator = outer_graph->GetAllocator(); HBasicBlock* at = invoke->GetBlock(); // Note that we split before the invoke only to simplify polymorphic inlining. 
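
The HTypeConversion::TryStaticEvaluation changes a few hunks above fold floating-point to integer conversions with saturating semantics: NaN becomes 0 and out-of-range values clamp to the integer minimum or maximum. A standalone sketch of the float-to-int32 case (illustrative helper, not ART code):

#include <cmath>
#include <cstdint>
#include <limits>

int32_t SaturatedFloatToInt32(float value) {
  if (std::isnan(value)) {
    return 0;
  }
  if (value >= static_cast<float>(std::numeric_limits<int32_t>::max())) {
    return std::numeric_limits<int32_t>::max();
  }
  if (value <= static_cast<float>(std::numeric_limits<int32_t>::min())) {
    return std::numeric_limits<int32_t>::min();
  }
  return static_cast<int32_t>(value);
}
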
HBasicBlock* to = at->SplitBeforeForInlining(invoke); @@ -2378,10 +2625,10 @@ void HGraph::TransformLoopHeaderForBCE(HBasicBlock* header) { HBasicBlock* old_pre_header = header->GetDominator(); // Need extra block to avoid critical edge. - HBasicBlock* if_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - HBasicBlock* true_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - HBasicBlock* false_block = new (arena_) HBasicBlock(this, header->GetDexPc()); - HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* if_block = new (allocator_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* true_block = new (allocator_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* false_block = new (allocator_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_pre_header = new (allocator_) HBasicBlock(this, header->GetDexPc()); AddBlock(if_block); AddBlock(true_block); AddBlock(false_block); @@ -2436,9 +2683,9 @@ HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, HLoopInformation* loop = header->GetLoopInformation(); // Add new loop blocks. - HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc()); - HBasicBlock* new_header = new (arena_) HBasicBlock(this, header->GetDexPc()); - HBasicBlock* new_body = new (arena_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_pre_header = new (allocator_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_header = new (allocator_) HBasicBlock(this, header->GetDexPc()); + HBasicBlock* new_body = new (allocator_) HBasicBlock(this, header->GetDexPc()); AddBlock(new_pre_header); AddBlock(new_header); AddBlock(new_body); @@ -2470,10 +2717,10 @@ HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header, reverse_post_order_[index_of_body] = new_body; // Add gotos and suspend check (client must add conditional in header). 
- new_pre_header->AddInstruction(new (arena_) HGoto()); - HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(header->GetDexPc()); + new_pre_header->AddInstruction(new (allocator_) HGoto()); + HSuspendCheck* suspend_check = new (allocator_) HSuspendCheck(header->GetDexPc()); new_header->AddInstruction(suspend_check); - new_body->AddInstruction(new (arena_) HGoto()); + new_body->AddInstruction(new (allocator_) HGoto()); suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment( loop->GetSuspendCheck()->GetEnvironment(), header); @@ -2505,7 +2752,7 @@ static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo uppe void HInstruction::SetReferenceTypeInfo(ReferenceTypeInfo rti) { if (kIsDebugBuild) { - DCHECK_EQ(GetType(), Primitive::kPrimNot); + DCHECK_EQ(GetType(), DataType::Type::kReference); ScopedObjectAccess soa(Thread::Current()); DCHECK(rti.IsValid()) << "Invalid RTI for " << DebugName(); if (IsBoundType()) { @@ -2678,6 +2925,7 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { } switch (GetLoadKind()) { case LoadKind::kBootImageAddress: + case LoadKind::kBootImageClassTable: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetClass().Get() == other_load_class->GetClass().Get(); @@ -2688,21 +2936,6 @@ bool HLoadClass::InstructionDataEquals(const HInstruction* other) const { } } -void HLoadClass::SetLoadKind(LoadKind load_kind) { - SetPackedField<LoadKindField>(load_kind); - - if (load_kind != LoadKind::kRuntimeCall && - load_kind != LoadKind::kReferrersClass) { - RemoveAsUserOfInput(0u); - SetRawInputAt(0u, nullptr); - } - - if (!NeedsEnvironment()) { - RemoveEnvironment(); - SetSideEffects(SideEffects::None()); - } -} - std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { switch (rhs) { case HLoadClass::LoadKind::kReferrersClass: @@ -2711,6 +2944,8 @@ std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs) { return os << "BootImageLinkTimePcRelative"; case HLoadClass::LoadKind::kBootImageAddress: return os << "BootImageAddress"; + case HLoadClass::LoadKind::kBootImageClassTable: + return os << "BootImageClassTable"; case HLoadClass::LoadKind::kBssEntry: return os << "BssEntry"; case HLoadClass::LoadKind::kJitTableAddress: @@ -2733,6 +2968,7 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { } switch (GetLoadKind()) { case LoadKind::kBootImageAddress: + case LoadKind::kBootImageInternTable: case LoadKind::kJitTableAddress: { ScopedObjectAccess soa(Thread::Current()); return GetString().Get() == other_load_string->GetString().Get(); @@ -2742,27 +2978,14 @@ bool HLoadString::InstructionDataEquals(const HInstruction* other) const { } } -void HLoadString::SetLoadKind(LoadKind load_kind) { - // Once sharpened, the load kind should not be changed again. 
- DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall); - SetPackedField<LoadKindField>(load_kind); - - if (load_kind != LoadKind::kRuntimeCall) { - RemoveAsUserOfInput(0u); - SetRawInputAt(0u, nullptr); - } - if (!NeedsEnvironment()) { - RemoveEnvironment(); - SetSideEffects(SideEffects::None()); - } -} - std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs) { switch (rhs) { case HLoadString::LoadKind::kBootImageLinkTimePcRelative: return os << "BootImageLinkTimePcRelative"; case HLoadString::LoadKind::kBootImageAddress: return os << "BootImageAddress"; + case HLoadString::LoadKind::kBootImageInternTable: + return os << "BootImageInternTable"; case HLoadString::LoadKind::kBssEntry: return os << "BssEntry"; case HLoadString::LoadKind::kJitTableAddress: @@ -2783,12 +3006,34 @@ void HInstruction::RemoveEnvironmentUsers() { env_uses_.clear(); } +HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr) { + HInstruction* clone = instr->Clone(instr->GetBlock()->GetGraph()->GetAllocator()); + HBasicBlock* block = instr->GetBlock(); + + if (instr->IsPhi()) { + HPhi* phi = instr->AsPhi(); + DCHECK(!phi->HasEnvironment()); + HPhi* phi_clone = clone->AsPhi(); + block->ReplaceAndRemovePhiWith(phi, phi_clone); + } else { + block->ReplaceAndRemoveInstructionWith(instr, clone); + if (instr->HasEnvironment()) { + clone->CopyEnvironmentFrom(instr->GetEnvironment()); + HLoopInformation* loop_info = block->GetLoopInformation(); + if (instr->IsSuspendCheck() && loop_info != nullptr) { + loop_info->SetSuspendCheck(clone->AsSuspendCheck()); + } + } + } + return clone; +} + // Returns an instruction with the opposite Boolean value from 'cond'. HInstruction* HGraph::InsertOppositeCondition(HInstruction* cond, HInstruction* cursor) { - ArenaAllocator* allocator = GetArena(); + ArenaAllocator* allocator = GetAllocator(); if (cond->IsCondition() && - !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType())) { + !DataType::IsFloatingPointType(cond->InputAt(0)->GetType())) { // Can't reverse floating point conditions. We have to use HBooleanNot in that case. HInstruction* lhs = cond->InputAt(0); HInstruction* rhs = cond->InputAt(1); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 5e072cdb67..fe992a7f39 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -26,25 +26,27 @@ #include "base/arena_object.h" #include "base/array_ref.h" #include "base/iteration_range.h" +#include "base/quasi_atomic.h" #include "base/stl_util.h" #include "base/transform_array_ref.h" -#include "dex_file.h" -#include "dex_file_types.h" +#include "data_type.h" #include "deoptimization_kind.h" +#include "dex/dex_file.h" +#include "dex/dex_file_types.h" +#include "dex/invoke_type.h" +#include "dex/method_reference.h" #include "entrypoints/quick/quick_entrypoints_enum.h" #include "handle.h" #include "handle_scope.h" -#include "invoke_type.h" #include "intrinsics_enum.h" #include "locations.h" -#include "method_reference.h" #include "mirror/class.h" #include "offsets.h" -#include "primitive.h" #include "utils/intrusive_forward_list.h" namespace art { +class ArenaStack; class GraphChecker; class HBasicBlock; class HConstructorFence; @@ -305,7 +307,8 @@ std::ostream& operator<<(std::ostream& os, const ReferenceTypeInfo& rhs); // Control-flow graph of a method. Contains a list of basic blocks. 
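
HGraph::InsertOppositeCondition above reverses a condition directly only when its operands are not floating point; with NaN in play, !(a < b) is not the same as (a >= b), so a boolean-not is used instead. A sketch of the integral-only negation table (hypothetical enum, not ART's condition kinds):

enum class ToyCond { kLT, kGE, kGT, kLE, kEQ, kNE };

// Negation is valid for integral comparisons only; for floating point a
// separate boolean-not must wrap the original condition.
ToyCond Negate(ToyCond cond) {
  switch (cond) {
    case ToyCond::kLT: return ToyCond::kGE;
    case ToyCond::kGE: return ToyCond::kLT;
    case ToyCond::kGT: return ToyCond::kLE;
    case ToyCond::kLE: return ToyCond::kGT;
    case ToyCond::kEQ: return ToyCond::kNE;
    case ToyCond::kNE: return ToyCond::kEQ;
  }
  return cond;  // unreachable for well-formed input
}
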
class HGraph : public ArenaObject<kArenaAllocGraph> { public: - HGraph(ArenaAllocator* arena, + HGraph(ArenaAllocator* allocator, + ArenaStack* arena_stack, const DexFile& dex_file, uint32_t method_idx, InstructionSet instruction_set, @@ -313,10 +316,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool debuggable = false, bool osr = false, int start_instruction_id = 0) - : arena_(arena), - blocks_(arena->Adapter(kArenaAllocBlockList)), - reverse_post_order_(arena->Adapter(kArenaAllocReversePostOrder)), - linear_order_(arena->Adapter(kArenaAllocLinearOrder)), + : allocator_(allocator), + arena_stack_(arena_stack), + blocks_(allocator->Adapter(kArenaAllocBlockList)), + reverse_post_order_(allocator->Adapter(kArenaAllocReversePostOrder)), + linear_order_(allocator->Adapter(kArenaAllocLinearOrder)), entry_block_(nullptr), exit_block_(nullptr), maximum_number_of_out_vregs_(0), @@ -337,22 +341,23 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { number_of_cha_guards_(0), instruction_set_(instruction_set), cached_null_constant_(nullptr), - cached_int_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)), - cached_float_constants_(std::less<int32_t>(), arena->Adapter(kArenaAllocConstantsMap)), - cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), - cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), + cached_int_constants_(std::less<int32_t>(), allocator->Adapter(kArenaAllocConstantsMap)), + cached_float_constants_(std::less<int32_t>(), allocator->Adapter(kArenaAllocConstantsMap)), + cached_long_constants_(std::less<int64_t>(), allocator->Adapter(kArenaAllocConstantsMap)), + cached_double_constants_(std::less<int64_t>(), allocator->Adapter(kArenaAllocConstantsMap)), cached_current_method_(nullptr), art_method_(nullptr), inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()), osr_(osr), - cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) { + cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)) { blocks_.reserve(kDefaultNumberOfBlocks); } // Acquires and stores RTI of inexact Object to be used when creating HNullConstant. void InitializeInexactObjectRTI(VariableSizedHandleScope* handles); - ArenaAllocator* GetArena() const { return arena_; } + ArenaAllocator* GetAllocator() const { return allocator_; } + ArenaStack* GetArenaStack() const { return arena_stack_; } const ArenaVector<HBasicBlock*>& GetBlocks() const { return blocks_; } bool IsInSsaForm() const { return in_ssa_form_; } @@ -418,6 +423,18 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { HBasicBlock* SplitEdge(HBasicBlock* block, HBasicBlock* successor); void SplitCriticalEdge(HBasicBlock* block, HBasicBlock* successor); + void OrderLoopHeaderPredecessors(HBasicBlock* header); + + // Transform a loop into a format with a single preheader. + // + // Each phi in the header should be split: original one in the header should only hold + // inputs reachable from the back edges and a single input from the preheader. The newly created + // phi in the preheader should collate the inputs from the original multiple incoming blocks. + // + // Loops in the graph typically have a single preheader, so this method is used to "repair" loops + // that no longer have this property. 
+ void TransformLoopToSinglePreheaderFormat(HBasicBlock* header); + void SimplifyLoop(HBasicBlock* header); int32_t GetNextInstructionId() { @@ -510,7 +527,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // Returns a constant of the given type and value. If it does not exist // already, it is created and inserted into the graph. This method is only for // integral types. - HConstant* GetConstant(Primitive::Type type, int64_t value, uint32_t dex_pc = kNoDexPc); + HConstant* GetConstant(DataType::Type type, int64_t value, uint32_t dex_pc = kNoDexPc); // TODO: This is problematic for the consistency of reference type propagation // because it can be created anytime after the pass and thus it will be left @@ -612,7 +629,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // If not found or previously deleted, create and cache a new instruction. // Don't bother reviving a previously deleted instruction, for simplicity. if (constant == nullptr || constant->GetBlock() == nullptr) { - constant = new (arena_) InstructionType(value, dex_pc); + constant = new (allocator_) InstructionType(value, dex_pc); cache->Overwrite(value, constant); InsertConstant(constant); } @@ -628,7 +645,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // See CacheFloatConstant comment. void CacheDoubleConstant(HDoubleConstant* constant); - ArenaAllocator* const arena_; + ArenaAllocator* const allocator_; + ArenaStack* const arena_stack_; // List of blocks in insertion order. ArenaVector<HBasicBlock*> blocks_; @@ -750,9 +768,12 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { suspend_check_(nullptr), irreducible_(false), contains_irreducible_loop_(false), - back_edges_(graph->GetArena()->Adapter(kArenaAllocLoopInfoBackEdges)), + back_edges_(graph->GetAllocator()->Adapter(kArenaAllocLoopInfoBackEdges)), // Make bit vector growable, as the number of blocks may change. - blocks_(graph->GetArena(), graph->GetBlocks().size(), true, kArenaAllocLoopInfoBackEdges) { + blocks_(graph->GetAllocator(), + graph->GetBlocks().size(), + true, + kArenaAllocLoopInfoBackEdges) { back_edges_.reserve(kDefaultNumberOfBackEdges); } @@ -806,6 +827,10 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { // Finds blocks that are part of this loop. void Populate(); + // Updates blocks population of the loop and all of its outer' ones recursively after the + // population of the inner loop is updated. + void PopulateInnerLoopUpwards(HLoopInformation* inner_loop); + // Returns whether this loop information contains `block`. // Note that this loop information *must* be populated before entering this function. bool Contains(const HBasicBlock& block) const; @@ -836,6 +861,12 @@ class HLoopInformation : public ArenaObject<kArenaAllocLoopInfo> { bool HasExitEdge() const; + // Resets back edge and blocks-in-loop data. + void ResetBasicBlockData() { + back_edges_.clear(); + ClearAllBlocks(); + } + private: // Internal recursive implementation of `Populate`. 
void PopulateRecursive(HBasicBlock* block); @@ -915,11 +946,11 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { public: explicit HBasicBlock(HGraph* graph, uint32_t dex_pc = kNoDexPc) : graph_(graph), - predecessors_(graph->GetArena()->Adapter(kArenaAllocPredecessors)), - successors_(graph->GetArena()->Adapter(kArenaAllocSuccessors)), + predecessors_(graph->GetAllocator()->Adapter(kArenaAllocPredecessors)), + successors_(graph->GetAllocator()->Adapter(kArenaAllocSuccessors)), loop_information_(nullptr), dominator_(nullptr), - dominated_blocks_(graph->GetArena()->Adapter(kArenaAllocDominated)), + dominated_blocks_(graph->GetAllocator()->Adapter(kArenaAllocDominated)), block_id_(kInvalidBlockId), dex_pc_(dex_pc), lifetime_start_(kNoLifetime), @@ -958,6 +989,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { } bool IsSingleGoto() const; + bool IsSingleReturn() const; + bool IsSingleReturnOrReturnVoidAllowingPhis() const; bool IsSingleTryBoundary() const; // Returns true if this block emits nothing but a jump. @@ -970,12 +1003,24 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { void AddBackEdge(HBasicBlock* back_edge) { if (loop_information_ == nullptr) { - loop_information_ = new (graph_->GetArena()) HLoopInformation(this, graph_); + loop_information_ = new (graph_->GetAllocator()) HLoopInformation(this, graph_); } DCHECK_EQ(loop_information_->GetHeader(), this); loop_information_->AddBackEdge(back_edge); } + // Registers a back edge; if the block was not a loop header before the call associates a newly + // created loop info with it. + // + // Used in SuperblockCloner to preserve LoopInformation object instead of reseting loop + // info for all blocks during back edges recalculation. + void AddBackEdgeWhileUpdating(HBasicBlock* back_edge) { + if (loop_information_ == nullptr || loop_information_->GetHeader() != this) { + loop_information_ = new (graph_->GetAllocator()) HLoopInformation(this, graph_); + } + loop_information_->AddBackEdge(back_edge); + } + HGraph* GetGraph() const { return graph_; } void SetGraph(HGraph* graph) { graph_ = graph; } @@ -1149,6 +1194,8 @@ class HBasicBlock : public ArenaObject<kArenaAllocBasicBlock> { // Insert `instruction` before/after an existing instruction `cursor`. void InsertInstructionBefore(HInstruction* instruction, HInstruction* cursor); void InsertInstructionAfter(HInstruction* instruction, HInstruction* cursor); + // Replace phi `initial` with `replacement` within this block. + void ReplaceAndRemovePhiWith(HPhi* initial, HPhi* replacement); // Replace instruction `initial` with `replacement` within this block. 
void ReplaceAndRemoveInstructionWith(HInstruction* initial, HInstruction* replacement); @@ -1324,6 +1371,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(InstanceFieldSet, Instruction) \ M(InstanceOf, Instruction) \ M(IntConstant, Constant) \ + M(IntermediateAddress, Instruction) \ M(InvokeUnresolved, Invoke) \ M(InvokeInterface, Invoke) \ M(InvokeStaticOrDirect, Invoke) \ @@ -1372,7 +1420,8 @@ class HLoopInformationOutwardIterator : public ValueObject { M(UShr, BinaryOperation) \ M(Xor, BinaryOperation) \ M(VecReplicateScalar, VecUnaryOperation) \ - M(VecSumReduce, VecUnaryOperation) \ + M(VecExtractScalar, VecUnaryOperation) \ + M(VecReduce, VecUnaryOperation) \ M(VecCnv, VecUnaryOperation) \ M(VecNeg, VecUnaryOperation) \ M(VecAbs, VecUnaryOperation) \ @@ -1393,6 +1442,7 @@ class HLoopInformationOutwardIterator : public ValueObject { M(VecUShr, VecBinaryOperation) \ M(VecSetScalars, VecOperation) \ M(VecMultiplyAccumulate, VecOperation) \ + M(VecSADAccumulate, VecOperation) \ M(VecLoad, VecMemoryOperation) \ M(VecStore, VecMemoryOperation) \ @@ -1406,7 +1456,6 @@ class HLoopInformationOutwardIterator : public ValueObject { M(BitwiseNegatedRight, Instruction) \ M(DataProcWithShifterOp, Instruction) \ M(MultiplyAccumulate, Instruction) \ - M(IntermediateAddress, Instruction) \ M(IntermediateAddressIndex, Instruction) #endif @@ -1419,7 +1468,8 @@ class HLoopInformationOutwardIterator : public ValueObject { #else #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M) \ M(MipsComputeBaseMethodAddress, Instruction) \ - M(MipsPackedSwitch, Instruction) + M(MipsPackedSwitch, Instruction) \ + M(IntermediateArrayAddressIndex, Instruction) #endif #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS64(M) @@ -1466,18 +1516,30 @@ FOR_EACH_INSTRUCTION(FORWARD_DECLARATION) #undef FORWARD_DECLARATION #define DECLARE_INSTRUCTION(type) \ - InstructionKind GetKindInternal() const OVERRIDE { return k##type; } \ + private: \ + H##type& operator=(const H##type&) = delete; \ + public: \ const char* DebugName() const OVERRIDE { return #type; } \ bool InstructionTypeEquals(const HInstruction* other) const OVERRIDE { \ return other->Is##type(); \ } \ + HInstruction* Clone(ArenaAllocator* arena) const OVERRIDE { \ + DCHECK(IsClonable()); \ + return new (arena) H##type(*this->As##type()); \ + } \ void Accept(HGraphVisitor* visitor) OVERRIDE #define DECLARE_ABSTRACT_INSTRUCTION(type) \ + private: \ + H##type& operator=(const H##type&) = delete; \ + public: \ bool Is##type() const { return As##type() != nullptr; } \ const H##type* As##type() const { return this; } \ H##type* As##type() { return this; } +#define DEFAULT_COPY_CONSTRUCTOR(type) \ + explicit H##type(const H##type& other) = default; + template <typename T> class HUseListNode : public ArenaObject<kArenaAllocUseListNode>, public IntrusiveForwardListNode<HUseListNode<T>> { @@ -1563,7 +1625,7 @@ using HConstInputsRef = TransformArrayRef<const HUserRecord<HInstruction*>, HInp * The internal representation uses 38-bit and is described in the table below. * The first line indicates the side effect, and for field/array accesses the * second line indicates the type of the access (in the order of the - * Primitive::Type enum). + * DataType::Type enum). * The two numbered lines below indicate the bit position in the bitfield (read * vertically). 
* @@ -1612,23 +1674,23 @@ class SideEffects : public ValueObject { return SideEffects(kAllReads); } - static SideEffects FieldWriteOfType(Primitive::Type type, bool is_volatile) { + static SideEffects FieldWriteOfType(DataType::Type type, bool is_volatile) { return is_volatile ? AllWritesAndReads() : SideEffects(TypeFlag(type, kFieldWriteOffset)); } - static SideEffects ArrayWriteOfType(Primitive::Type type) { + static SideEffects ArrayWriteOfType(DataType::Type type) { return SideEffects(TypeFlag(type, kArrayWriteOffset)); } - static SideEffects FieldReadOfType(Primitive::Type type, bool is_volatile) { + static SideEffects FieldReadOfType(DataType::Type type, bool is_volatile) { return is_volatile ? AllWritesAndReads() : SideEffects(TypeFlag(type, kFieldReadOffset)); } - static SideEffects ArrayReadOfType(Primitive::Type type) { + static SideEffects ArrayReadOfType(DataType::Type type) { return SideEffects(TypeFlag(type, kArrayReadOffset)); } @@ -1756,14 +1818,26 @@ class SideEffects : public ValueObject { static constexpr uint64_t kAllReads = ((1ULL << (kLastBitForReads + 1 - kFieldReadOffset)) - 1) << kFieldReadOffset; - // Translates type to bit flag. - static uint64_t TypeFlag(Primitive::Type type, int offset) { - CHECK_NE(type, Primitive::kPrimVoid); - const uint64_t one = 1; - const int shift = type; // 0-based consecutive enum + // Translates type to bit flag. The type must correspond to a Java type. + static uint64_t TypeFlag(DataType::Type type, int offset) { + int shift; + switch (type) { + case DataType::Type::kReference: shift = 0; break; + case DataType::Type::kBool: shift = 1; break; + case DataType::Type::kInt8: shift = 2; break; + case DataType::Type::kUint16: shift = 3; break; + case DataType::Type::kInt16: shift = 4; break; + case DataType::Type::kInt32: shift = 5; break; + case DataType::Type::kInt64: shift = 6; break; + case DataType::Type::kFloat32: shift = 7; break; + case DataType::Type::kFloat64: shift = 8; break; + default: + LOG(FATAL) << "Unexpected data type " << type; + UNREACHABLE(); + } DCHECK_LE(kFieldWriteOffset, shift); DCHECK_LT(shift, kArrayWriteOffset); - return one << (type + offset); + return UINT64_C(1) << (shift + offset); } // Private constructor on direct flags value. @@ -1775,21 +1849,23 @@ class SideEffects : public ValueObject { // A HEnvironment object contains the values of virtual registers at a given location. 
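// --- Illustrative sketch (editorial, not part of the patch above): the rewritten
// SideEffects::TypeFlag() maps each Java-visible type to a dense shift (reference=0 ...
// float64=8) via an explicit switch, because DataType::Type no longer has the 0-based
// consecutive layout the old Primitive::Type code relied on. Standalone re-creation below;
// the enum is a cut-down stand-in and the offsets are purely illustrative.
#include <cstdint>

enum class Type { kReference, kBool, kInt8, kUint16, kInt16, kInt32, kInt64, kFloat32, kFloat64, kVoid };

constexpr int ShiftFor(Type type) {
  switch (type) {
    case Type::kReference: return 0;
    case Type::kBool:      return 1;
    case Type::kInt8:      return 2;
    case Type::kUint16:    return 3;
    case Type::kInt16:     return 4;
    case Type::kInt32:     return 5;
    case Type::kInt64:     return 6;
    case Type::kFloat32:   return 7;
    case Type::kFloat64:   return 8;
    default:               return -1;  // kVoid etc.: not a field/array access type
  }
}

constexpr uint64_t TypeFlag(Type type, int offset) {
  // One bit per (type, access-kind) pair in a 64-bit mask.
  return UINT64_C(1) << (ShiftFor(type) + offset);
}

// Offsets here are made up for the example; the real ones are the kFieldWriteOffset /
// kArrayWriteOffset / kFieldReadOffset / kArrayReadOffset constants inside SideEffects.
constexpr int kWriteOffset = 0;
constexpr int kReadOffset = 19;

static_assert(TypeFlag(Type::kInt32, kWriteOffset) == (UINT64_C(1) << 5), "int32 write bit");
static_assert(TypeFlag(Type::kInt32, kReadOffset) == (UINT64_C(1) << 24), "int32 read bit");

int main() { return 0; }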
class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { public: - ALWAYS_INLINE HEnvironment(ArenaAllocator* arena, + ALWAYS_INLINE HEnvironment(ArenaAllocator* allocator, size_t number_of_vregs, ArtMethod* method, uint32_t dex_pc, HInstruction* holder) - : vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)), - locations_(arena->Adapter(kArenaAllocEnvironmentLocations)), + : vregs_(number_of_vregs, allocator->Adapter(kArenaAllocEnvironmentVRegs)), + locations_(allocator->Adapter(kArenaAllocEnvironmentLocations)), parent_(nullptr), method_(method), dex_pc_(dex_pc), holder_(holder) { } - ALWAYS_INLINE HEnvironment(ArenaAllocator* arena, const HEnvironment& to_copy, HInstruction* holder) - : HEnvironment(arena, + ALWAYS_INLINE HEnvironment(ArenaAllocator* allocator, + const HEnvironment& to_copy, + HInstruction* holder) + : HEnvironment(allocator, to_copy.Size(), to_copy.GetMethod(), to_copy.GetDexPc(), @@ -1812,7 +1888,7 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { } } - void CopyFrom(const ArenaVector<HInstruction*>& locals); + void CopyFrom(ArrayRef<HInstruction* const> locals); void CopyFrom(HEnvironment* environment); // Copy from `env`. If it's a loop phi for `loop_header`, copy the first @@ -1876,7 +1952,14 @@ class HEnvironment : public ArenaObject<kArenaAllocEnvironment> { class HInstruction : public ArenaObject<kArenaAllocInstruction> { public: - HInstruction(SideEffects side_effects, uint32_t dex_pc) +#define DECLARE_KIND(type, super) k##type, + enum InstructionKind { + FOR_EACH_INSTRUCTION(DECLARE_KIND) + kLastInstructionKind + }; +#undef DECLARE_KIND + + HInstruction(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) : previous_(nullptr), next_(nullptr), block_(nullptr), @@ -1890,16 +1973,12 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { lifetime_position_(kNoLifetime), side_effects_(side_effects), reference_type_handle_(ReferenceTypeInfo::CreateInvalid().GetTypeHandle()) { + SetPackedField<InstructionKindField>(kind); SetPackedFlag<kFlagReferenceTypeIsExact>(ReferenceTypeInfo::CreateInvalid().IsExact()); } virtual ~HInstruction() {} -#define DECLARE_KIND(type, super) k##type, - enum InstructionKind { - FOR_EACH_INSTRUCTION(DECLARE_KIND) - }; -#undef DECLARE_KIND HInstruction* GetNext() const { return next_; } HInstruction* GetPrevious() const { return previous_; } @@ -1908,7 +1987,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { HInstruction* GetPreviousDisregardingMoves() const; HBasicBlock* GetBlock() const { return block_; } - ArenaAllocator* GetArena() const { return block_->GetGraph()->GetArena(); } + ArenaAllocator* GetAllocator() const { return block_->GetGraph()->GetAllocator(); } void SetBlock(HBasicBlock* block) { block_ = block; } bool IsInBlock() const { return block_ != nullptr; } bool IsInLoop() const { return block_->IsInLoop(); } @@ -1952,7 +2031,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { virtual void Accept(HGraphVisitor* visitor) = 0; virtual const char* DebugName() const = 0; - virtual Primitive::Type GetType() const { return Primitive::kPrimVoid; } + virtual DataType::Type GetType() const { return DataType::Type::kVoid; } virtual bool NeedsEnvironment() const { return false; } @@ -1964,6 +2043,10 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // TODO: We should rename to CanVisiblyThrow, as some instructions (like HNewInstance), // could throw OOME, but it is still OK to remove them if they are unused. 
virtual bool CanThrow() const { return false; } + + // Does the instruction always throw an exception unconditionally? + virtual bool AlwaysThrows() const { return false; } + bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); } bool HasSideEffects() const { return side_effects_.HasSideEffects(); } @@ -1973,7 +2056,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // simplifies the null check elimination. // TODO: Consider merging can_be_null into ReferenceTypeInfo. virtual bool CanBeNull() const { - DCHECK_EQ(GetType(), Primitive::kPrimNot) << "CanBeNull only applies to reference types"; + DCHECK_EQ(GetType(), DataType::Type::kReference) << "CanBeNull only applies to reference types"; return true; } @@ -1982,13 +2065,13 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { } virtual bool IsActualObject() const { - return GetType() == Primitive::kPrimNot; + return GetType() == DataType::Type::kReference; } void SetReferenceTypeInfo(ReferenceTypeInfo rti); ReferenceTypeInfo GetReferenceTypeInfo() const { - DCHECK_EQ(GetType(), Primitive::kPrimNot); + DCHECK_EQ(GetType(), DataType::Type::kReference); return ReferenceTypeInfo::CreateUnchecked(reference_type_handle_, GetPackedFlag<kFlagReferenceTypeIsExact>()); } @@ -1998,7 +2081,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // Note: fixup_end remains valid across push_front(). auto fixup_end = uses_.empty() ? uses_.begin() : ++uses_.begin(); HUseListNode<HInstruction*>* new_node = - new (GetBlock()->GetGraph()->GetArena()) HUseListNode<HInstruction*>(user, index); + new (GetBlock()->GetGraph()->GetAllocator()) HUseListNode<HInstruction*>(user, index); uses_.push_front(*new_node); FixUpUserRecordsAfterUseInsertion(fixup_end); } @@ -2008,7 +2091,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // Note: env_fixup_end remains valid across push_front(). auto env_fixup_end = env_uses_.empty() ? env_uses_.begin() : ++env_uses_.begin(); HUseListNode<HEnvironment*>* new_node = - new (GetBlock()->GetGraph()->GetArena()) HUseListNode<HEnvironment*>(user, index); + new (GetBlock()->GetGraph()->GetAllocator()) HUseListNode<HEnvironment*>(user, index); env_uses_.push_front(*new_node); FixUpUserRecordsAfterEnvUseInsertion(env_fixup_end); } @@ -2091,7 +2174,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // copying, the uses lists are being updated. 
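// --- Illustrative sketch (editorial, not part of the patch above): one plausible consumer
// of an AlwaysThrows()-style flag like the one added to HInstruction/HInvoke here. This is
// generic dead-code reasoning, not a transcription of ART's passes: if an instruction never
// completes normally, everything after it in the same basic block can never execute.
#include <cassert>
#include <cstddef>
#include <vector>

struct Instr {
  bool always_throws = false;
};

// Counts the trailing instructions of a single block that are unreachable because an
// earlier instruction in the block unconditionally throws.
size_t CountUnreachableTail(const std::vector<Instr>& block) {
  for (size_t i = 0; i < block.size(); ++i) {
    if (block[i].always_throws) {
      return block.size() - i - 1;
    }
  }
  return 0;
}

int main() {
  std::vector<Instr> block = {{false}, {true}, {false}, {false}};
  assert(CountUnreachableTail(block) == 2);
  return 0;
}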
void CopyEnvironmentFrom(HEnvironment* environment) { DCHECK(environment_ == nullptr); - ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); + ArenaAllocator* allocator = GetBlock()->GetGraph()->GetAllocator(); environment_ = new (allocator) HEnvironment(allocator, *environment, this); environment_->CopyFrom(environment); if (environment->GetParent() != nullptr) { @@ -2102,7 +2185,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { void CopyEnvironmentFromWithLoopPhiAdjustment(HEnvironment* environment, HBasicBlock* block) { DCHECK(environment_ == nullptr); - ArenaAllocator* allocator = GetBlock()->GetGraph()->GetArena(); + ArenaAllocator* allocator = GetBlock()->GetGraph()->GetAllocator(); environment_ = new (allocator) HEnvironment(allocator, *environment, this); environment_->CopyFromWithLoopPhiAdjustment(environment, block); if (environment->GetParent() != nullptr) { @@ -2154,6 +2237,25 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { FOR_EACH_ABSTRACT_INSTRUCTION(INSTRUCTION_TYPE_CHECK) #undef INSTRUCTION_TYPE_CHECK + // Return a clone of the instruction if it is clonable (shallow copy by default, custom copy + // if a custom copy-constructor is provided for a particular type). If IsClonable() is false for + // the instruction then the behaviour of this function is undefined. + // + // Note: It is semantically valid to create a clone of the instruction only until + // prepare_for_register_allocator phase as lifetime, intervals and codegen info are not + // copied. + // + // Note: HEnvironment and some other fields are not copied and are set to default values, see + // 'explicit HInstruction(const HInstruction& other)' for details. + virtual HInstruction* Clone(ArenaAllocator* arena ATTRIBUTE_UNUSED) const { + LOG(FATAL) << "Cloning is not implemented for the instruction " << + DebugName() << " " << GetId(); + UNREACHABLE(); + } + + // Return whether instruction can be cloned (copied). + virtual bool IsClonable() const { return false; } + // Returns whether the instruction can be moved within the graph. // TODO: this method is used by LICM and GVN with possibly different // meanings? split and rename? @@ -2180,8 +2282,7 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // is adopted and implemented by our C++ compiler(s). Fow now, we need to hide // the virtual function because the __attribute__((__pure__)) doesn't really // apply the strong requirement for virtual functions, preventing optimizations. - InstructionKind GetKind() const PURE; - virtual InstructionKind GetKindInternal() const = 0; + InstructionKind GetKind() const { return GetPackedField<InstructionKindField>(); } virtual size_t ComputeHashCode() const { size_t result = GetKind(); @@ -2233,9 +2334,16 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { // its users. Used by liveness analysis to compute use positions accordingly. 
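// --- Illustrative sketch (editorial, not part of the patch above): GetKind() now decodes
// the kind from packed_fields_ instead of dispatching through the old virtual
// GetKindInternal(). Below is the bit-packing idea in standalone form; MinimumBitsToStore,
// the enum, the bit position and the encode/decode helpers are simplified stand-ins for
// the ART utilities, not the real InstructionKindField.
#include <cassert>
#include <cstddef>
#include <cstdint>

constexpr size_t MinimumBitsToStore(size_t max_value) {
  return max_value == 0 ? 0 : 1 + MinimumBitsToStore(max_value >> 1);
}

enum class Kind : uint32_t { kAdd, kSub, kMul, kPhi, kLastKind };

constexpr size_t kKindPosition = 2;  // pretend the first two bits hold other flags
constexpr size_t kKindSize = MinimumBitsToStore(static_cast<size_t>(Kind::kLastKind) - 1);

constexpr uint32_t EncodeKind(uint32_t packed, Kind kind) {
  const uint32_t mask = ((1u << kKindSize) - 1u) << kKindPosition;
  return (packed & ~mask) | (static_cast<uint32_t>(kind) << kKindPosition);
}

constexpr Kind DecodeKind(uint32_t packed) {
  return static_cast<Kind>((packed >> kKindPosition) & ((1u << kKindSize) - 1u));
}

int main() {
  uint32_t packed = 0x3;  // the two unrelated flag bits are already set
  packed = EncodeKind(packed, Kind::kPhi);
  assert(DecodeKind(packed) == Kind::kPhi);
  assert((packed & 0x3) == 0x3);  // packing the kind leaves the other flags intact
  return 0;
}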
static constexpr size_t kFlagEmittedAtUseSite = 0u; static constexpr size_t kFlagReferenceTypeIsExact = kFlagEmittedAtUseSite + 1; - static constexpr size_t kNumberOfGenericPackedBits = kFlagReferenceTypeIsExact + 1; + static constexpr size_t kFieldInstructionKind = kFlagReferenceTypeIsExact + 1; + static constexpr size_t kFieldInstructionKindSize = + MinimumBitsToStore(static_cast<size_t>(InstructionKind::kLastInstructionKind - 1)); + static constexpr size_t kNumberOfGenericPackedBits = + kFieldInstructionKind + kFieldInstructionKindSize; static constexpr size_t kMaxNumberOfPackedBits = sizeof(uint32_t) * kBitsPerByte; + static_assert(kNumberOfGenericPackedBits <= kMaxNumberOfPackedBits, + "Too many generic packed fields"); + const HUserRecord<HInstruction*> InputRecordAt(size_t i) const { return GetInputRecords()[i]; } @@ -2270,7 +2378,35 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { packed_fields_ = BitFieldType::Update(value, packed_fields_); } + // Copy construction for the instruction (used for Clone function). + // + // Fields (e.g. lifetime, intervals and codegen info) associated with phases starting from + // prepare_for_register_allocator are not copied (set to default values). + // + // Copy constructors must be provided for every HInstruction type; default copy constructor is + // fine for most of them. However for some of the instructions a custom copy constructor must be + // specified (when instruction has non-trivially copyable fields and must have a special behaviour + // for copying them). + explicit HInstruction(const HInstruction& other) + : previous_(nullptr), + next_(nullptr), + block_(nullptr), + dex_pc_(other.dex_pc_), + id_(-1), + ssa_index_(-1), + packed_fields_(other.packed_fields_), + environment_(nullptr), + locations_(nullptr), + live_interval_(nullptr), + lifetime_position_(kNoLifetime), + side_effects_(other.side_effects_), + reference_type_handle_(other.reference_type_handle_) { + } + private: + using InstructionKindField = + BitField<InstructionKind, kFieldInstructionKind, kFieldInstructionKindSize>; + void FixUpUserRecordsAfterUseInsertion(HUseList<HInstruction*>::iterator fixup_end) { auto before_use_node = uses_.before_begin(); for (auto use_node = uses_.begin(); use_node != fixup_end; ++use_node) { @@ -2359,8 +2495,6 @@ class HInstruction : public ArenaObject<kArenaAllocInstruction> { friend class HEnvironment; friend class HGraph; friend class HInstructionList; - - DISALLOW_COPY_AND_ASSIGN(HInstruction); }; std::ostream& operator<<(std::ostream& os, const HInstruction::InstructionKind& rhs); @@ -2448,25 +2582,25 @@ class HVariableInputSizeInstruction : public HInstruction { void RemoveAllInputs(); protected: - HVariableInputSizeInstruction(SideEffects side_effects, + HVariableInputSizeInstruction(InstructionKind inst_kind, + SideEffects side_effects, uint32_t dex_pc, - ArenaAllocator* arena, + ArenaAllocator* allocator, size_t number_of_inputs, ArenaAllocKind kind) - : HInstruction(side_effects, dex_pc), - inputs_(number_of_inputs, arena->Adapter(kind)) {} + : HInstruction(inst_kind, side_effects, dex_pc), + inputs_(number_of_inputs, allocator->Adapter(kind)) {} - ArenaVector<HUserRecord<HInstruction*>> inputs_; + DEFAULT_COPY_CONSTRUCTOR(VariableInputSizeInstruction); - private: - DISALLOW_COPY_AND_ASSIGN(HVariableInputSizeInstruction); + ArenaVector<HUserRecord<HInstruction*>> inputs_; }; template<size_t N> class HTemplateInstruction: public HInstruction { public: - HTemplateInstruction<N>(SideEffects side_effects, uint32_t 
dex_pc) - : HInstruction(side_effects, dex_pc), inputs_() {} + HTemplateInstruction<N>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + : HInstruction(kind, side_effects, dex_pc), inputs_() {} virtual ~HTemplateInstruction() {} using HInstruction::GetInputRecords; // Keep the const version visible. @@ -2474,6 +2608,9 @@ class HTemplateInstruction: public HInstruction { return ArrayRef<HUserRecord<HInstruction*>>(inputs_); } + protected: + DEFAULT_COPY_CONSTRUCTOR(TemplateInstruction<N>); + private: std::array<HUserRecord<HInstruction*>, N> inputs_; @@ -2484,8 +2621,8 @@ class HTemplateInstruction: public HInstruction { template<> class HTemplateInstruction<0>: public HInstruction { public: - explicit HTemplateInstruction<0>(SideEffects side_effects, uint32_t dex_pc) - : HInstruction(side_effects, dex_pc) {} + explicit HTemplateInstruction<0>(InstructionKind kind, SideEffects side_effects, uint32_t dex_pc) + : HInstruction(kind, side_effects, dex_pc) {} virtual ~HTemplateInstruction() {} @@ -2494,6 +2631,9 @@ class HTemplateInstruction<0>: public HInstruction { return ArrayRef<HUserRecord<HInstruction*>>(); } + protected: + DEFAULT_COPY_CONSTRUCTOR(TemplateInstruction<0>); + private: friend class SsaBuilder; }; @@ -2501,24 +2641,29 @@ class HTemplateInstruction<0>: public HInstruction { template<intptr_t N> class HExpression : public HTemplateInstruction<N> { public: - HExpression<N>(Primitive::Type type, SideEffects side_effects, uint32_t dex_pc) - : HTemplateInstruction<N>(side_effects, dex_pc) { + using HInstruction::InstructionKind; + HExpression<N>(InstructionKind kind, + DataType::Type type, + SideEffects side_effects, + uint32_t dex_pc) + : HTemplateInstruction<N>(kind, side_effects, dex_pc) { this->template SetPackedField<TypeField>(type); } virtual ~HExpression() {} - Primitive::Type GetType() const OVERRIDE { + DataType::Type GetType() const OVERRIDE { return TypeField::Decode(this->GetPackedFields()); } protected: static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeSize = - MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); + MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); static constexpr size_t kNumberOfExpressionPackedBits = kFieldType + kFieldTypeSize; static_assert(kNumberOfExpressionPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>; + using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; + DEFAULT_COPY_CONSTRUCTOR(Expression<N>); }; // Represents dex's RETURN_VOID opcode. A HReturnVoid is a control flow @@ -2526,14 +2671,15 @@ class HExpression : public HTemplateInstruction<N> { class HReturnVoid FINAL : public HTemplateInstruction<0> { public: explicit HReturnVoid(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) {} + : HTemplateInstruction(kReturnVoid, SideEffects::None(), dex_pc) { + } bool IsControlFlow() const OVERRIDE { return true; } DECLARE_INSTRUCTION(ReturnVoid); - private: - DISALLOW_COPY_AND_ASSIGN(HReturnVoid); + protected: + DEFAULT_COPY_CONSTRUCTOR(ReturnVoid); }; // Represents dex's RETURN opcodes. 
A HReturn is a control flow @@ -2541,7 +2687,7 @@ class HReturnVoid FINAL : public HTemplateInstruction<0> { class HReturn FINAL : public HTemplateInstruction<1> { public: explicit HReturn(HInstruction* value, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(kReturn, SideEffects::None(), dex_pc) { SetRawInputAt(0, value); } @@ -2549,26 +2695,27 @@ class HReturn FINAL : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(Return); - private: - DISALLOW_COPY_AND_ASSIGN(HReturn); + protected: + DEFAULT_COPY_CONSTRUCTOR(Return); }; class HPhi FINAL : public HVariableInputSizeInstruction { public: - HPhi(ArenaAllocator* arena, + HPhi(ArenaAllocator* allocator, uint32_t reg_number, size_t number_of_inputs, - Primitive::Type type, + DataType::Type type, uint32_t dex_pc = kNoDexPc) : HVariableInputSizeInstruction( + kPhi, SideEffects::None(), dex_pc, - arena, + allocator, number_of_inputs, kArenaAllocPhiInputs), reg_number_(reg_number) { SetPackedField<TypeField>(ToPhiType(type)); - DCHECK_NE(GetType(), Primitive::kPrimVoid); + DCHECK_NE(GetType(), DataType::Type::kVoid); // Phis are constructed live and marked dead if conflicting or unused. // Individual steps of SsaBuilder should assume that if a phi has been // marked dead, it can be ignored and will be removed by SsaPhiElimination. @@ -2576,22 +2723,24 @@ class HPhi FINAL : public HVariableInputSizeInstruction { SetPackedFlag<kFlagCanBeNull>(true); } + bool IsClonable() const OVERRIDE { return true; } + // Returns a type equivalent to the given `type`, but that a `HPhi` can hold. - static Primitive::Type ToPhiType(Primitive::Type type) { - return Primitive::PrimitiveKind(type); + static DataType::Type ToPhiType(DataType::Type type) { + return DataType::Kind(type); } bool IsCatchPhi() const { return GetBlock()->IsCatchBlock(); } - Primitive::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); } - void SetType(Primitive::Type new_type) { + DataType::Type GetType() const OVERRIDE { return GetPackedField<TypeField>(); } + void SetType(DataType::Type new_type) { // Make sure that only valid type changes occur. The following are allowed: // (1) int -> float/ref (primitive type propagation), // (2) long -> double (primitive type propagation). 
DCHECK(GetType() == new_type || - (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimFloat) || - (GetType() == Primitive::kPrimInt && new_type == Primitive::kPrimNot) || - (GetType() == Primitive::kPrimLong && new_type == Primitive::kPrimDouble)); + (GetType() == DataType::Type::kInt32 && new_type == DataType::Type::kFloat32) || + (GetType() == DataType::Type::kInt32 && new_type == DataType::Type::kReference) || + (GetType() == DataType::Type::kInt64 && new_type == DataType::Type::kFloat64)); SetPackedField<TypeField>(new_type); } @@ -2638,19 +2787,20 @@ class HPhi FINAL : public HVariableInputSizeInstruction { DECLARE_INSTRUCTION(Phi); + protected: + DEFAULT_COPY_CONSTRUCTOR(Phi); + private: static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeSize = - MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); + MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); static constexpr size_t kFlagIsLive = kFieldType + kFieldTypeSize; static constexpr size_t kFlagCanBeNull = kFlagIsLive + 1; static constexpr size_t kNumberOfPhiPackedBits = kFlagCanBeNull + 1; static_assert(kNumberOfPhiPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>; + using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; const uint32_t reg_number_; - - DISALLOW_COPY_AND_ASSIGN(HPhi); }; // The exit instruction is the only instruction of the exit block. @@ -2658,21 +2808,26 @@ class HPhi FINAL : public HVariableInputSizeInstruction { // exit block. class HExit FINAL : public HTemplateInstruction<0> { public: - explicit HExit(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {} + explicit HExit(uint32_t dex_pc = kNoDexPc) + : HTemplateInstruction(kExit, SideEffects::None(), dex_pc) { + } bool IsControlFlow() const OVERRIDE { return true; } DECLARE_INSTRUCTION(Exit); - private: - DISALLOW_COPY_AND_ASSIGN(HExit); + protected: + DEFAULT_COPY_CONSTRUCTOR(Exit); }; // Jumps from one block to another. 
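// --- Illustrative sketch (editorial, not part of the patch above): the DCHECK in
// HPhi::SetType() only admits the widenings used by primitive type propagation
// (int -> float, int -> reference, long -> double). The same rule as a standalone
// predicate; the enum mirrors DataType::Type names but is illustrative only.
#include <cassert>

enum class PhiType { kInt32, kInt64, kFloat32, kFloat64, kReference };

bool IsValidPhiTypeChange(PhiType old_type, PhiType new_type) {
  if (old_type == new_type) return true;  // no-op change
  if (old_type == PhiType::kInt32 &&
      (new_type == PhiType::kFloat32 || new_type == PhiType::kReference)) {
    return true;  // int -> float / reference
  }
  return old_type == PhiType::kInt64 && new_type == PhiType::kFloat64;  // long -> double
}

int main() {
  assert(IsValidPhiTypeChange(PhiType::kInt32, PhiType::kReference));
  assert(IsValidPhiTypeChange(PhiType::kInt64, PhiType::kFloat64));
  assert(!IsValidPhiTypeChange(PhiType::kFloat32, PhiType::kInt32));
  return 0;
}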
class HGoto FINAL : public HTemplateInstruction<0> { public: - explicit HGoto(uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc) {} + explicit HGoto(uint32_t dex_pc = kNoDexPc) + : HTemplateInstruction(kGoto, SideEffects::None(), dex_pc) { + } + bool IsClonable() const OVERRIDE { return true; } bool IsControlFlow() const OVERRIDE { return true; } HBasicBlock* GetSuccessor() const { @@ -2681,14 +2836,15 @@ class HGoto FINAL : public HTemplateInstruction<0> { DECLARE_INSTRUCTION(Goto); - private: - DISALLOW_COPY_AND_ASSIGN(HGoto); + protected: + DEFAULT_COPY_CONSTRUCTOR(Goto); }; class HConstant : public HExpression<0> { public: - explicit HConstant(Primitive::Type type, uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc) {} + explicit HConstant(InstructionKind kind, DataType::Type type, uint32_t dex_pc = kNoDexPc) + : HExpression(kind, type, SideEffects::None(), dex_pc) { + } bool CanBeMoved() const OVERRIDE { return true; } @@ -2705,8 +2861,8 @@ class HConstant : public HExpression<0> { DECLARE_ABSTRACT_INSTRUCTION(Constant); - private: - DISALLOW_COPY_AND_ASSIGN(HConstant); + protected: + DEFAULT_COPY_CONSTRUCTOR(Constant); }; class HNullConstant FINAL : public HConstant { @@ -2724,11 +2880,15 @@ class HNullConstant FINAL : public HConstant { DECLARE_INSTRUCTION(NullConstant); + protected: + DEFAULT_COPY_CONSTRUCTOR(NullConstant); + private: - explicit HNullConstant(uint32_t dex_pc = kNoDexPc) : HConstant(Primitive::kPrimNot, dex_pc) {} + explicit HNullConstant(uint32_t dex_pc = kNoDexPc) + : HConstant(kNullConstant, DataType::Type::kReference, dex_pc) { + } friend class HGraph; - DISALLOW_COPY_AND_ASSIGN(HNullConstant); }; // Constants of the type int. Those can be from Dex instructions, or @@ -2760,18 +2920,23 @@ class HIntConstant FINAL : public HConstant { DECLARE_INSTRUCTION(IntConstant); + protected: + DEFAULT_COPY_CONSTRUCTOR(IntConstant); + private: explicit HIntConstant(int32_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(Primitive::kPrimInt, dex_pc), value_(value) {} + : HConstant(kIntConstant, DataType::Type::kInt32, dex_pc), value_(value) { + } explicit HIntConstant(bool value, uint32_t dex_pc = kNoDexPc) - : HConstant(Primitive::kPrimInt, dex_pc), value_(value ? 1 : 0) {} + : HConstant(kIntConstant, DataType::Type::kInt32, dex_pc), + value_(value ? 
1 : 0) { + } const int32_t value_; friend class HGraph; ART_FRIEND_TEST(GraphTest, InsertInstructionBefore); ART_FRIEND_TYPED_TEST(ParallelMoveTest, ConstantLast); - DISALLOW_COPY_AND_ASSIGN(HIntConstant); }; class HLongConstant FINAL : public HConstant { @@ -2794,14 +2959,18 @@ class HLongConstant FINAL : public HConstant { DECLARE_INSTRUCTION(LongConstant); + protected: + DEFAULT_COPY_CONSTRUCTOR(LongConstant); + private: explicit HLongConstant(int64_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(Primitive::kPrimLong, dex_pc), value_(value) {} + : HConstant(kLongConstant, DataType::Type::kInt64, dex_pc), + value_(value) { + } const int64_t value_; friend class HGraph; - DISALLOW_COPY_AND_ASSIGN(HLongConstant); }; class HFloatConstant FINAL : public HConstant { @@ -2843,18 +3012,24 @@ class HFloatConstant FINAL : public HConstant { DECLARE_INSTRUCTION(FloatConstant); + protected: + DEFAULT_COPY_CONSTRUCTOR(FloatConstant); + private: explicit HFloatConstant(float value, uint32_t dex_pc = kNoDexPc) - : HConstant(Primitive::kPrimFloat, dex_pc), value_(value) {} + : HConstant(kFloatConstant, DataType::Type::kFloat32, dex_pc), + value_(value) { + } explicit HFloatConstant(int32_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(Primitive::kPrimFloat, dex_pc), value_(bit_cast<float, int32_t>(value)) {} + : HConstant(kFloatConstant, DataType::Type::kFloat32, dex_pc), + value_(bit_cast<float, int32_t>(value)) { + } const float value_; // Only the SsaBuilder and HGraph can create floating-point constants. friend class SsaBuilder; friend class HGraph; - DISALLOW_COPY_AND_ASSIGN(HFloatConstant); }; class HDoubleConstant FINAL : public HConstant { @@ -2894,18 +3069,24 @@ class HDoubleConstant FINAL : public HConstant { DECLARE_INSTRUCTION(DoubleConstant); + protected: + DEFAULT_COPY_CONSTRUCTOR(DoubleConstant); + private: explicit HDoubleConstant(double value, uint32_t dex_pc = kNoDexPc) - : HConstant(Primitive::kPrimDouble, dex_pc), value_(value) {} + : HConstant(kDoubleConstant, DataType::Type::kFloat64, dex_pc), + value_(value) { + } explicit HDoubleConstant(int64_t value, uint32_t dex_pc = kNoDexPc) - : HConstant(Primitive::kPrimDouble, dex_pc), value_(bit_cast<double, int64_t>(value)) {} + : HConstant(kDoubleConstant, DataType::Type::kFloat64, dex_pc), + value_(bit_cast<double, int64_t>(value)) { + } const double value_; // Only the SsaBuilder and HGraph can create floating-point constants. friend class SsaBuilder; friend class HGraph; - DISALLOW_COPY_AND_ASSIGN(HDoubleConstant); }; // Conditional branch. 
A block ending with an HIf instruction must have @@ -2913,10 +3094,11 @@ class HDoubleConstant FINAL : public HConstant { class HIf FINAL : public HTemplateInstruction<1> { public: explicit HIf(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(kIf, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); } + bool IsClonable() const OVERRIDE { return true; } bool IsControlFlow() const OVERRIDE { return true; } HBasicBlock* IfTrueSuccessor() const { @@ -2929,8 +3111,8 @@ class HIf FINAL : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(If); - private: - DISALLOW_COPY_AND_ASSIGN(HIf); + protected: + DEFAULT_COPY_CONSTRUCTOR(If); }; @@ -2948,7 +3130,7 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { }; explicit HTryBoundary(BoundaryKind kind, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HTemplateInstruction(kTryBoundary, SideEffects::None(), dex_pc) { SetPackedField<BoundaryKindField>(kind); } @@ -2983,6 +3165,9 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { DECLARE_INSTRUCTION(TryBoundary); + protected: + DEFAULT_COPY_CONSTRUCTOR(TryBoundary); + private: static constexpr size_t kFieldBoundaryKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldBoundaryKindSize = @@ -2992,8 +3177,6 @@ class HTryBoundary FINAL : public HTemplateInstruction<0> { static_assert(kNumberOfTryBoundaryPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using BoundaryKindField = BitField<BoundaryKind, kFieldBoundaryKind, kFieldBoundaryKindSize>; - - DISALLOW_COPY_AND_ASSIGN(HTryBoundary); }; // Deoptimize to interpreter, upon checking a condition. @@ -3001,11 +3184,15 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { public: // Use this constructor when the `HDeoptimize` acts as a barrier, where no code can move // across. - HDeoptimize(ArenaAllocator* arena, HInstruction* cond, DeoptimizationKind kind, uint32_t dex_pc) + HDeoptimize(ArenaAllocator* allocator, + HInstruction* cond, + DeoptimizationKind kind, + uint32_t dex_pc) : HVariableInputSizeInstruction( + kDeoptimize, SideEffects::All(), dex_pc, - arena, + allocator, /* number_of_inputs */ 1, kArenaAllocMisc) { SetPackedFlag<kFieldCanBeMoved>(false); @@ -3013,20 +3200,23 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { SetRawInputAt(0, cond); } + bool IsClonable() const OVERRIDE { return true; } + // Use this constructor when the `HDeoptimize` guards an instruction, and any user // that relies on the deoptimization to pass should have its input be the `HDeoptimize` // instead of `guard`. // We set CanTriggerGC to prevent any intermediate address to be live // at the point of the `HDeoptimize`. - HDeoptimize(ArenaAllocator* arena, + HDeoptimize(ArenaAllocator* allocator, HInstruction* cond, HInstruction* guard, DeoptimizationKind kind, uint32_t dex_pc) : HVariableInputSizeInstruction( + kDeoptimize, SideEffects::CanTriggerGC(), dex_pc, - arena, + allocator, /* number_of_inputs */ 2, kArenaAllocMisc) { SetPackedFlag<kFieldCanBeMoved>(true); @@ -3047,8 +3237,8 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { DeoptimizationKind GetDeoptimizationKind() const { return GetPackedField<DeoptimizeKindField>(); } - Primitive::Type GetType() const OVERRIDE { - return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid; + DataType::Type GetType() const OVERRIDE { + return GuardsAnInput() ? 
GuardedInput()->GetType() : DataType::Type::kVoid; } bool GuardsAnInput() const { @@ -3066,6 +3256,9 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { DECLARE_INSTRUCTION(Deoptimize); + protected: + DEFAULT_COPY_CONSTRUCTOR(Deoptimize); + private: static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits; static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1; @@ -3077,8 +3270,6 @@ class HDeoptimize FINAL : public HVariableInputSizeInstruction { "Too many packed fields."); using DeoptimizeKindField = BitField<DeoptimizationKind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>; - - DISALLOW_COPY_AND_ASSIGN(HDeoptimize); }; // Represents a should_deoptimize flag. Currently used for CHA-based devirtualization. @@ -3090,11 +3281,16 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { public: // CHA guards are only optimized in a separate pass and it has no side effects // with regard to other passes. - HShouldDeoptimizeFlag(ArenaAllocator* arena, uint32_t dex_pc) - : HVariableInputSizeInstruction(SideEffects::None(), dex_pc, arena, 0, kArenaAllocCHA) { + HShouldDeoptimizeFlag(ArenaAllocator* allocator, uint32_t dex_pc) + : HVariableInputSizeInstruction(kShouldDeoptimizeFlag, + SideEffects::None(), + dex_pc, + allocator, + 0, + kArenaAllocCHA) { } - Primitive::Type GetType() const OVERRIDE { return Primitive::kPrimInt; } + DataType::Type GetType() const OVERRIDE { return DataType::Type::kInt32; } // We do all CHA guard elimination/motion in a single pass, after which there is no // further guard elimination/motion since a guard might have been used for justification @@ -3104,8 +3300,8 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { DECLARE_INSTRUCTION(ShouldDeoptimizeFlag); - private: - DISALLOW_COPY_AND_ASSIGN(HShouldDeoptimizeFlag); + protected: + DEFAULT_COPY_CONSTRUCTOR(ShouldDeoptimizeFlag); }; // Represents the ArtMethod that was passed as a first argument to @@ -3113,13 +3309,14 @@ class HShouldDeoptimizeFlag FINAL : public HVariableInputSizeInstruction { // instructions that work with the dex cache. 
class HCurrentMethod FINAL : public HExpression<0> { public: - explicit HCurrentMethod(Primitive::Type type, uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc) {} + explicit HCurrentMethod(DataType::Type type, uint32_t dex_pc = kNoDexPc) + : HExpression(kCurrentMethod, type, SideEffects::None(), dex_pc) { + } DECLARE_INSTRUCTION(CurrentMethod); - private: - DISALLOW_COPY_AND_ASSIGN(HCurrentMethod); + protected: + DEFAULT_COPY_CONSTRUCTOR(CurrentMethod); }; // Fetches an ArtMethod from the virtual table or the interface method table @@ -3132,16 +3329,17 @@ class HClassTableGet FINAL : public HExpression<1> { kLast = kIMTable }; HClassTableGet(HInstruction* cls, - Primitive::Type type, + DataType::Type type, TableKind kind, size_t index, uint32_t dex_pc) - : HExpression(type, SideEffects::None(), dex_pc), + : HExpression(kClassTableGet, type, SideEffects::None(), dex_pc), index_(index) { SetPackedField<TableKindField>(kind); SetRawInputAt(0, cls); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { return other->AsClassTableGet()->GetIndex() == index_ && @@ -3153,6 +3351,9 @@ class HClassTableGet FINAL : public HExpression<1> { DECLARE_INSTRUCTION(ClassTableGet); + protected: + DEFAULT_COPY_CONSTRUCTOR(ClassTableGet); + private: static constexpr size_t kFieldTableKind = kNumberOfExpressionPackedBits; static constexpr size_t kFieldTableKindSize = @@ -3164,8 +3365,6 @@ class HClassTableGet FINAL : public HExpression<1> { // The index of the ArtMethod in the table. const size_t index_; - - DISALLOW_COPY_AND_ASSIGN(HClassTableGet); }; // PackedSwitch (jump table). A block ending with a PackedSwitch instruction will @@ -3177,12 +3376,14 @@ class HPackedSwitch FINAL : public HTemplateInstruction<1> { uint32_t num_entries, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); } + bool IsClonable() const OVERRIDE { return true; } + bool IsControlFlow() const OVERRIDE { return true; } int32_t GetStartValue() const { return start_value_; } @@ -3195,22 +3396,29 @@ class HPackedSwitch FINAL : public HTemplateInstruction<1> { } DECLARE_INSTRUCTION(PackedSwitch); + protected: + DEFAULT_COPY_CONSTRUCTOR(PackedSwitch); + private: const int32_t start_value_; const uint32_t num_entries_; - - DISALLOW_COPY_AND_ASSIGN(HPackedSwitch); }; class HUnaryOperation : public HExpression<1> { public: - HUnaryOperation(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(result_type, SideEffects::None(), dex_pc) { + HUnaryOperation(InstructionKind kind, + DataType::Type result_type, + HInstruction* input, + uint32_t dex_pc = kNoDexPc) + : HExpression(kind, result_type, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); } + // All of the UnaryOperation instructions are clonable. 
+ bool IsClonable() const OVERRIDE { return true; } + HInstruction* GetInput() const { return InputAt(0); } - Primitive::Type GetResultType() const { return GetType(); } + DataType::Type GetResultType() const { return GetType(); } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -3230,25 +3438,29 @@ class HUnaryOperation : public HExpression<1> { DECLARE_ABSTRACT_INSTRUCTION(UnaryOperation); - private: - DISALLOW_COPY_AND_ASSIGN(HUnaryOperation); + protected: + DEFAULT_COPY_CONSTRUCTOR(UnaryOperation); }; class HBinaryOperation : public HExpression<2> { public: - HBinaryOperation(Primitive::Type result_type, + HBinaryOperation(InstructionKind kind, + DataType::Type result_type, HInstruction* left, HInstruction* right, SideEffects side_effects = SideEffects::None(), uint32_t dex_pc = kNoDexPc) - : HExpression(result_type, side_effects, dex_pc) { + : HExpression(kind, result_type, side_effects, dex_pc) { SetRawInputAt(0, left); SetRawInputAt(1, right); } + // All of the BinaryOperation instructions are clonable. + bool IsClonable() const OVERRIDE { return true; } + HInstruction* GetLeft() const { return InputAt(0); } HInstruction* GetRight() const { return InputAt(1); } - Primitive::Type GetResultType() const { return GetType(); } + DataType::Type GetResultType() const { return GetType(); } virtual bool IsCommutative() const { return false; } @@ -3320,8 +3532,8 @@ class HBinaryOperation : public HExpression<2> { DECLARE_ABSTRACT_INSTRUCTION(BinaryOperation); - private: - DISALLOW_COPY_AND_ASSIGN(HBinaryOperation); + protected: + DEFAULT_COPY_CONSTRUCTOR(BinaryOperation); }; // The comparison bias applies for floating point operations and indicates how NaN @@ -3337,8 +3549,16 @@ std::ostream& operator<<(std::ostream& os, const ComparisonBias& rhs); class HCondition : public HBinaryOperation { public: - HCondition(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(Primitive::kPrimBoolean, first, second, SideEffects::None(), dex_pc) { + HCondition(InstructionKind kind, + HInstruction* first, + HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HBinaryOperation(kind, + DataType::Type::kBool, + first, + second, + SideEffects::None(), + dex_pc) { SetPackedField<ComparisonBiasField>(ComparisonBias::kNoBias); } @@ -3363,7 +3583,7 @@ class HCondition : public HBinaryOperation { } bool IsFPConditionTrueIfNaN() const { - DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); + DCHECK(DataType::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); IfCondition if_cond = GetCondition(); if (if_cond == kCondNE) { return true; @@ -3374,7 +3594,7 @@ class HCondition : public HBinaryOperation { } bool IsFPConditionFalseIfNaN() const { - DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); + DCHECK(DataType::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); IfCondition if_cond = GetCondition(); if (if_cond == kCondEQ) { return true; @@ -3400,7 +3620,7 @@ class HCondition : public HBinaryOperation { template <typename T> int32_t CompareFP(T x, T y) const { - DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); + DCHECK(DataType::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); DCHECK_NE(GetBias(), ComparisonBias::kNoBias); // Handle the bias. return std::isunordered(x, y) ? (IsGtBias() ? 
1 : -1) : Compare(x, y); @@ -3411,15 +3631,15 @@ class HCondition : public HBinaryOperation { return GetBlock()->GetGraph()->GetIntConstant(value, dex_pc); } - private: - DISALLOW_COPY_AND_ASSIGN(HCondition); + DEFAULT_COPY_CONSTRUCTOR(Condition); }; // Instruction to check if two inputs are equal to each other. class HEqual FINAL : public HCondition { public: HEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kEqual, first, second, dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -3454,16 +3674,19 @@ class HEqual FINAL : public HCondition { return kCondNE; } + protected: + DEFAULT_COPY_CONSTRUCTOR(Equal); + private: template <typename T> static bool Compute(T x, T y) { return x == y; } - - DISALLOW_COPY_AND_ASSIGN(HEqual); }; class HNotEqual FINAL : public HCondition { public: - HNotEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + HNotEqual(HInstruction* first, HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HCondition(kNotEqual, first, second, dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -3497,16 +3720,19 @@ class HNotEqual FINAL : public HCondition { return kCondEQ; } + protected: + DEFAULT_COPY_CONSTRUCTOR(NotEqual); + private: template <typename T> static bool Compute(T x, T y) { return x != y; } - - DISALLOW_COPY_AND_ASSIGN(HNotEqual); }; class HLessThan FINAL : public HCondition { public: - HLessThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + HLessThan(HInstruction* first, HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HCondition(kLessThan, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3534,16 +3760,19 @@ class HLessThan FINAL : public HCondition { return kCondGE; } + protected: + DEFAULT_COPY_CONSTRUCTOR(LessThan); + private: template <typename T> static bool Compute(T x, T y) { return x < y; } - - DISALLOW_COPY_AND_ASSIGN(HLessThan); }; class HLessThanOrEqual FINAL : public HCondition { public: - HLessThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + HLessThanOrEqual(HInstruction* first, HInstruction* second, + uint32_t dex_pc = kNoDexPc) + : HCondition(kLessThanOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3571,16 +3800,18 @@ class HLessThanOrEqual FINAL : public HCondition { return kCondGT; } + protected: + DEFAULT_COPY_CONSTRUCTOR(LessThanOrEqual); + private: template <typename T> static bool Compute(T x, T y) { return x <= y; } - - DISALLOW_COPY_AND_ASSIGN(HLessThanOrEqual); }; class HGreaterThan FINAL : public HCondition { public: HGreaterThan(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kGreaterThan, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3608,16 +3839,18 @@ class HGreaterThan FINAL : public HCondition { return kCondLE; } + protected: + DEFAULT_COPY_CONSTRUCTOR(GreaterThan); + private: template <typename T> 
static bool Compute(T x, T y) { return x > y; } - - DISALLOW_COPY_AND_ASSIGN(HGreaterThan); }; class HGreaterThanOrEqual FINAL : public HCondition { public: HGreaterThanOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kGreaterThanOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3645,16 +3878,18 @@ class HGreaterThanOrEqual FINAL : public HCondition { return kCondLT; } + protected: + DEFAULT_COPY_CONSTRUCTOR(GreaterThanOrEqual); + private: template <typename T> static bool Compute(T x, T y) { return x >= y; } - - DISALLOW_COPY_AND_ASSIGN(HGreaterThanOrEqual); }; class HBelow FINAL : public HCondition { public: HBelow(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kBelow, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3683,18 +3918,20 @@ class HBelow FINAL : public HCondition { return kCondAE; } + protected: + DEFAULT_COPY_CONSTRUCTOR(Below); + private: template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) < MakeUnsigned(y); } - - DISALLOW_COPY_AND_ASSIGN(HBelow); }; class HBelowOrEqual FINAL : public HCondition { public: HBelowOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kBelowOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3723,18 +3960,20 @@ class HBelowOrEqual FINAL : public HCondition { return kCondA; } + protected: + DEFAULT_COPY_CONSTRUCTOR(BelowOrEqual); + private: template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) <= MakeUnsigned(y); } - - DISALLOW_COPY_AND_ASSIGN(HBelowOrEqual); }; class HAbove FINAL : public HCondition { public: HAbove(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kAbove, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3763,18 +4002,20 @@ class HAbove FINAL : public HCondition { return kCondBE; } + protected: + DEFAULT_COPY_CONSTRUCTOR(Above); + private: template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) > MakeUnsigned(y); } - - DISALLOW_COPY_AND_ASSIGN(HAbove); }; class HAboveOrEqual FINAL : public HCondition { public: HAboveOrEqual(HInstruction* first, HInstruction* second, uint32_t dex_pc = kNoDexPc) - : HCondition(first, second, dex_pc) {} + : HCondition(kAboveOrEqual, first, second, dex_pc) { + } HConstant* Evaluate(HIntConstant* x, HIntConstant* y) const OVERRIDE { return MakeConstantCondition(Compute(x->GetValue(), y->GetValue()), GetDexPc()); @@ -3803,12 +4044,13 @@ class HAboveOrEqual FINAL : public HCondition { return kCondB; } + protected: + DEFAULT_COPY_CONSTRUCTOR(AboveOrEqual); + private: template <typename T> static bool Compute(T x, T y) { return MakeUnsigned(x) >= MakeUnsigned(y); } - - DISALLOW_COPY_AND_ASSIGN(HAboveOrEqual); }; // Instruction to check how two inputs compare to each 
other. @@ -3817,20 +4059,21 @@ class HCompare FINAL : public HBinaryOperation { public: // Note that `comparison_type` is the type of comparison performed // between the comparison's inputs, not the type of the instantiated - // HCompare instruction (which is always Primitive::kPrimInt). - HCompare(Primitive::Type comparison_type, + // HCompare instruction (which is always DataType::Type::kInt). + HCompare(DataType::Type comparison_type, HInstruction* first, HInstruction* second, ComparisonBias bias, uint32_t dex_pc) - : HBinaryOperation(Primitive::kPrimInt, + : HBinaryOperation(kCompare, + DataType::Type::kInt32, first, second, SideEffectsForArchRuntimeCalls(comparison_type), dex_pc) { SetPackedField<ComparisonBiasField>(bias); - DCHECK_EQ(comparison_type, Primitive::PrimitiveKind(first->GetType())); - DCHECK_EQ(comparison_type, Primitive::PrimitiveKind(second->GetType())); + DCHECK_EQ(comparison_type, DataType::Kind(first->GetType())); + DCHECK_EQ(comparison_type, DataType::Kind(second->GetType())); } template <typename T> @@ -3838,7 +4081,7 @@ class HCompare FINAL : public HBinaryOperation { template <typename T> int32_t ComputeFP(T x, T y) const { - DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); + DCHECK(DataType::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); DCHECK_NE(GetBias(), ComparisonBias::kNoBias); // Handle the bias. return std::isunordered(x, y) ? (IsGtBias() ? 1 : -1) : Compute(x, y); @@ -3871,11 +4114,11 @@ class HCompare FINAL : public HBinaryOperation { // Does this compare instruction have a "gt bias" (vs an "lt bias")? // Only meaningful for floating-point comparisons. bool IsGtBias() const { - DCHECK(Primitive::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); + DCHECK(DataType::IsFloatingPointType(InputAt(0)->GetType())) << InputAt(0)->GetType(); return GetBias() == ComparisonBias::kGtBias; } - static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type type ATTRIBUTE_UNUSED) { + static SideEffects SideEffectsForArchRuntimeCalls(DataType::Type type ATTRIBUTE_UNUSED) { // Comparisons do not require a runtime call in any back end. 
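A note on the gt/lt bias handled in ComputeFP() above: it mirrors the dex cmpg/cmpl bytecodes, where an unordered (NaN) comparison folds to +1 under gt bias and to -1 under lt bias. The snippet below is only an illustration of that folding rule with local names, not code from this header:

```cpp
#include <cmath>
#include <cstdint>

// Illustrative constant folding of a floating-point compare, following the
// dex semantics: cmpg-* (gt bias) yields 1 on NaN, cmpl-* (lt bias) yields -1.
enum class Bias { kGtBias, kLtBias };  // local stand-in for ComparisonBias

int32_t FoldFpCompare(double x, double y, Bias bias) {
  if (std::isunordered(x, y)) {
    return bias == Bias::kGtBias ? 1 : -1;
  }
  return x == y ? 0 : (x < y ? -1 : 1);
}
```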
return SideEffects::None(); } @@ -3898,8 +4141,7 @@ class HCompare FINAL : public HBinaryOperation { return GetBlock()->GetGraph()->GetIntConstant(value, dex_pc); } - private: - DISALLOW_COPY_AND_ASSIGN(HCompare); + DEFAULT_COPY_CONSTRUCTOR(Compare); }; class HNewInstance FINAL : public HExpression<1> { @@ -3910,7 +4152,10 @@ class HNewInstance FINAL : public HExpression<1> { const DexFile& dex_file, bool finalizable, QuickEntrypointEnum entrypoint) - : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc), + : HExpression(kNewInstance, + DataType::Type::kReference, + SideEffects::CanTriggerGC(), + dex_pc), type_index_(type_index), dex_file_(dex_file), entrypoint_(entrypoint) { @@ -3918,6 +4163,8 @@ class HNewInstance FINAL : public HExpression<1> { SetRawInputAt(0, cls); } + bool IsClonable() const OVERRIDE { return true; } + dex::TypeIndex GetTypeIndex() const { return type_index_; } const DexFile& GetDexFile() const { return dex_file_; } @@ -3954,6 +4201,9 @@ class HNewInstance FINAL : public HExpression<1> { DECLARE_INSTRUCTION(NewInstance); + protected: + DEFAULT_COPY_CONSTRUCTOR(NewInstance); + private: static constexpr size_t kFlagFinalizable = kNumberOfExpressionPackedBits; static constexpr size_t kNumberOfNewInstancePackedBits = kFlagFinalizable + 1; @@ -3963,8 +4213,6 @@ class HNewInstance FINAL : public HExpression<1> { const dex::TypeIndex type_index_; const DexFile& dex_file_; QuickEntrypointEnum entrypoint_; - - DISALLOW_COPY_AND_ASSIGN(HNewInstance); }; enum IntrinsicNeedsEnvironmentOrCache { @@ -3998,7 +4246,7 @@ class HInvoke : public HVariableInputSizeInstruction { // inputs at the end of their list of inputs. uint32_t GetNumberOfArguments() const { return number_of_arguments_; } - Primitive::Type GetType() const OVERRIDE { return GetPackedField<ReturnTypeField>(); } + DataType::Type GetType() const OVERRIDE { return GetPackedField<ReturnTypeField>(); } uint32_t GetDexMethodIndex() const { return dex_method_index_; } @@ -4023,6 +4271,10 @@ class HInvoke : public HVariableInputSizeInstruction { bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); } + void SetAlwaysThrows(bool always_throws) { SetPackedFlag<kFlagAlwaysThrows>(always_throws); } + + bool AlwaysThrows() const OVERRIDE { return GetPackedFlag<kFlagAlwaysThrows>(); } + bool CanBeMoved() const OVERRIDE { return IsIntrinsic() && !DoesAnyWrite(); } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { @@ -4051,25 +4303,28 @@ class HInvoke : public HVariableInputSizeInstruction { static constexpr size_t kFieldReturnType = kFieldInvokeType + kFieldInvokeTypeSize; static constexpr size_t kFieldReturnTypeSize = - MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); + MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); static constexpr size_t kFlagCanThrow = kFieldReturnType + kFieldReturnTypeSize; - static constexpr size_t kNumberOfInvokePackedBits = kFlagCanThrow + 1; + static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1; + static constexpr size_t kNumberOfInvokePackedBits = kFlagAlwaysThrows + 1; static_assert(kNumberOfInvokePackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using InvokeTypeField = BitField<InvokeType, kFieldInvokeType, kFieldInvokeTypeSize>; - using ReturnTypeField = BitField<Primitive::Type, kFieldReturnType, kFieldReturnTypeSize>; + using ReturnTypeField = BitField<DataType::Type, kFieldReturnType, kFieldReturnTypeSize>; - HInvoke(ArenaAllocator* arena, + HInvoke(InstructionKind kind, + 
ArenaAllocator* allocator, uint32_t number_of_arguments, uint32_t number_of_other_inputs, - Primitive::Type return_type, + DataType::Type return_type, uint32_t dex_pc, uint32_t dex_method_index, ArtMethod* resolved_method, InvokeType invoke_type) : HVariableInputSizeInstruction( + kind, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. dex_pc, - arena, + allocator, number_of_arguments + number_of_other_inputs, kArenaAllocInvokeInputs), number_of_arguments_(number_of_arguments), @@ -4082,6 +4337,8 @@ class HInvoke : public HVariableInputSizeInstruction { SetPackedFlag<kFlagCanThrow>(true); } + DEFAULT_COPY_CONSTRUCTOR(Invoke); + uint32_t number_of_arguments_; ArtMethod* resolved_method_; const uint32_t dex_method_index_; @@ -4089,20 +4346,18 @@ class HInvoke : public HVariableInputSizeInstruction { // A magic word holding optimizations for intrinsics. See intrinsics.h. uint32_t intrinsic_optimizations_; - - private: - DISALLOW_COPY_AND_ASSIGN(HInvoke); }; class HInvokeUnresolved FINAL : public HInvoke { public: - HInvokeUnresolved(ArenaAllocator* arena, + HInvokeUnresolved(ArenaAllocator* allocator, uint32_t number_of_arguments, - Primitive::Type return_type, + DataType::Type return_type, uint32_t dex_pc, uint32_t dex_method_index, InvokeType invoke_type) - : HInvoke(arena, + : HInvoke(kInvokeUnresolved, + allocator, number_of_arguments, 0u /* number_of_other_inputs */, return_type, @@ -4112,32 +4367,38 @@ class HInvokeUnresolved FINAL : public HInvoke { invoke_type) { } + bool IsClonable() const OVERRIDE { return true; } + DECLARE_INSTRUCTION(InvokeUnresolved); - private: - DISALLOW_COPY_AND_ASSIGN(HInvokeUnresolved); + protected: + DEFAULT_COPY_CONSTRUCTOR(InvokeUnresolved); }; class HInvokePolymorphic FINAL : public HInvoke { public: - HInvokePolymorphic(ArenaAllocator* arena, + HInvokePolymorphic(ArenaAllocator* allocator, uint32_t number_of_arguments, - Primitive::Type return_type, + DataType::Type return_type, uint32_t dex_pc, uint32_t dex_method_index) - : HInvoke(arena, + : HInvoke(kInvokePolymorphic, + allocator, number_of_arguments, 0u /* number_of_other_inputs */, return_type, dex_pc, dex_method_index, nullptr, - kVirtual) {} + kVirtual) { + } + + bool IsClonable() const OVERRIDE { return true; } DECLARE_INSTRUCTION(InvokePolymorphic); - private: - DISALLOW_COPY_AND_ASSIGN(HInvokePolymorphic); + protected: + DEFAULT_COPY_CONSTRUCTOR(InvokePolymorphic); }; class HInvokeStaticOrDirect FINAL : public HInvoke { @@ -4197,9 +4458,9 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { uint64_t method_load_data; }; - HInvokeStaticOrDirect(ArenaAllocator* arena, + HInvokeStaticOrDirect(ArenaAllocator* allocator, uint32_t number_of_arguments, - Primitive::Type return_type, + DataType::Type return_type, uint32_t dex_pc, uint32_t method_index, ArtMethod* resolved_method, @@ -4207,7 +4468,8 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { InvokeType invoke_type, MethodReference target_method, ClinitCheckRequirement clinit_check_requirement) - : HInvoke(arena, + : HInvoke(kInvokeStaticOrDirect, + allocator, number_of_arguments, // There is potentially one extra argument for the HCurrentMethod node, and // potentially one other if the clinit check is explicit, and potentially @@ -4224,6 +4486,8 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { SetPackedField<ClinitCheckRequirementField>(clinit_check_requirement); } + bool IsClonable() const OVERRIDE { return true; } + void SetDispatchInfo(const DispatchInfo& dispatch_info) { bool 
had_current_method_input = HasCurrentMethodInput(); bool needs_current_method_input = NeedsCurrentMethodInput(dispatch_info.method_load_kind); @@ -4277,7 +4541,7 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { } bool CanBeNull() const OVERRIDE { - return GetPackedField<ReturnTypeField>() == Primitive::kPrimNot && !IsStringInit(); + return GetPackedField<ReturnTypeField>() == DataType::Type::kReference && !IsStringInit(); } // Get the index of the special input, if any. @@ -4369,6 +4633,9 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { DECLARE_INSTRUCTION(InvokeStaticOrDirect); + protected: + DEFAULT_COPY_CONSTRUCTOR(InvokeStaticOrDirect); + private: static constexpr size_t kFieldClinitCheckRequirement = kNumberOfInvokePackedBits; static constexpr size_t kFieldClinitCheckRequirementSize = @@ -4382,24 +4649,23 @@ class HInvokeStaticOrDirect FINAL : public HInvoke { kFieldClinitCheckRequirementSize>; // Cached values of the resolved method, to avoid needing the mutator lock. - MethodReference target_method_; + const MethodReference target_method_; DispatchInfo dispatch_info_; - - DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect); }; std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs); std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::ClinitCheckRequirement rhs); class HInvokeVirtual FINAL : public HInvoke { public: - HInvokeVirtual(ArenaAllocator* arena, + HInvokeVirtual(ArenaAllocator* allocator, uint32_t number_of_arguments, - Primitive::Type return_type, + DataType::Type return_type, uint32_t dex_pc, uint32_t dex_method_index, ArtMethod* resolved_method, uint32_t vtable_index) - : HInvoke(arena, + : HInvoke(kInvokeVirtual, + allocator, number_of_arguments, 0u, return_type, @@ -4407,7 +4673,10 @@ class HInvokeVirtual FINAL : public HInvoke { dex_method_index, resolved_method, kVirtual), - vtable_index_(vtable_index) {} + vtable_index_(vtable_index) { + } + + bool IsClonable() const OVERRIDE { return true; } bool CanBeNull() const OVERRIDE { switch (GetIntrinsic()) { @@ -4431,23 +4700,25 @@ class HInvokeVirtual FINAL : public HInvoke { DECLARE_INSTRUCTION(InvokeVirtual); + protected: + DEFAULT_COPY_CONSTRUCTOR(InvokeVirtual); + private: // Cached value of the resolved method, to avoid needing the mutator lock. const uint32_t vtable_index_; - - DISALLOW_COPY_AND_ASSIGN(HInvokeVirtual); }; class HInvokeInterface FINAL : public HInvoke { public: - HInvokeInterface(ArenaAllocator* arena, + HInvokeInterface(ArenaAllocator* allocator, uint32_t number_of_arguments, - Primitive::Type return_type, + DataType::Type return_type, uint32_t dex_pc, uint32_t dex_method_index, ArtMethod* resolved_method, uint32_t imt_index) - : HInvoke(arena, + : HInvoke(kInvokeInterface, + allocator, number_of_arguments, 0u, return_type, @@ -4455,7 +4726,10 @@ class HInvokeInterface FINAL : public HInvoke { dex_method_index, resolved_method, kInterface), - imt_index_(imt_index) {} + imt_index_(imt_index) { + } + + bool IsClonable() const OVERRIDE { return true; } bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { // TODO: Add implicit null checks in intrinsics. @@ -4468,22 +4742,22 @@ class HInvokeInterface FINAL : public HInvoke { } uint32_t GetImtIndex() const { return imt_index_; } - uint32_t GetDexMethodIndex() const { return dex_method_index_; } DECLARE_INSTRUCTION(InvokeInterface); + protected: + DEFAULT_COPY_CONSTRUCTOR(InvokeInterface); + private: // Cached value of the resolved method, to avoid needing the mutator lock. 
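The invoke classes above pack their boolean properties, including the new AlwaysThrows flag next to CanThrow, into the instruction's shared packed-field word rather than separate members (see the kFlagAlwaysThrows and kNumberOfInvokePackedBits lines). A minimal sketch of that idiom in plain C++ follows; the class and names are illustrative, and the real BitField helpers live elsewhere in the tree. An invoke proven to end in a throw would then be marked through the AlwaysThrows flag by whichever analysis establishes that fact.

```cpp
#include <cstddef>
#include <cstdint>

// Stand-in for the packed-field idiom: several flags share one 32-bit word,
// and a static_assert guards against running out of bits.
class PackedFlags {
 public:
  static constexpr size_t kFlagCanThrow = 0;
  static constexpr size_t kFlagAlwaysThrows = kFlagCanThrow + 1;
  static constexpr size_t kNumberOfPackedBits = kFlagAlwaysThrows + 1;
  static_assert(kNumberOfPackedBits <= 32u, "Too many packed fields.");

  template <size_t kFlag>
  void SetFlag(bool value) {
    bits_ = value ? (bits_ | (1u << kFlag)) : (bits_ & ~(1u << kFlag));
  }

  template <size_t kFlag>
  bool GetFlag() const {
    return (bits_ & (1u << kFlag)) != 0u;
  }

 private:
  uint32_t bits_ = 0u;
};
```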
const uint32_t imt_index_; - - DISALLOW_COPY_AND_ASSIGN(HInvokeInterface); }; class HNeg FINAL : public HUnaryOperation { public: - HNeg(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HUnaryOperation(result_type, input, dex_pc) { - DCHECK_EQ(result_type, Primitive::PrimitiveKind(input->GetType())); + HNeg(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HUnaryOperation(kNeg, result_type, input, dex_pc) { + DCHECK_EQ(result_type, DataType::Kind(input->GetType())); } template <typename T> static T Compute(T x) { return -x; } @@ -4503,18 +4777,20 @@ class HNeg FINAL : public HUnaryOperation { DECLARE_INSTRUCTION(Neg); - private: - DISALLOW_COPY_AND_ASSIGN(HNeg); + protected: + DEFAULT_COPY_CONSTRUCTOR(Neg); }; class HNewArray FINAL : public HExpression<2> { public: HNewArray(HInstruction* cls, HInstruction* length, uint32_t dex_pc) - : HExpression(Primitive::kPrimNot, SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kNewArray, DataType::Type::kReference, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, cls); SetRawInputAt(1, length); } + bool IsClonable() const OVERRIDE { return true; } + // Calls runtime so needs an environment. bool NeedsEnvironment() const OVERRIDE { return true; } @@ -4534,17 +4810,18 @@ class HNewArray FINAL : public HExpression<2> { DECLARE_INSTRUCTION(NewArray); - private: - DISALLOW_COPY_AND_ASSIGN(HNewArray); + protected: + DEFAULT_COPY_CONSTRUCTOR(NewArray); }; class HAdd FINAL : public HBinaryOperation { public: - HAdd(Primitive::Type result_type, + HAdd(DataType::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kAdd, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -4569,17 +4846,18 @@ class HAdd FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Add); - private: - DISALLOW_COPY_AND_ASSIGN(HAdd); + protected: + DEFAULT_COPY_CONSTRUCTOR(Add); }; class HSub FINAL : public HBinaryOperation { public: - HSub(Primitive::Type result_type, + HSub(DataType::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kSub, result_type, left, right, SideEffects::None(), dex_pc) { + } template <typename T> static T Compute(T x, T y) { return x - y; } @@ -4602,17 +4880,18 @@ class HSub FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Sub); - private: - DISALLOW_COPY_AND_ASSIGN(HSub); + protected: + DEFAULT_COPY_CONSTRUCTOR(Sub); }; class HMul FINAL : public HBinaryOperation { public: - HMul(Primitive::Type result_type, + HMul(DataType::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kMul, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -4637,21 +4916,22 @@ class HMul FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Mul); - private: - DISALLOW_COPY_AND_ASSIGN(HMul); + protected: + DEFAULT_COPY_CONSTRUCTOR(Mul); }; class HDiv FINAL : public HBinaryOperation { public: - HDiv(Primitive::Type result_type, + HDiv(DataType::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc) - : HBinaryOperation(result_type, left, right, 
SideEffects::None(), dex_pc) {} + : HBinaryOperation(kDiv, result_type, left, right, SideEffects::None(), dex_pc) { + } template <typename T> T ComputeIntegral(T x, T y) const { - DCHECK(!Primitive::IsFloatingPointType(GetType())) << GetType(); + DCHECK(!DataType::IsFloatingPointType(GetType())) << GetType(); // Our graph structure ensures we never have 0 for `y` during // constant folding. DCHECK_NE(y, 0); @@ -4661,7 +4941,7 @@ class HDiv FINAL : public HBinaryOperation { template <typename T> T ComputeFP(T x, T y) const { - DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType(); + DCHECK(DataType::IsFloatingPointType(GetType())) << GetType(); return x / y; } @@ -4684,21 +4964,22 @@ class HDiv FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Div); - private: - DISALLOW_COPY_AND_ASSIGN(HDiv); + protected: + DEFAULT_COPY_CONSTRUCTOR(Div); }; class HRem FINAL : public HBinaryOperation { public: - HRem(Primitive::Type result_type, + HRem(DataType::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kRem, result_type, left, right, SideEffects::None(), dex_pc) { + } template <typename T> T ComputeIntegral(T x, T y) const { - DCHECK(!Primitive::IsFloatingPointType(GetType())) << GetType(); + DCHECK(!DataType::IsFloatingPointType(GetType())) << GetType(); // Our graph structure ensures we never have 0 for `y` during // constant folding. DCHECK_NE(y, 0); @@ -4708,7 +4989,7 @@ class HRem FINAL : public HBinaryOperation { template <typename T> T ComputeFP(T x, T y) const { - DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType(); + DCHECK(DataType::IsFloatingPointType(GetType())) << GetType(); return std::fmod(x, y); } @@ -4731,8 +5012,8 @@ class HRem FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Rem); - private: - DISALLOW_COPY_AND_ASSIGN(HRem); + protected: + DEFAULT_COPY_CONSTRUCTOR(Rem); }; class HDivZeroCheck FINAL : public HExpression<1> { @@ -4740,11 +5021,11 @@ class HDivZeroCheck FINAL : public HExpression<1> { // `HDivZeroCheck` can trigger GC, as it may call the `ArithmeticException` // constructor. 
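HDiv and HRem above can assume a non-zero divisor during constant folding because HDivZeroCheck (introduced next) guards the operation. The remaining corner case is a divisor of -1: the Java result is defined (INT32_MIN / -1 wraps back to INT32_MIN, and x % -1 is 0), while evaluating it natively would trap on x86. A hedged sketch of folding under those rules, written so the sketch itself has no signed-overflow undefined behaviour; the helper names are local to this example:

```cpp
#include <cstdint>

// Illustrative folding of a constant int32 division under Java semantics.
// The divisor is known to be non-zero here (HDivZeroCheck already guards it).
int32_t FoldIntDiv(int32_t x, int32_t y) {
  if (y == -1) {
    // Java: INT32_MIN / -1 wraps back to INT32_MIN; a native idiv would trap.
    // Negate through unsigned arithmetic to keep this sketch free of UB.
    return static_cast<int32_t>(0u - static_cast<uint32_t>(x));
  }
  return x / y;
}

int32_t FoldIntRem(int32_t x, int32_t y) {
  if (y == -1) {
    return 0;  // Java: x % -1 is always 0, including for INT32_MIN.
  }
  return x % y;
}
```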
HDivZeroCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kDivZeroCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } - Primitive::Type GetType() const OVERRIDE { return InputAt(0)->GetType(); } + DataType::Type GetType() const OVERRIDE { return InputAt(0)->GetType(); } bool CanBeMoved() const OVERRIDE { return true; } @@ -4757,19 +5038,19 @@ class HDivZeroCheck FINAL : public HExpression<1> { DECLARE_INSTRUCTION(DivZeroCheck); - private: - DISALLOW_COPY_AND_ASSIGN(HDivZeroCheck); + protected: + DEFAULT_COPY_CONSTRUCTOR(DivZeroCheck); }; class HShl FINAL : public HBinaryOperation { public: - HShl(Primitive::Type result_type, + HShl(DataType::Type result_type, HInstruction* value, HInstruction* distance, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) { - DCHECK_EQ(result_type, Primitive::PrimitiveKind(value->GetType())); - DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(distance->GetType())); + : HBinaryOperation(kShl, result_type, value, distance, SideEffects::None(), dex_pc) { + DCHECK_EQ(result_type, DataType::Kind(value->GetType())); + DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } template <typename T> @@ -4803,19 +5084,19 @@ class HShl FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Shl); - private: - DISALLOW_COPY_AND_ASSIGN(HShl); + protected: + DEFAULT_COPY_CONSTRUCTOR(Shl); }; class HShr FINAL : public HBinaryOperation { public: - HShr(Primitive::Type result_type, + HShr(DataType::Type result_type, HInstruction* value, HInstruction* distance, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) { - DCHECK_EQ(result_type, Primitive::PrimitiveKind(value->GetType())); - DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(distance->GetType())); + : HBinaryOperation(kShr, result_type, value, distance, SideEffects::None(), dex_pc) { + DCHECK_EQ(result_type, DataType::Kind(value->GetType())); + DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } template <typename T> @@ -4849,19 +5130,19 @@ class HShr FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Shr); - private: - DISALLOW_COPY_AND_ASSIGN(HShr); + protected: + DEFAULT_COPY_CONSTRUCTOR(Shr); }; class HUShr FINAL : public HBinaryOperation { public: - HUShr(Primitive::Type result_type, + HUShr(DataType::Type result_type, HInstruction* value, HInstruction* distance, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, value, distance, SideEffects::None(), dex_pc) { - DCHECK_EQ(result_type, Primitive::PrimitiveKind(value->GetType())); - DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(distance->GetType())); + : HBinaryOperation(kUShr, result_type, value, distance, SideEffects::None(), dex_pc) { + DCHECK_EQ(result_type, DataType::Kind(value->GetType())); + DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } template <typename T> @@ -4897,17 +5178,18 @@ class HUShr FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(UShr); - private: - DISALLOW_COPY_AND_ASSIGN(HUShr); + protected: + DEFAULT_COPY_CONSTRUCTOR(UShr); }; class HAnd FINAL : public HBinaryOperation { public: - HAnd(Primitive::Type result_type, + HAnd(DataType::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : 
HBinaryOperation(kAnd, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -4934,17 +5216,18 @@ class HAnd FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(And); - private: - DISALLOW_COPY_AND_ASSIGN(HAnd); + protected: + DEFAULT_COPY_CONSTRUCTOR(And); }; class HOr FINAL : public HBinaryOperation { public: - HOr(Primitive::Type result_type, + HOr(DataType::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kOr, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -4971,17 +5254,18 @@ class HOr FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Or); - private: - DISALLOW_COPY_AND_ASSIGN(HOr); + protected: + DEFAULT_COPY_CONSTRUCTOR(Or); }; class HXor FINAL : public HBinaryOperation { public: - HXor(Primitive::Type result_type, + HXor(DataType::Type result_type, HInstruction* left, HInstruction* right, uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc) {} + : HBinaryOperation(kXor, result_type, left, right, SideEffects::None(), dex_pc) { + } bool IsCommutative() const OVERRIDE { return true; } @@ -5008,16 +5292,16 @@ class HXor FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Xor); - private: - DISALLOW_COPY_AND_ASSIGN(HXor); + protected: + DEFAULT_COPY_CONSTRUCTOR(Xor); }; class HRor FINAL : public HBinaryOperation { public: - HRor(Primitive::Type result_type, HInstruction* value, HInstruction* distance) - : HBinaryOperation(result_type, value, distance) { - DCHECK_EQ(result_type, Primitive::PrimitiveKind(value->GetType())); - DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(distance->GetType())); + HRor(DataType::Type result_type, HInstruction* value, HInstruction* distance) + : HBinaryOperation(kRor, result_type, value, distance) { + DCHECK_EQ(result_type, DataType::Kind(value->GetType())); + DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(distance->GetType())); } template <typename T> @@ -5059,8 +5343,8 @@ class HRor FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(Ror); - private: - DISALLOW_COPY_AND_ASSIGN(HRor); + protected: + DEFAULT_COPY_CONSTRUCTOR(Ror); }; // The value of a parameter in this method. Its location depends on @@ -5070,9 +5354,9 @@ class HParameterValue FINAL : public HExpression<0> { HParameterValue(const DexFile& dex_file, dex::TypeIndex type_index, uint8_t index, - Primitive::Type parameter_type, + DataType::Type parameter_type, bool is_this = false) - : HExpression(parameter_type, SideEffects::None(), kNoDexPc), + : HExpression(kParameterValue, parameter_type, SideEffects::None(), kNoDexPc), dex_file_(dex_file), type_index_(type_index), index_(index) { @@ -5090,6 +5374,9 @@ class HParameterValue FINAL : public HExpression<0> { DECLARE_INSTRUCTION(ParameterValue); + protected: + DEFAULT_COPY_CONSTRUCTOR(ParameterValue); + private: // Whether or not the parameter value corresponds to 'this' argument. static constexpr size_t kFlagIsThis = kNumberOfExpressionPackedBits; @@ -5103,14 +5390,13 @@ class HParameterValue FINAL : public HExpression<0> { // The index of this parameter in the parameters list. Must be less // than HGraph::number_of_in_vregs_. 
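All of the shift and rotate nodes above require an Int32 distance (see the DCHECKs on DataType::Kind(distance->GetType())), and dex/Java shift semantics use only the low 5 bits of that distance for 32-bit values (6 bits for 64-bit ones). The following stand-alone sketch shows that masking and the usual rotate-right identity; it uses local helper names and is not the elided Compute() bodies themselves:

```cpp
#include <cstdint>

// Fold a 32-bit shift-left: only the low 5 bits of the distance matter.
// The shift is done on the unsigned bit pattern to avoid UB in the sketch.
int32_t FoldShl32(int32_t value, int32_t distance) {
  return static_cast<int32_t>(static_cast<uint32_t>(value) << (distance & 0x1f));
}

// Fold a 32-bit rotate-right via the standard identity on the unsigned view.
int32_t FoldRor32(int32_t value, int32_t distance) {
  uint32_t v = static_cast<uint32_t>(value);
  uint32_t d = static_cast<uint32_t>(distance) & 31u;
  return static_cast<int32_t>(d == 0u ? v : (v >> d) | (v << (32u - d)));
}
```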
const uint8_t index_; - - DISALLOW_COPY_AND_ASSIGN(HParameterValue); }; class HNot FINAL : public HUnaryOperation { public: - HNot(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HUnaryOperation(result_type, input, dex_pc) {} + HNot(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HUnaryOperation(kNot, result_type, input, dex_pc) { + } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -5136,14 +5422,15 @@ class HNot FINAL : public HUnaryOperation { DECLARE_INSTRUCTION(Not); - private: - DISALLOW_COPY_AND_ASSIGN(HNot); + protected: + DEFAULT_COPY_CONSTRUCTOR(Not); }; class HBooleanNot FINAL : public HUnaryOperation { public: explicit HBooleanNot(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HUnaryOperation(Primitive::Type::kPrimBoolean, input, dex_pc) {} + : HUnaryOperation(kBooleanNot, DataType::Type::kBool, input, dex_pc) { + } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -5173,23 +5460,23 @@ class HBooleanNot FINAL : public HUnaryOperation { DECLARE_INSTRUCTION(BooleanNot); - private: - DISALLOW_COPY_AND_ASSIGN(HBooleanNot); + protected: + DEFAULT_COPY_CONSTRUCTOR(BooleanNot); }; class HTypeConversion FINAL : public HExpression<1> { public: // Instantiate a type conversion of `input` to `result_type`. - HTypeConversion(Primitive::Type result_type, HInstruction* input, uint32_t dex_pc) - : HExpression(result_type, SideEffects::None(), dex_pc) { + HTypeConversion(DataType::Type result_type, HInstruction* input, uint32_t dex_pc = kNoDexPc) + : HExpression(kTypeConversion, result_type, SideEffects::None(), dex_pc) { SetRawInputAt(0, input); // Invariant: We should never generate a conversion to a Boolean value. - DCHECK_NE(Primitive::kPrimBoolean, result_type); + DCHECK_NE(DataType::Type::kBool, result_type); } HInstruction* GetInput() const { return InputAt(0); } - Primitive::Type GetInputType() const { return GetInput()->GetType(); } - Primitive::Type GetResultType() const { return GetType(); } + DataType::Type GetInputType() const { return GetInput()->GetType(); } + DataType::Type GetResultType() const { return GetType(); } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -5202,8 +5489,8 @@ class HTypeConversion FINAL : public HExpression<1> { DECLARE_INSTRUCTION(TypeConversion); - private: - DISALLOW_COPY_AND_ASSIGN(HTypeConversion); + protected: + DEFAULT_COPY_CONSTRUCTOR(TypeConversion); }; static constexpr uint32_t kNoRegNumber = -1; @@ -5213,10 +5500,11 @@ class HNullCheck FINAL : public HExpression<1> { // `HNullCheck` can trigger GC, as it may call the `NullPointerException` // constructor. 
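HTypeConversion above never targets kBool (see the DCHECK), and narrowing integral conversions follow the Java cast rules: keep the low bits of the source value and sign-extend from the destination width. A tiny illustration of folding an int-to-byte conversion, with a local helper name only:

```cpp
#include <cstdint>

// Folding (byte) x under Java semantics: keep the low 8 bits, sign-extended.
int32_t FoldIntToByte(int32_t x) {
  return static_cast<int32_t>(static_cast<int8_t>(x));
}
// Example: FoldIntToByte(0x1FF) == -1, FoldIntToByte(0x17F) == 127.
```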
HNullCheck(HInstruction* value, uint32_t dex_pc) - : HExpression(value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + : HExpression(kNullCheck, value->GetType(), SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, value); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; @@ -5228,11 +5516,10 @@ class HNullCheck FINAL : public HExpression<1> { bool CanBeNull() const OVERRIDE { return false; } - DECLARE_INSTRUCTION(NullCheck); - private: - DISALLOW_COPY_AND_ASSIGN(HNullCheck); + protected: + DEFAULT_COPY_CONSTRUCTOR(NullCheck); }; // Embeds an ArtField and all the information required by the compiler. We cache @@ -5241,7 +5528,7 @@ class FieldInfo : public ValueObject { public: FieldInfo(ArtField* field, MemberOffset field_offset, - Primitive::Type field_type, + DataType::Type field_type, bool is_volatile, uint32_t index, uint16_t declaring_class_def_index, @@ -5256,7 +5543,7 @@ class FieldInfo : public ValueObject { ArtField* GetField() const { return field_; } MemberOffset GetFieldOffset() const { return field_offset_; } - Primitive::Type GetFieldType() const { return field_type_; } + DataType::Type GetFieldType() const { return field_type_; } uint32_t GetFieldIndex() const { return index_; } uint16_t GetDeclaringClassDefIndex() const { return declaring_class_def_index_;} const DexFile& GetDexFile() const { return dex_file_; } @@ -5265,7 +5552,7 @@ class FieldInfo : public ValueObject { private: ArtField* const field_; const MemberOffset field_offset_; - const Primitive::Type field_type_; + const DataType::Type field_type_; const bool is_volatile_; const uint32_t index_; const uint16_t declaring_class_def_index_; @@ -5276,14 +5563,17 @@ class HInstanceFieldGet FINAL : public HExpression<1> { public: HInstanceFieldGet(HInstruction* value, ArtField* field, - Primitive::Type field_type, + DataType::Type field_type, MemberOffset field_offset, bool is_volatile, uint32_t field_idx, uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), + : HExpression(kInstanceFieldGet, + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -5294,6 +5584,7 @@ class HInstanceFieldGet FINAL : public HExpression<1> { SetRawInputAt(0, value); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { @@ -5311,15 +5602,23 @@ class HInstanceFieldGet FINAL : public HExpression<1> { const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } - Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + DataType::Type GetFieldType() const { return field_info_.GetFieldType(); } bool IsVolatile() const { return field_info_.IsVolatile(); } + void SetType(DataType::Type new_type) { + DCHECK(DataType::IsIntegralType(GetType())); + DCHECK(DataType::IsIntegralType(new_type)); + DCHECK_EQ(DataType::Size(GetType()), DataType::Size(new_type)); + SetPackedField<TypeField>(new_type); + } + DECLARE_INSTRUCTION(InstanceFieldGet); + protected: + DEFAULT_COPY_CONSTRUCTOR(InstanceFieldGet); + private: const FieldInfo field_info_; - - 
DISALLOW_COPY_AND_ASSIGN(HInstanceFieldGet); }; class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { @@ -5327,14 +5626,16 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { HInstanceFieldSet(HInstruction* object, HInstruction* value, ArtField* field, - Primitive::Type field_type, + DataType::Type field_type, MemberOffset field_offset, bool is_volatile, uint32_t field_idx, uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), + : HTemplateInstruction(kInstanceFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -5347,13 +5648,15 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { SetRawInputAt(1, value); } + bool IsClonable() const OVERRIDE { return true; } + bool CanDoImplicitNullCheckOn(HInstruction* obj) const OVERRIDE { return (obj == InputAt(0)) && art::CanDoImplicitNullCheckOn(GetFieldOffset().Uint32Value()); } const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } - Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + DataType::Type GetFieldType() const { return field_info_.GetFieldType(); } bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } bool GetValueCanBeNull() const { return GetPackedFlag<kFlagValueCanBeNull>(); } @@ -5361,6 +5664,9 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { DECLARE_INSTRUCTION(InstanceFieldSet); + protected: + DEFAULT_COPY_CONSTRUCTOR(InstanceFieldSet); + private: static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits; static constexpr size_t kNumberOfInstanceFieldSetPackedBits = kFlagValueCanBeNull + 1; @@ -5368,23 +5674,35 @@ class HInstanceFieldSet FINAL : public HTemplateInstruction<2> { "Too many packed fields."); const FieldInfo field_info_; - - DISALLOW_COPY_AND_ASSIGN(HInstanceFieldSet); }; class HArrayGet FINAL : public HExpression<2> { public: HArrayGet(HInstruction* array, HInstruction* index, - Primitive::Type type, + DataType::Type type, + uint32_t dex_pc) + : HArrayGet(array, + index, + type, + SideEffects::ArrayReadOfType(type), + dex_pc, + /* is_string_char_at */ false) { + } + + HArrayGet(HInstruction* array, + HInstruction* index, + DataType::Type type, + SideEffects side_effects, uint32_t dex_pc, - bool is_string_char_at = false) - : HExpression(type, SideEffects::ArrayReadOfType(type), dex_pc) { + bool is_string_char_at) + : HExpression(kArrayGet, type, side_effects, dex_pc) { SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at); SetRawInputAt(0, array); SetRawInputAt(1, index); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; @@ -5410,11 +5728,11 @@ class HArrayGet FINAL : public HExpression<2> { DCHECK_EQ(GetBlock(), other->GetBlock()); DCHECK_EQ(GetArray(), other->GetArray()); DCHECK_EQ(GetIndex(), other->GetIndex()); - if (Primitive::IsIntOrLongType(GetType())) { - DCHECK(Primitive::IsFloatingPointType(other->GetType())) << other->GetType(); + if (DataType::IsIntOrLongType(GetType())) { + DCHECK(DataType::IsFloatingPointType(other->GetType())) << other->GetType(); } else { - DCHECK(Primitive::IsFloatingPointType(GetType())) << GetType(); - 
DCHECK(Primitive::IsIntOrLongType(other->GetType())) << other->GetType(); + DCHECK(DataType::IsFloatingPointType(GetType())) << GetType(); + DCHECK(DataType::IsIntOrLongType(other->GetType())) << other->GetType(); } } return result; @@ -5425,8 +5743,18 @@ class HArrayGet FINAL : public HExpression<2> { HInstruction* GetArray() const { return InputAt(0); } HInstruction* GetIndex() const { return InputAt(1); } + void SetType(DataType::Type new_type) { + DCHECK(DataType::IsIntegralType(GetType())); + DCHECK(DataType::IsIntegralType(new_type)); + DCHECK_EQ(DataType::Size(GetType()), DataType::Size(new_type)); + SetPackedField<TypeField>(new_type); + } + DECLARE_INSTRUCTION(ArrayGet); + protected: + DEFAULT_COPY_CONSTRUCTOR(ArrayGet); + private: // We treat a String as an array, creating the HArrayGet from String.charAt() // intrinsic in the instruction simplifier. We can always determine whether @@ -5437,8 +5765,6 @@ class HArrayGet FINAL : public HExpression<2> { static constexpr size_t kNumberOfArrayGetPackedBits = kFlagIsStringCharAt + 1; static_assert(kNumberOfArrayGetPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); - - DISALLOW_COPY_AND_ASSIGN(HArrayGet); }; class HArraySet FINAL : public HTemplateInstruction<3> { @@ -5446,20 +5772,35 @@ class HArraySet FINAL : public HTemplateInstruction<3> { HArraySet(HInstruction* array, HInstruction* index, HInstruction* value, - Primitive::Type expected_component_type, + DataType::Type expected_component_type, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc) { + : HArraySet(array, + index, + value, + expected_component_type, + // Make a best guess for side effects now, may be refined during SSA building. + ComputeSideEffects(GetComponentType(value->GetType(), expected_component_type)), + dex_pc) { + } + + HArraySet(HInstruction* array, + HInstruction* index, + HInstruction* value, + DataType::Type expected_component_type, + SideEffects side_effects, + uint32_t dex_pc) + : HTemplateInstruction(kArraySet, side_effects, dex_pc) { SetPackedField<ExpectedComponentTypeField>(expected_component_type); - SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == Primitive::kPrimNot); + SetPackedFlag<kFlagNeedsTypeCheck>(value->GetType() == DataType::Type::kReference); SetPackedFlag<kFlagValueCanBeNull>(true); SetPackedFlag<kFlagStaticTypeOfArrayIsObjectArray>(false); SetRawInputAt(0, array); SetRawInputAt(1, index); SetRawInputAt(2, value); - // Make a best guess now, may be refined during SSA building. - ComputeSideEffects(); } + bool IsClonable() const OVERRIDE { return true; } + bool NeedsEnvironment() const OVERRIDE { // We call a runtime method to throw ArrayStoreException. return NeedsTypeCheck(); @@ -5495,37 +5836,43 @@ class HArraySet FINAL : public HTemplateInstruction<3> { HInstruction* GetIndex() const { return InputAt(1); } HInstruction* GetValue() const { return InputAt(2); } - Primitive::Type GetComponentType() const { + DataType::Type GetComponentType() const { + return GetComponentType(GetValue()->GetType(), GetRawExpectedComponentType()); + } + + static DataType::Type GetComponentType(DataType::Type value_type, + DataType::Type expected_component_type) { // The Dex format does not type floating point index operations. Since the - // `expected_component_type_` is set during building and can therefore not + // `expected_component_type` comes from SSA building and can therefore not // be correct, we also check what is the value type. If it is a floating // point type, we must use that type. 
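Separately from the float/int ambiguity handled above, this change also adds SetType() hooks to HInstanceFieldGet and HArrayGet (and to HStaticFieldGet below) that let later passes re-type a load, but only to another integral type of the same size. That restriction is what makes the retype safe: the loaded bytes are identical, and only the extension to 32 bits changes. A small stand-alone illustration, with local helper names:

```cpp
#include <cstdint>

// The same 16-bit load, reinterpreted: as kInt16 (Java short) it sign-extends,
// as kUint16 (Java char) it zero-extends.
int32_t ExtendAsInt16(uint16_t raw) { return static_cast<int16_t>(raw); }
int32_t ExtendAsUint16(uint16_t raw) { return raw; }

// ExtendAsInt16(0xFFFFu) == -1, ExtendAsUint16(0xFFFFu) == 65535: identical
// bytes in memory, different result value, which is why SetType() insists on
// integral-to-integral with matching DataType::Size().
```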
- Primitive::Type value_type = GetValue()->GetType(); - return ((value_type == Primitive::kPrimFloat) || (value_type == Primitive::kPrimDouble)) + return ((value_type == DataType::Type::kFloat32) || (value_type == DataType::Type::kFloat64)) ? value_type - : GetRawExpectedComponentType(); + : expected_component_type; } - Primitive::Type GetRawExpectedComponentType() const { + DataType::Type GetRawExpectedComponentType() const { return GetPackedField<ExpectedComponentTypeField>(); } - void ComputeSideEffects() { - Primitive::Type type = GetComponentType(); - SetSideEffects(SideEffects::ArrayWriteOfType(type).Union( - SideEffectsForArchRuntimeCalls(type))); + static SideEffects ComputeSideEffects(DataType::Type type) { + return SideEffects::ArrayWriteOfType(type).Union(SideEffectsForArchRuntimeCalls(type)); } - static SideEffects SideEffectsForArchRuntimeCalls(Primitive::Type value_type) { - return (value_type == Primitive::kPrimNot) ? SideEffects::CanTriggerGC() : SideEffects::None(); + static SideEffects SideEffectsForArchRuntimeCalls(DataType::Type value_type) { + return (value_type == DataType::Type::kReference) ? SideEffects::CanTriggerGC() + : SideEffects::None(); } DECLARE_INSTRUCTION(ArraySet); + protected: + DEFAULT_COPY_CONSTRUCTOR(ArraySet); + private: static constexpr size_t kFieldExpectedComponentType = kNumberOfGenericPackedBits; static constexpr size_t kFieldExpectedComponentTypeSize = - MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); + MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); static constexpr size_t kFlagNeedsTypeCheck = kFieldExpectedComponentType + kFieldExpectedComponentTypeSize; static constexpr size_t kFlagValueCanBeNull = kFlagNeedsTypeCheck + 1; @@ -5536,21 +5883,20 @@ class HArraySet FINAL : public HTemplateInstruction<3> { kFlagStaticTypeOfArrayIsObjectArray + 1; static_assert(kNumberOfArraySetPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using ExpectedComponentTypeField = - BitField<Primitive::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>; - - DISALLOW_COPY_AND_ASSIGN(HArraySet); + BitField<DataType::Type, kFieldExpectedComponentType, kFieldExpectedComponentTypeSize>; }; class HArrayLength FINAL : public HExpression<1> { public: HArrayLength(HInstruction* array, uint32_t dex_pc, bool is_string_length = false) - : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) { + : HExpression(kArrayLength, DataType::Type::kInt32, SideEffects::None(), dex_pc) { SetPackedFlag<kFlagIsStringLength>(is_string_length); // Note that arrays do not change length, so the instruction does not // depend on any write. SetRawInputAt(0, array); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; @@ -5563,6 +5909,9 @@ class HArrayLength FINAL : public HExpression<1> { DECLARE_INSTRUCTION(ArrayLength); + protected: + DEFAULT_COPY_CONSTRUCTOR(ArrayLength); + private: // We treat a String as an array, creating the HArrayLength from String.length() // or String.isEmpty() intrinsic in the instruction simplifier. 
We can always @@ -5573,8 +5922,6 @@ class HArrayLength FINAL : public HExpression<1> { static constexpr size_t kNumberOfArrayLengthPackedBits = kFlagIsStringLength + 1; static_assert(kNumberOfArrayLengthPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); - - DISALLOW_COPY_AND_ASSIGN(HArrayLength); }; class HBoundsCheck FINAL : public HExpression<2> { @@ -5584,14 +5931,15 @@ class HBoundsCheck FINAL : public HExpression<2> { HBoundsCheck(HInstruction* index, HInstruction* length, uint32_t dex_pc, - bool string_char_at = false) - : HExpression(index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { - DCHECK_EQ(Primitive::kPrimInt, Primitive::PrimitiveKind(index->GetType())); - SetPackedFlag<kFlagIsStringCharAt>(string_char_at); + bool is_string_char_at = false) + : HExpression(kBoundsCheck, index->GetType(), SideEffects::CanTriggerGC(), dex_pc) { + DCHECK_EQ(DataType::Type::kInt32, DataType::Kind(index->GetType())); + SetPackedFlag<kFlagIsStringCharAt>(is_string_char_at); SetRawInputAt(0, index); SetRawInputAt(1, length); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; @@ -5607,16 +5955,21 @@ class HBoundsCheck FINAL : public HExpression<2> { DECLARE_INSTRUCTION(BoundsCheck); + protected: + DEFAULT_COPY_CONSTRUCTOR(BoundsCheck); + private: static constexpr size_t kFlagIsStringCharAt = kNumberOfExpressionPackedBits; - - DISALLOW_COPY_AND_ASSIGN(HBoundsCheck); }; class HSuspendCheck FINAL : public HTemplateInstruction<0> { public: explicit HSuspendCheck(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc), slow_path_(nullptr) {} + : HTemplateInstruction(kSuspendCheck, SideEffects::CanTriggerGC(), dex_pc), + slow_path_(nullptr) { + } + + bool IsClonable() const OVERRIDE { return true; } bool NeedsEnvironment() const OVERRIDE { return true; @@ -5627,12 +5980,13 @@ class HSuspendCheck FINAL : public HTemplateInstruction<0> { DECLARE_INSTRUCTION(SuspendCheck); + protected: + DEFAULT_COPY_CONSTRUCTOR(SuspendCheck); + private: // Only used for code generation, in order to share the same slow path between back edges // of a same loop. SlowPathCode* slow_path_; - - DISALLOW_COPY_AND_ASSIGN(HSuspendCheck); }; // Pseudo-instruction which provides the native debugger with mapping information. @@ -5640,7 +5994,8 @@ class HSuspendCheck FINAL : public HTemplateInstruction<0> { class HNativeDebugInfo : public HTemplateInstruction<0> { public: explicit HNativeDebugInfo(uint32_t dex_pc) - : HTemplateInstruction<0>(SideEffects::None(), dex_pc) {} + : HTemplateInstruction<0>(kNativeDebugInfo, SideEffects::None(), dex_pc) { + } bool NeedsEnvironment() const OVERRIDE { return true; @@ -5648,8 +6003,8 @@ class HNativeDebugInfo : public HTemplateInstruction<0> { DECLARE_INSTRUCTION(NativeDebugInfo); - private: - DISALLOW_COPY_AND_ASSIGN(HNativeDebugInfo); + protected: + DEFAULT_COPY_CONSTRUCTOR(NativeDebugInfo); }; /** @@ -5673,6 +6028,10 @@ class HLoadClass FINAL : public HInstruction { // Used for boot image classes referenced by apps in AOT- and JIT-compiled code. kBootImageAddress, + // Use a PC-relative load from a boot image ClassTable mmapped into the .bss + // of the oat file. + kBootImageClassTable, + // Load from an entry in the .bss section using a PC-relative load. // Used for classes outside boot image when .bss is accessible with a PC-relative load. 
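A note on HBoundsCheck a few hunks up: it receives the index and the length and throws when the index is out of range. Lowerings of this kind of check commonly fold the two comparisons (index < 0 and index >= length) into one unsigned compare, since a negative index becomes a huge unsigned value; the helper below is just that textbook trick, not code from this change:

```cpp
#include <cstdint>

// One unsigned comparison covers both "index < 0" and "index >= length",
// because length is never negative and a negative index wraps to a large
// unsigned value. Illustrative helper only.
bool IndexInBounds(int32_t index, int32_t length) {
  return static_cast<uint32_t>(index) < static_cast<uint32_t>(length);
}
```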
kBssEntry, @@ -5694,7 +6053,7 @@ class HLoadClass FINAL : public HInstruction { bool is_referrers_class, uint32_t dex_pc, bool needs_access_check) - : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadClass, SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), type_index_(type_index), dex_file_(dex_file), @@ -5711,6 +6070,8 @@ class HLoadClass FINAL : public HInstruction { SetPackedFlag<kFlagGenerateClInitCheck>(false); } + bool IsClonable() const OVERRIDE { return true; } + void SetLoadKind(LoadKind load_kind); LoadKind GetLoadKind() const { @@ -5792,8 +6153,8 @@ class HLoadClass FINAL : public HInstruction { &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u); } - Primitive::Type GetType() const OVERRIDE { - return Primitive::kPrimNot; + DataType::Type GetType() const OVERRIDE { + return DataType::Type::kReference; } Handle<mirror::Class> GetClass() const { @@ -5802,6 +6163,9 @@ class HLoadClass FINAL : public HInstruction { DECLARE_INSTRUCTION(LoadClass); + protected: + DEFAULT_COPY_CONSTRUCTOR(LoadClass); + private: static constexpr size_t kFlagNeedsAccessCheck = kNumberOfGenericPackedBits; static constexpr size_t kFlagIsInBootImage = kFlagNeedsAccessCheck + 1; @@ -5818,6 +6182,7 @@ class HLoadClass FINAL : public HInstruction { static bool HasTypeReference(LoadKind load_kind) { return load_kind == LoadKind::kReferrersClass || load_kind == LoadKind::kBootImageLinkTimePcRelative || + load_kind == LoadKind::kBootImageClassTable || load_kind == LoadKind::kBssEntry || load_kind == LoadKind::kRuntimeCall; } @@ -5840,17 +6205,30 @@ class HLoadClass FINAL : public HInstruction { Handle<mirror::Class> klass_; ReferenceTypeInfo loaded_class_rti_; - - DISALLOW_COPY_AND_ASSIGN(HLoadClass); }; std::ostream& operator<<(std::ostream& os, HLoadClass::LoadKind rhs); // Note: defined outside class to see operator<<(., HLoadClass::LoadKind). +inline void HLoadClass::SetLoadKind(LoadKind load_kind) { + // The load kind should be determined before inserting the instruction to the graph. + DCHECK(GetBlock() == nullptr); + DCHECK(GetEnvironment() == nullptr); + SetPackedField<LoadKindField>(load_kind); + if (load_kind != LoadKind::kRuntimeCall && load_kind != LoadKind::kReferrersClass) { + special_input_ = HUserRecord<HInstruction*>(nullptr); + } + if (!NeedsEnvironment()) { + SetSideEffects(SideEffects::None()); + } +} + +// Note: defined outside class to see operator<<(., HLoadClass::LoadKind). inline void HLoadClass::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || GetLoadKind() == LoadKind::kBootImageAddress || + GetLoadKind() == LoadKind::kBootImageClassTable || GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); DCHECK(special_input_.GetInstruction() == nullptr); special_input_ = HUserRecord<HInstruction*>(special_input); @@ -5869,6 +6247,10 @@ class HLoadString FINAL : public HInstruction { // Used for boot image strings referenced by apps in AOT- and JIT-compiled code. kBootImageAddress, + // Use a PC-relative load from a boot image InternTable mmapped into the .bss + // of the oat file. + kBootImageInternTable, + // Load from an entry in the .bss section using a PC-relative load. // Used for strings outside boot image when .bss is accessible with a PC-relative load. 
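The load kinds above share one mechanism: kBssEntry, and the new kBootImageClassTable / kBootImageInternTable kinds, all read through data that the oat file maps into .bss and that compiled code reaches with a PC-relative address. For the plain kBssEntry case the slot starts out null and a slow path fills it on first use, so later executions are a single load. The sketch below only models that general shape; the struct, the function and the slow-path parameter are stand-ins, not ART types or ART's actual codegen:

```cpp
#include <atomic>

struct ObjectRef {};  // stand-in for a managed class/string reference

// Conceptual shape of a kBssEntry load: read the patched slot; if it is still
// empty, resolve through the runtime once and cache the result.
ObjectRef* LoadViaBssEntry(std::atomic<ObjectRef*>* bss_slot,
                           ObjectRef* (*slow_path_resolve)()) {
  ObjectRef* ref = bss_slot->load(std::memory_order_acquire);
  if (ref == nullptr) {
    ref = slow_path_resolve();
    bss_slot->store(ref, std::memory_order_release);
  }
  return ref;
}
```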
kBssEntry, @@ -5887,13 +6269,15 @@ class HLoadString FINAL : public HInstruction { dex::StringIndex string_index, const DexFile& dex_file, uint32_t dex_pc) - : HInstruction(SideEffectsForArchRuntimeCalls(), dex_pc), + : HInstruction(kLoadString, SideEffectsForArchRuntimeCalls(), dex_pc), special_input_(HUserRecord<HInstruction*>(current_method)), string_index_(string_index), dex_file_(dex_file) { SetPackedField<LoadKindField>(LoadKind::kRuntimeCall); } + bool IsClonable() const OVERRIDE { return true; } + void SetLoadKind(LoadKind load_kind); LoadKind GetLoadKind() const { @@ -5928,6 +6312,7 @@ class HLoadString FINAL : public HInstruction { LoadKind load_kind = GetLoadKind(); if (load_kind == LoadKind::kBootImageLinkTimePcRelative || load_kind == LoadKind::kBootImageAddress || + load_kind == LoadKind::kBootImageInternTable || load_kind == LoadKind::kJitTableAddress) { return false; } @@ -5953,12 +6338,15 @@ class HLoadString FINAL : public HInstruction { &special_input_, (special_input_.GetInstruction() != nullptr) ? 1u : 0u); } - Primitive::Type GetType() const OVERRIDE { - return Primitive::kPrimNot; + DataType::Type GetType() const OVERRIDE { + return DataType::Type::kReference; } DECLARE_INSTRUCTION(LoadString); + protected: + DEFAULT_COPY_CONSTRUCTOR(LoadString); + private: static constexpr size_t kFieldLoadKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldLoadKindSize = @@ -5978,18 +6366,32 @@ class HLoadString FINAL : public HInstruction { const DexFile& dex_file_; Handle<mirror::String> string_; - - DISALLOW_COPY_AND_ASSIGN(HLoadString); }; std::ostream& operator<<(std::ostream& os, HLoadString::LoadKind rhs); // Note: defined outside class to see operator<<(., HLoadString::LoadKind). +inline void HLoadString::SetLoadKind(LoadKind load_kind) { + // The load kind should be determined before inserting the instruction to the graph. + DCHECK(GetBlock() == nullptr); + DCHECK(GetEnvironment() == nullptr); + DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall); + SetPackedField<LoadKindField>(load_kind); + if (load_kind != LoadKind::kRuntimeCall) { + special_input_ = HUserRecord<HInstruction*>(nullptr); + } + if (!NeedsEnvironment()) { + SetSideEffects(SideEffects::None()); + } +} + +// Note: defined outside class to see operator<<(., HLoadString::LoadKind). inline void HLoadString::AddSpecialInput(HInstruction* special_input) { // The special input is used for PC-relative loads on some architectures, // including literal pool loads, which are PC-relative too. DCHECK(GetLoadKind() == LoadKind::kBootImageLinkTimePcRelative || - GetLoadKind() == LoadKind::kBssEntry || - GetLoadKind() == LoadKind::kBootImageAddress) << GetLoadKind(); + GetLoadKind() == LoadKind::kBootImageAddress || + GetLoadKind() == LoadKind::kBootImageInternTable || + GetLoadKind() == LoadKind::kBssEntry) << GetLoadKind(); // HLoadString::GetInputRecords() returns an empty array at this point, // so use the GetInputRecords() from the base class to set the input record. DCHECK(special_input_.GetInstruction() == nullptr); @@ -6004,12 +6406,14 @@ class HClinitCheck FINAL : public HExpression<1> { public: HClinitCheck(HLoadClass* constant, uint32_t dex_pc) : HExpression( - Primitive::kPrimNot, - SideEffects::AllChanges(), // Assume write/read on all fields/arrays. + kClinitCheck, + DataType::Type::kReference, + SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. 
dex_pc) { SetRawInputAt(0, constant); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; @@ -6029,22 +6433,26 @@ class HClinitCheck FINAL : public HExpression<1> { DECLARE_INSTRUCTION(ClinitCheck); - private: - DISALLOW_COPY_AND_ASSIGN(HClinitCheck); + + protected: + DEFAULT_COPY_CONSTRUCTOR(ClinitCheck); }; class HStaticFieldGet FINAL : public HExpression<1> { public: HStaticFieldGet(HInstruction* cls, ArtField* field, - Primitive::Type field_type, + DataType::Type field_type, MemberOffset field_offset, bool is_volatile, uint32_t field_idx, uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HExpression(field_type, SideEffects::FieldReadOfType(field_type, is_volatile), dex_pc), + : HExpression(kStaticFieldGet, + field_type, + SideEffects::FieldReadOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -6056,6 +6464,7 @@ class HStaticFieldGet FINAL : public HExpression<1> { } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return !IsVolatile(); } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { @@ -6069,15 +6478,23 @@ class HStaticFieldGet FINAL : public HExpression<1> { const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } - Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + DataType::Type GetFieldType() const { return field_info_.GetFieldType(); } bool IsVolatile() const { return field_info_.IsVolatile(); } + void SetType(DataType::Type new_type) { + DCHECK(DataType::IsIntegralType(GetType())); + DCHECK(DataType::IsIntegralType(new_type)); + DCHECK_EQ(DataType::Size(GetType()), DataType::Size(new_type)); + SetPackedField<TypeField>(new_type); + } + DECLARE_INSTRUCTION(StaticFieldGet); + protected: + DEFAULT_COPY_CONSTRUCTOR(StaticFieldGet); + private: const FieldInfo field_info_; - - DISALLOW_COPY_AND_ASSIGN(HStaticFieldGet); }; class HStaticFieldSet FINAL : public HTemplateInstruction<2> { @@ -6085,14 +6502,16 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { HStaticFieldSet(HInstruction* cls, HInstruction* value, ArtField* field, - Primitive::Type field_type, + DataType::Type field_type, MemberOffset field_offset, bool is_volatile, uint32_t field_idx, uint16_t declaring_class_def_index, const DexFile& dex_file, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::FieldWriteOfType(field_type, is_volatile), dex_pc), + : HTemplateInstruction(kStaticFieldSet, + SideEffects::FieldWriteOfType(field_type, is_volatile), + dex_pc), field_info_(field, field_offset, field_type, @@ -6105,9 +6524,10 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { SetRawInputAt(1, value); } + bool IsClonable() const OVERRIDE { return true; } const FieldInfo& GetFieldInfo() const { return field_info_; } MemberOffset GetFieldOffset() const { return field_info_.GetFieldOffset(); } - Primitive::Type GetFieldType() const { return field_info_.GetFieldType(); } + DataType::Type GetFieldType() const { return field_info_.GetFieldType(); } bool IsVolatile() const { return field_info_.IsVolatile(); } HInstruction* GetValue() const { return InputAt(1); } @@ -6116,6 +6536,9 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { DECLARE_INSTRUCTION(StaticFieldSet); + protected: + 
DEFAULT_COPY_CONSTRUCTOR(StaticFieldSet); + private: static constexpr size_t kFlagValueCanBeNull = kNumberOfGenericPackedBits; static constexpr size_t kNumberOfStaticFieldSetPackedBits = kFlagValueCanBeNull + 1; @@ -6123,144 +6546,161 @@ class HStaticFieldSet FINAL : public HTemplateInstruction<2> { "Too many packed fields."); const FieldInfo field_info_; - - DISALLOW_COPY_AND_ASSIGN(HStaticFieldSet); }; class HUnresolvedInstanceFieldGet FINAL : public HExpression<1> { public: HUnresolvedInstanceFieldGet(HInstruction* obj, - Primitive::Type field_type, + DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc), + : HExpression(kUnresolvedInstanceFieldGet, + field_type, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { SetRawInputAt(0, obj); } + bool IsClonable() const OVERRIDE { return true; } bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - Primitive::Type GetFieldType() const { return GetType(); } + DataType::Type GetFieldType() const { return GetType(); } uint32_t GetFieldIndex() const { return field_index_; } DECLARE_INSTRUCTION(UnresolvedInstanceFieldGet); + protected: + DEFAULT_COPY_CONSTRUCTOR(UnresolvedInstanceFieldGet); + private: const uint32_t field_index_; - - DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldGet); }; class HUnresolvedInstanceFieldSet FINAL : public HTemplateInstruction<2> { public: HUnresolvedInstanceFieldSet(HInstruction* obj, HInstruction* value, - Primitive::Type field_type, + DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc), + : HTemplateInstruction(kUnresolvedInstanceFieldSet, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); - DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType())); + DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType())); SetRawInputAt(0, obj); SetRawInputAt(1, value); } + bool IsClonable() const OVERRIDE { return true; } bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - Primitive::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); } + DataType::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); } uint32_t GetFieldIndex() const { return field_index_; } DECLARE_INSTRUCTION(UnresolvedInstanceFieldSet); + protected: + DEFAULT_COPY_CONSTRUCTOR(UnresolvedInstanceFieldSet); + private: static constexpr size_t kFieldFieldType = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFieldFieldTypeSize = - MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); + MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); static constexpr size_t kNumberOfUnresolvedStaticFieldSetPackedBits = kFieldFieldType + kFieldFieldTypeSize; static_assert(kNumberOfUnresolvedStaticFieldSetPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); - using FieldTypeField = BitField<Primitive::Type, kFieldFieldType, kFieldFieldTypeSize>; + using FieldTypeField = BitField<DataType::Type, kFieldFieldType, kFieldFieldTypeSize>; const uint32_t field_index_; - - DISALLOW_COPY_AND_ASSIGN(HUnresolvedInstanceFieldSet); }; class HUnresolvedStaticFieldGet FINAL : public HExpression<0> { public: - HUnresolvedStaticFieldGet(Primitive::Type field_type, + 
HUnresolvedStaticFieldGet(DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HExpression(field_type, SideEffects::AllExceptGCDependency(), dex_pc), + : HExpression(kUnresolvedStaticFieldGet, + field_type, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { } + bool IsClonable() const OVERRIDE { return true; } bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - Primitive::Type GetFieldType() const { return GetType(); } + DataType::Type GetFieldType() const { return GetType(); } uint32_t GetFieldIndex() const { return field_index_; } DECLARE_INSTRUCTION(UnresolvedStaticFieldGet); + protected: + DEFAULT_COPY_CONSTRUCTOR(UnresolvedStaticFieldGet); + private: const uint32_t field_index_; - - DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldGet); }; class HUnresolvedStaticFieldSet FINAL : public HTemplateInstruction<1> { public: HUnresolvedStaticFieldSet(HInstruction* value, - Primitive::Type field_type, + DataType::Type field_type, uint32_t field_index, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::AllExceptGCDependency(), dex_pc), + : HTemplateInstruction(kUnresolvedStaticFieldSet, + SideEffects::AllExceptGCDependency(), + dex_pc), field_index_(field_index) { SetPackedField<FieldTypeField>(field_type); - DCHECK_EQ(Primitive::PrimitiveKind(field_type), Primitive::PrimitiveKind(value->GetType())); + DCHECK_EQ(DataType::Kind(field_type), DataType::Kind(value->GetType())); SetRawInputAt(0, value); } + bool IsClonable() const OVERRIDE { return true; } bool NeedsEnvironment() const OVERRIDE { return true; } bool CanThrow() const OVERRIDE { return true; } - Primitive::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); } + DataType::Type GetFieldType() const { return GetPackedField<FieldTypeField>(); } uint32_t GetFieldIndex() const { return field_index_; } DECLARE_INSTRUCTION(UnresolvedStaticFieldSet); + protected: + DEFAULT_COPY_CONSTRUCTOR(UnresolvedStaticFieldSet); + private: static constexpr size_t kFieldFieldType = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFieldFieldTypeSize = - MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); + MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); static constexpr size_t kNumberOfUnresolvedStaticFieldSetPackedBits = kFieldFieldType + kFieldFieldTypeSize; static_assert(kNumberOfUnresolvedStaticFieldSetPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); - using FieldTypeField = BitField<Primitive::Type, kFieldFieldType, kFieldFieldTypeSize>; + using FieldTypeField = BitField<DataType::Type, kFieldFieldType, kFieldFieldTypeSize>; const uint32_t field_index_; - - DISALLOW_COPY_AND_ASSIGN(HUnresolvedStaticFieldSet); }; // Implement the move-exception DEX instruction. class HLoadException FINAL : public HExpression<0> { public: explicit HLoadException(uint32_t dex_pc = kNoDexPc) - : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc) {} + : HExpression(kLoadException, DataType::Type::kReference, SideEffects::None(), dex_pc) { + } bool CanBeNull() const OVERRIDE { return false; } DECLARE_INSTRUCTION(LoadException); - private: - DISALLOW_COPY_AND_ASSIGN(HLoadException); + protected: + DEFAULT_COPY_CONSTRUCTOR(LoadException); }; // Implicit part of move-exception which clears thread-local exception storage. 
@@ -6268,18 +6708,19 @@ class HLoadException FINAL : public HExpression<0> { class HClearException FINAL : public HTemplateInstruction<0> { public: explicit HClearException(uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::AllWrites(), dex_pc) {} + : HTemplateInstruction(kClearException, SideEffects::AllWrites(), dex_pc) { + } DECLARE_INSTRUCTION(ClearException); - private: - DISALLOW_COPY_AND_ASSIGN(HClearException); + protected: + DEFAULT_COPY_CONSTRUCTOR(ClearException); }; class HThrow FINAL : public HTemplateInstruction<1> { public: HThrow(HInstruction* exception, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { + : HTemplateInstruction(kThrow, SideEffects::CanTriggerGC(), dex_pc) { SetRawInputAt(0, exception); } @@ -6289,11 +6730,12 @@ class HThrow FINAL : public HTemplateInstruction<1> { bool CanThrow() const OVERRIDE { return true; } + bool AlwaysThrows() const OVERRIDE { return true; } DECLARE_INSTRUCTION(Throw); - private: - DISALLOW_COPY_AND_ASSIGN(HThrow); + protected: + DEFAULT_COPY_CONSTRUCTOR(Throw); }; /** @@ -6316,18 +6758,26 @@ std::ostream& operator<<(std::ostream& os, TypeCheckKind rhs); class HInstanceOf FINAL : public HExpression<2> { public: HInstanceOf(HInstruction* object, - HLoadClass* constant, + HLoadClass* target_class, TypeCheckKind check_kind, uint32_t dex_pc) - : HExpression(Primitive::kPrimBoolean, + : HExpression(kInstanceOf, + DataType::Type::kBool, SideEffectsForArchRuntimeCalls(check_kind), dex_pc) { SetPackedField<TypeCheckKindField>(check_kind); SetPackedFlag<kFlagMustDoNullCheck>(true); SetRawInputAt(0, object); - SetRawInputAt(1, constant); + SetRawInputAt(1, target_class); } + HLoadClass* GetTargetClass() const { + HInstruction* load_class = InputAt(1); + DCHECK(load_class->IsLoadClass()); + return load_class->AsLoadClass(); + } + + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -6355,6 +6805,9 @@ class HInstanceOf FINAL : public HExpression<2> { DECLARE_INSTRUCTION(InstanceOf); + protected: + DEFAULT_COPY_CONSTRUCTOR(InstanceOf); + private: static constexpr size_t kFieldTypeCheckKind = kNumberOfExpressionPackedBits; static constexpr size_t kFieldTypeCheckKindSize = @@ -6363,21 +6816,21 @@ class HInstanceOf FINAL : public HExpression<2> { static constexpr size_t kNumberOfInstanceOfPackedBits = kFlagMustDoNullCheck + 1; static_assert(kNumberOfInstanceOfPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; - - DISALLOW_COPY_AND_ASSIGN(HInstanceOf); }; class HBoundType FINAL : public HExpression<1> { public: explicit HBoundType(HInstruction* input, uint32_t dex_pc = kNoDexPc) - : HExpression(Primitive::kPrimNot, SideEffects::None(), dex_pc), + : HExpression(kBoundType, DataType::Type::kReference, SideEffects::None(), dex_pc), upper_bound_(ReferenceTypeInfo::CreateInvalid()) { SetPackedFlag<kFlagUpperCanBeNull>(true); SetPackedFlag<kFlagCanBeNull>(true); - DCHECK_EQ(input->GetType(), Primitive::kPrimNot); + DCHECK_EQ(input->GetType(), DataType::Type::kReference); SetRawInputAt(0, input); } + bool IsClonable() const OVERRIDE { return true; } + // {Get,Set}Upper* should only be used in reference type propagation. 
const ReferenceTypeInfo& GetUpperBound() const { return upper_bound_; } bool GetUpperCanBeNull() const { return GetPackedFlag<kFlagUpperCanBeNull>(); } @@ -6392,6 +6845,9 @@ class HBoundType FINAL : public HExpression<1> { DECLARE_INSTRUCTION(BoundType); + protected: + DEFAULT_COPY_CONSTRUCTOR(BoundType); + private: // Represents the top constraint that can_be_null_ cannot exceed (i.e. if this // is false then CanBeNull() cannot be true). @@ -6407,23 +6863,28 @@ class HBoundType FINAL : public HExpression<1> { // // uper_bound_ will be ClassX // } ReferenceTypeInfo upper_bound_; - - DISALLOW_COPY_AND_ASSIGN(HBoundType); }; class HCheckCast FINAL : public HTemplateInstruction<2> { public: HCheckCast(HInstruction* object, - HLoadClass* constant, + HLoadClass* target_class, TypeCheckKind check_kind, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) { + : HTemplateInstruction(kCheckCast, SideEffects::CanTriggerGC(), dex_pc) { SetPackedField<TypeCheckKindField>(check_kind); SetPackedFlag<kFlagMustDoNullCheck>(true); SetRawInputAt(0, object); - SetRawInputAt(1, constant); + SetRawInputAt(1, target_class); + } + + HLoadClass* GetTargetClass() const { + HInstruction* load_class = InputAt(1); + DCHECK(load_class->IsLoadClass()); + return load_class->AsLoadClass(); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { @@ -6444,6 +6905,9 @@ class HCheckCast FINAL : public HTemplateInstruction<2> { DECLARE_INSTRUCTION(CheckCast); + protected: + DEFAULT_COPY_CONSTRUCTOR(CheckCast); + private: static constexpr size_t kFieldTypeCheckKind = kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeCheckKindSize = @@ -6452,8 +6916,6 @@ class HCheckCast FINAL : public HTemplateInstruction<2> { static constexpr size_t kNumberOfCheckCastPackedBits = kFlagMustDoNullCheck + 1; static_assert(kNumberOfCheckCastPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using TypeCheckKindField = BitField<TypeCheckKind, kFieldTypeCheckKind, kFieldTypeCheckKindSize>; - - DISALLOW_COPY_AND_ASSIGN(HCheckCast); }; /** @@ -6486,14 +6948,21 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> { public: explicit HMemoryBarrier(MemBarrierKind barrier_kind, uint32_t dex_pc = kNoDexPc) : HTemplateInstruction( - SideEffects::AllWritesAndReads(), dex_pc) { // Assume write/read on all fields/arrays. + kMemoryBarrier, + SideEffects::AllWritesAndReads(), // Assume write/read on all fields/arrays. 
+ dex_pc) { SetPackedField<BarrierKindField>(barrier_kind); } + bool IsClonable() const OVERRIDE { return true; } + MemBarrierKind GetBarrierKind() { return GetPackedField<BarrierKindField>(); } DECLARE_INSTRUCTION(MemoryBarrier); + protected: + DEFAULT_COPY_CONSTRUCTOR(MemoryBarrier); + private: static constexpr size_t kFieldBarrierKind = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFieldBarrierKindSize = @@ -6503,8 +6972,6 @@ class HMemoryBarrier FINAL : public HTemplateInstruction<0> { static_assert(kNumberOfMemoryBarrierPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); using BarrierKindField = BitField<MemBarrierKind, kFieldBarrierKind, kFieldBarrierKindSize>; - - DISALLOW_COPY_AND_ASSIGN(HMemoryBarrier); }; // A constructor fence orders all prior stores to fields that could be accessed via a final field of @@ -6580,7 +7047,7 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { // about the associated object. HConstructorFence(HInstruction* fence_object, uint32_t dex_pc, - ArenaAllocator* arena) + ArenaAllocator* allocator) // We strongly suspect there is not a more accurate way to describe the fine-grained reordering // constraints described in the class header. We claim that these SideEffects constraints // enforce a superset of the real constraints. @@ -6602,9 +7069,10 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { // // If in a later phase we discover that there are no writes to reference final fields, // we can refine the side effect to a smaller set of type reads (see above constraints). - : HVariableInputSizeInstruction(SideEffects::AllReads(), + : HVariableInputSizeInstruction(kConstructorFence, + SideEffects::AllReads(), dex_pc, - arena, + allocator, /* number_of_inputs */ 1, kArenaAllocConstructorFenceInputs) { DCHECK(fence_object != nullptr); @@ -6630,20 +7098,33 @@ class HConstructorFence FINAL : public HVariableInputSizeInstruction { // This must *not* be called during/after prepare_for_register_allocation, // because that removes all the inputs to the fences but the fence is actually // still considered live. - static void RemoveConstructorFences(HInstruction* instruction); + // + // Returns how many HConstructorFence instructions were removed from graph. + static size_t RemoveConstructorFences(HInstruction* instruction); + + // Combine all inputs of `this` and `other` instruction and remove + // `other` from the graph. + // + // Inputs are unique after the merge. + // + // Requirement: `this` must not be the same as `other. + void Merge(HConstructorFence* other); // Check if this constructor fence is protecting // an HNewInstance or HNewArray that is also the immediate // predecessor of `this`. // + // If `ignore_inputs` is true, then the immediate predecessor doesn't need + // to be one of the inputs of `this`. + // // Returns the associated HNewArray or HNewInstance, // or null otherwise. 
- HInstruction* GetAssociatedAllocation(); + HInstruction* GetAssociatedAllocation(bool ignore_inputs = false); DECLARE_INSTRUCTION(ConstructorFence); - private: - DISALLOW_COPY_AND_ASSIGN(HConstructorFence); + protected: + DEFAULT_COPY_CONSTRUCTOR(ConstructorFence); }; class HMonitorOperation FINAL : public HTemplateInstruction<1> { @@ -6656,6 +7137,7 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> { HMonitorOperation(HInstruction* object, OperationKind kind, uint32_t dex_pc) : HTemplateInstruction( + kMonitorOperation, SideEffects::AllExceptGCDependency(), // Assume write/read on all fields/arrays. dex_pc) { SetPackedField<OperationKindField>(kind); @@ -6677,6 +7159,9 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(MonitorOperation); + protected: + DEFAULT_COPY_CONSTRUCTOR(MonitorOperation); + private: static constexpr size_t kFieldOperationKind = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFieldOperationKindSize = @@ -6686,9 +7171,6 @@ class HMonitorOperation FINAL : public HTemplateInstruction<1> { static_assert(kNumberOfMonitorOperationPackedBits <= HInstruction::kMaxNumberOfPackedBits, "Too many packed fields."); using OperationKindField = BitField<OperationKind, kFieldOperationKind, kFieldOperationKindSize>; - - private: - DISALLOW_COPY_AND_ASSIGN(HMonitorOperation); }; class HSelect FINAL : public HExpression<3> { @@ -6697,7 +7179,7 @@ class HSelect FINAL : public HExpression<3> { HInstruction* true_value, HInstruction* false_value, uint32_t dex_pc) - : HExpression(HPhi::ToPhiType(true_value->GetType()), SideEffects::None(), dex_pc) { + : HExpression(kSelect, HPhi::ToPhiType(true_value->GetType()), SideEffects::None(), dex_pc) { DCHECK_EQ(HPhi::ToPhiType(true_value->GetType()), HPhi::ToPhiType(false_value->GetType())); // First input must be `true_value` or `false_value` to allow codegens to @@ -6709,6 +7191,7 @@ class HSelect FINAL : public HExpression<3> { SetRawInputAt(2, condition); } + bool IsClonable() const OVERRIDE { return true; } HInstruction* GetFalseValue() const { return InputAt(0); } HInstruction* GetTrueValue() const { return InputAt(1); } HInstruction* GetCondition() const { return InputAt(2); } @@ -6724,15 +7207,15 @@ class HSelect FINAL : public HExpression<3> { DECLARE_INSTRUCTION(Select); - private: - DISALLOW_COPY_AND_ASSIGN(HSelect); + protected: + DEFAULT_COPY_CONSTRUCTOR(Select); }; class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { public: MoveOperands(Location source, Location destination, - Primitive::Type type, + DataType::Type type, HInstruction* instruction) : source_(source), destination_(destination), type_(type), instruction_(instruction) {} @@ -6782,10 +7265,10 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { return source_.IsInvalid(); } - Primitive::Type GetType() const { return type_; } + DataType::Type GetType() const { return type_; } bool Is64BitMove() const { - return Primitive::Is64BitType(type_); + return DataType::Is64BitType(type_); } HInstruction* GetInstruction() const { return instruction_; } @@ -6794,7 +7277,7 @@ class MoveOperands : public ArenaObject<kArenaAllocMoveOperands> { Location source_; Location destination_; // The type this move is for. - Primitive::Type type_; + DataType::Type type_; // The instruction this move is assocatied with. Null when this move is // for moving an input in the expected locations of user (including a phi user). 
// This is only used in debug mode, to ensure we do not connect interval siblings @@ -6808,15 +7291,15 @@ static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove FINAL : public HTemplateInstruction<0> { public: - explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), - moves_(arena->Adapter(kArenaAllocMoveOperands)) { + explicit HParallelMove(ArenaAllocator* allocator, uint32_t dex_pc = kNoDexPc) + : HTemplateInstruction(kParallelMove, SideEffects::None(), dex_pc), + moves_(allocator->Adapter(kArenaAllocMoveOperands)) { moves_.reserve(kDefaultNumberOfMoves); } void AddMove(Location source, Location destination, - Primitive::Type type, + DataType::Type type, HInstruction* instruction) { DCHECK(source.IsValid()); DCHECK(destination.IsValid()); @@ -6856,12 +7339,49 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { DECLARE_INSTRUCTION(ParallelMove); + protected: + DEFAULT_COPY_CONSTRUCTOR(ParallelMove); + private: ArenaVector<MoveOperands> moves_; +}; + +// This instruction computes an intermediate address pointing in the 'middle' of an object. The +// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is +// never used across anything that can trigger GC. +// The result of this instruction is not a pointer in the sense of `DataType::Type::kReference`. +// So we represent it by the type `DataType::Type::kInt32`. +class HIntermediateAddress FINAL : public HExpression<2> { + public: + HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) + : HExpression(kIntermediateAddress, + DataType::Type::kInt32, + SideEffects::DependsOnGC(), + dex_pc) { + DCHECK_EQ(DataType::Size(DataType::Type::kInt32), + DataType::Size(DataType::Type::kReference)) + << "kPrimInt and kPrimNot have different sizes."; + SetRawInputAt(0, base_address); + SetRawInputAt(1, offset); + } + + bool IsClonable() const OVERRIDE { return true; } + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + bool IsActualObject() const OVERRIDE { return false; } + + HInstruction* GetBaseAddress() const { return InputAt(0); } + HInstruction* GetOffset() const { return InputAt(1); } - DISALLOW_COPY_AND_ASSIGN(HParallelMove); + DECLARE_INSTRUCTION(IntermediateAddress); + + protected: + DEFAULT_COPY_CONSTRUCTOR(IntermediateAddress); }; + } // namespace art #include "nodes_vector.h" @@ -6878,9 +7398,13 @@ class HParallelMove FINAL : public HTemplateInstruction<0> { namespace art { +class OptimizingCompilerStats; + class HGraphVisitor : public ValueObject { public: - explicit HGraphVisitor(HGraph* graph) : graph_(graph) {} + explicit HGraphVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr) + : stats_(stats), + graph_(graph) {} virtual ~HGraphVisitor() {} virtual void VisitInstruction(HInstruction* instruction ATTRIBUTE_UNUSED) {} @@ -6902,6 +7426,9 @@ class HGraphVisitor : public ValueObject { #undef DECLARE_VISIT_INSTRUCTION + protected: + OptimizingCompilerStats* stats_; + private: HGraph* const graph_; @@ -6910,7 +7437,8 @@ class HGraphVisitor : public ValueObject { class HGraphDelegateVisitor : public HGraphVisitor { public: - explicit HGraphDelegateVisitor(HGraph* graph) : HGraphVisitor(graph) {} + explicit HGraphDelegateVisitor(HGraph* graph, OptimizingCompilerStats* stats = nullptr) + : HGraphVisitor(graph, stats) {} virtual ~HGraphDelegateVisitor()
{} // Visit functions that delegate to the super class. @@ -6925,6 +7453,33 @@ class HGraphDelegateVisitor : public HGraphVisitor { DISALLOW_COPY_AND_ASSIGN(HGraphDelegateVisitor); }; +// Create a clone of the instruction, insert it into the graph; replace the old one with the clone +// and remove the old instruction. +HInstruction* ReplaceInstrOrPhiByClone(HInstruction* instr); + +// Create a clone for each clonable instruction/phi and replace the original with the clone. +// +// Used for testing the individual instruction cloner. +class CloneAndReplaceInstructionVisitor : public HGraphDelegateVisitor { + public: + explicit CloneAndReplaceInstructionVisitor(HGraph* graph) : HGraphDelegateVisitor(graph), instr_replaced_by_clones_count_(0) {} + + void VisitInstruction(HInstruction* instruction) OVERRIDE { + if (instruction->IsClonable()) { + ReplaceInstrOrPhiByClone(instruction); + instr_replaced_by_clones_count_++; + } + } + + size_t GetInstrReplacedByClonesCount() const { return instr_replaced_by_clones_count_; } + + private: + size_t instr_replaced_by_clones_count_; + + DISALLOW_COPY_AND_ASSIGN(CloneAndReplaceInstructionVisitor); +}; + // Iterator over the blocks that are part of the loop. Includes blocks part // of an inner loop. The order in which the blocks are iterated is based on their // block id. @@ -7018,6 +7573,17 @@ inline bool IsInt64AndGet(HInstruction* instruction, /*out*/ int64_t* value) { return false; } +// Returns true iff instruction is the given integral constant. +inline bool IsInt64Value(HInstruction* instruction, int64_t value) { + int64_t val = 0; + return IsInt64AndGet(instruction, &val) && val == value; +} + +// Returns true iff instruction is a zero bit pattern. +inline bool IsZeroBitPattern(HInstruction* instruction) { + return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern(); +} + #define INSTRUCTION_TYPE_CHECK(type, super) \ inline bool HInstruction::Is##type() const { return GetKind() == k##type; } \ inline const H##type* HInstruction::As##type() const { \ @@ -7059,6 +7625,10 @@ inline HInstruction* HuntForDeclaration(HInstruction* instruction) { return instruction; } +void RemoveEnvironmentUses(HInstruction* instruction); +bool HasEnvironmentUsedByOthers(HInstruction* instruction); +void ResetEnvironmentInputRecords(HInstruction* instruction); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_H_ diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h index 8e439d9621..d0e0fef946 100644 --- a/compiler/optimizing/nodes_mips.h +++ b/compiler/optimizing/nodes_mips.h @@ -24,14 +24,18 @@ class HMipsComputeBaseMethodAddress : public HExpression<0> { public: // Treat the value as an int32_t, but it is really a 32 bit native pointer. HMipsComputeBaseMethodAddress() - : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {} + : HExpression(kMipsComputeBaseMethodAddress, + DataType::Type::kInt32, + SideEffects::None(), + kNoDexPc) { + } bool CanBeMoved() const OVERRIDE { return true; } DECLARE_INSTRUCTION(MipsComputeBaseMethodAddress); - private: - DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress); + protected: + DEFAULT_COPY_CONSTRUCTOR(MipsComputeBaseMethodAddress); }; // Mips version of HPackedSwitch that holds a pointer to the base method address.
@@ -42,7 +46,7 @@ class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { HInstruction* input, HMipsComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kMipsPackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); @@ -62,11 +66,55 @@ class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> { DECLARE_INSTRUCTION(MipsPackedSwitch); + protected: + DEFAULT_COPY_CONSTRUCTOR(MipsPackedSwitch); + private: const int32_t start_value_; const int32_t num_entries_; +}; + +// This instruction computes part of the array access offset (index offset). +// +// For array accesses the element address has the following structure: +// Address = CONST_OFFSET + base_addr + index << ELEM_SHIFT. The address part +// (index << ELEM_SHIFT) can be shared across array accesses with +// the same data type and index. For example, in the following loop 5 accesses can share address +// computation: +// +// void foo(int[] a, int[] b, int[] c) { +// for (i...) { +// a[i] = a[i] + 5; +// b[i] = b[i] + c[i]; +// } +// } +// +// Note: as the instruction doesn't involve the base array address in its computations, it has no side +// effects. +class HIntermediateArrayAddressIndex FINAL : public HExpression<2> { + public: + HIntermediateArrayAddressIndex(HInstruction* index, HInstruction* shift, uint32_t dex_pc) + : HExpression(kIntermediateArrayAddressIndex, + DataType::Type::kInt32, + SideEffects::None(), + dex_pc) { + SetRawInputAt(0, index); + SetRawInputAt(1, shift); + } + + bool CanBeMoved() const OVERRIDE { return true; } + bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { + return true; + } + bool IsActualObject() const OVERRIDE { return false; } + + HInstruction* GetIndex() const { return InputAt(0); } + HInstruction* GetShift() const { return InputAt(1); } + + DECLARE_INSTRUCTION(IntermediateArrayAddressIndex); - DISALLOW_COPY_AND_ASSIGN(HMipsPackedSwitch); + protected: + DEFAULT_COPY_CONSTRUCTOR(IntermediateArrayAddressIndex); }; } // namespace art diff --git a/compiler/optimizing/nodes_shared.cc b/compiler/optimizing/nodes_shared.cc index f145bf9130..2f971b93a6 100644 --- a/compiler/optimizing/nodes_shared.cc +++ b/compiler/optimizing/nodes_shared.cc @@ -14,9 +14,15 @@ * limitations under the License. */ -#include "common_arm64.h" +// Note: this include order may seem strange and is against the regular style. However, it is the +// required order, as nodes_shared does not have the right dependency chain and compilation +// will fail (as AsType on HInstruction will be defined before the full Instruction).
+#include "nodes.h" + #include "nodes_shared.h" +#include "common_arm64.h" + namespace art { using helpers::CanFitInShifterOperand; @@ -36,20 +42,23 @@ void HDataProcWithShifterOp::GetOpInfoFromInstruction(HInstruction* instruction, *shift_amount = instruction->AsUShr()->GetRight()->AsIntConstant()->GetValue(); } else { DCHECK(instruction->IsTypeConversion()); - Primitive::Type result_type = instruction->AsTypeConversion()->GetResultType(); - Primitive::Type input_type = instruction->AsTypeConversion()->GetInputType(); - int result_size = Primitive::ComponentSize(result_type); - int input_size = Primitive::ComponentSize(input_type); + DataType::Type result_type = instruction->AsTypeConversion()->GetResultType(); + DataType::Type input_type = instruction->AsTypeConversion()->GetInputType(); + int result_size = DataType::Size(result_type); + int input_size = DataType::Size(input_type); int min_size = std::min(result_size, input_size); - if (result_type == Primitive::kPrimInt && input_type == Primitive::kPrimLong) { + if (result_type == DataType::Type::kInt32 && input_type == DataType::Type::kInt64) { // There is actually nothing to do. On ARM the high register from the // pair will be ignored. On ARM64 the register will be used as a W // register, discarding the top bits. This is represented by the // default encoding 'LSL 0'. *op_kind = kLSL; *shift_amount = 0; - } else if (result_type == Primitive::kPrimChar || - (input_type == Primitive::kPrimChar && input_size < result_size)) { + } else if (result_type == DataType::Type::kUint8 || + (input_type == DataType::Type::kUint8 && input_size < result_size)) { + *op_kind = kUXTB; + } else if (result_type == DataType::Type::kUint16 || + (input_type == DataType::Type::kUint16 && input_size < result_size)) { *op_kind = kUXTH; } else { switch (min_size) { diff --git a/compiler/optimizing/nodes_shared.h b/compiler/optimizing/nodes_shared.h index 075a816f3f..29358e1141 100644 --- a/compiler/optimizing/nodes_shared.h +++ b/compiler/optimizing/nodes_shared.h @@ -26,18 +26,21 @@ namespace art { class HMultiplyAccumulate FINAL : public HExpression<3> { public: - HMultiplyAccumulate(Primitive::Type type, + HMultiplyAccumulate(DataType::Type type, InstructionKind op, HInstruction* accumulator, HInstruction* mul_left, HInstruction* mul_right, uint32_t dex_pc = kNoDexPc) - : HExpression(type, SideEffects::None(), dex_pc), op_kind_(op) { + : HExpression(kMultiplyAccumulate, type, SideEffects::None(), dex_pc), + op_kind_(op) { SetRawInputAt(kInputAccumulatorIndex, accumulator); SetRawInputAt(kInputMulLeftIndex, mul_left); SetRawInputAt(kInputMulRightIndex, mul_right); } + bool IsClonable() const OVERRIDE { return true; } + static constexpr int kInputAccumulatorIndex = 0; static constexpr int kInputMulLeftIndex = 1; static constexpr int kInputMulRightIndex = 2; @@ -51,21 +54,27 @@ class HMultiplyAccumulate FINAL : public HExpression<3> { DECLARE_INSTRUCTION(MultiplyAccumulate); + protected: + DEFAULT_COPY_CONSTRUCTOR(MultiplyAccumulate); + private: // Indicates if this is a MADD or MSUB. 
const InstructionKind op_kind_; - - DISALLOW_COPY_AND_ASSIGN(HMultiplyAccumulate); }; class HBitwiseNegatedRight FINAL : public HBinaryOperation { public: - HBitwiseNegatedRight(Primitive::Type result_type, - InstructionKind op, - HInstruction* left, - HInstruction* right, - uint32_t dex_pc = kNoDexPc) - : HBinaryOperation(result_type, left, right, SideEffects::None(), dex_pc), + HBitwiseNegatedRight(DataType::Type result_type, + InstructionKind op, + HInstruction* left, + HInstruction* right, + uint32_t dex_pc = kNoDexPc) + : HBinaryOperation(kBitwiseNegatedRight, + result_type, + left, + right, + SideEffects::None(), + dex_pc), op_kind_(op) { DCHECK(op == HInstruction::kAnd || op == HInstruction::kOr || op == HInstruction::kXor) << op; } @@ -111,43 +120,12 @@ class HBitwiseNegatedRight FINAL : public HBinaryOperation { DECLARE_INSTRUCTION(BitwiseNegatedRight); + protected: + DEFAULT_COPY_CONSTRUCTOR(BitwiseNegatedRight); + private: // Specifies the bitwise operation, which will be then negated. const InstructionKind op_kind_; - - DISALLOW_COPY_AND_ASSIGN(HBitwiseNegatedRight); -}; - - -// This instruction computes an intermediate address pointing in the 'middle' of an object. The -// result pointer cannot be handled by GC, so extra care is taken to make sure that this value is -// never used across anything that can trigger GC. -// The result of this instruction is not a pointer in the sense of `Primitive::kPrimNot`. So we -// represent it by the type `Primitive::kPrimInt`. -class HIntermediateAddress FINAL : public HExpression<2> { - public: - HIntermediateAddress(HInstruction* base_address, HInstruction* offset, uint32_t dex_pc) - : HExpression(Primitive::kPrimInt, SideEffects::DependsOnGC(), dex_pc) { - DCHECK_EQ(Primitive::ComponentSize(Primitive::kPrimInt), - Primitive::ComponentSize(Primitive::kPrimNot)) - << "kPrimInt and kPrimNot have different sizes."; - SetRawInputAt(0, base_address); - SetRawInputAt(1, offset); - } - - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { - return true; - } - bool IsActualObject() const OVERRIDE { return false; } - - HInstruction* GetBaseAddress() const { return InputAt(0); } - HInstruction* GetOffset() const { return InputAt(1); } - - DECLARE_INSTRUCTION(IntermediateAddress); - - private: - DISALLOW_COPY_AND_ASSIGN(HIntermediateAddress); }; // This instruction computes part of the array access offset (data and index offset). 
@@ -171,12 +149,16 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> { public: HIntermediateAddressIndex( HInstruction* index, HInstruction* offset, HInstruction* shift, uint32_t dex_pc) - : HExpression(Primitive::kPrimInt, SideEffects::None(), dex_pc) { + : HExpression(kIntermediateAddressIndex, + DataType::Type::kInt32, + SideEffects::None(), + dex_pc) { SetRawInputAt(0, index); SetRawInputAt(1, offset); SetRawInputAt(2, shift); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE { return true; @@ -189,8 +171,8 @@ class HIntermediateAddressIndex FINAL : public HExpression<3> { DECLARE_INSTRUCTION(IntermediateAddressIndex); - private: - DISALLOW_COPY_AND_ASSIGN(HIntermediateAddressIndex); + protected: + DEFAULT_COPY_CONSTRUCTOR(IntermediateAddressIndex); }; class HDataProcWithShifterOp FINAL : public HExpression<2> { @@ -220,9 +202,9 @@ class HDataProcWithShifterOp FINAL : public HExpression<2> { // is an extension. int shift = 0, uint32_t dex_pc = kNoDexPc) - : HExpression(instr->GetType(), SideEffects::None(), dex_pc), + : HExpression(kDataProcWithShifterOp, instr->GetType(), SideEffects::None(), dex_pc), instr_kind_(instr->GetKind()), op_kind_(op), - shift_amount_(shift & (instr->GetType() == Primitive::kPrimInt + shift_amount_(shift & (instr->GetType() == DataType::Type::kInt32 ? kMaxIntShiftDistance : kMaxLongShiftDistance)) { DCHECK(!instr->HasSideEffects()); @@ -230,6 +212,7 @@ class HDataProcWithShifterOp FINAL : public HExpression<2> { SetRawInputAt(1, right); } + bool IsClonable() const OVERRIDE { return true; } bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other_instr) const OVERRIDE { const HDataProcWithShifterOp* other = other_instr->AsDataProcWithShifterOp(); @@ -257,14 +240,15 @@ class HDataProcWithShifterOp FINAL : public HExpression<2> { DECLARE_INSTRUCTION(DataProcWithShifterOp); + protected: + DEFAULT_COPY_CONSTRUCTOR(DataProcWithShifterOp); + private: InstructionKind instr_kind_; OpKind op_kind_; int shift_amount_; friend std::ostream& operator<<(std::ostream& os, OpKind op); - - DISALLOW_COPY_AND_ASSIGN(HDataProcWithShifterOp); }; std::ostream& operator<<(std::ostream& os, const HDataProcWithShifterOp::OpKind op); diff --git a/compiler/optimizing/nodes_test.cc b/compiler/optimizing/nodes_test.cc index 7686ba851b..9bfd250ea4 100644 --- a/compiler/optimizing/nodes_test.cc +++ b/compiler/optimizing/nodes_test.cc @@ -14,45 +14,45 @@ * limitations under the License. */ -#include "base/arena_allocator.h" #include "nodes.h" + +#include "base/arena_allocator.h" #include "optimizing_unit_test.h" #include "gtest/gtest.h" namespace art { +class NodeTest : public OptimizingUnitTest {}; + /** * Test that removing instruction from the graph removes itself from user lists * and environment lists. 
*/ -TEST(Node, RemoveInstruction) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); +TEST_F(NodeTest, RemoveInstruction) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter = new (&allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* parameter = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter); - entry->AddInstruction(new (&allocator) HGoto()); + entry->AddInstruction(new (GetAllocator()) HGoto()); - HBasicBlock* first_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* first_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(first_block); entry->AddSuccessor(first_block); - HInstruction* null_check = new (&allocator) HNullCheck(parameter, 0); + HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter, 0); first_block->AddInstruction(null_check); - first_block->AddInstruction(new (&allocator) HReturnVoid()); + first_block->AddInstruction(new (GetAllocator()) HReturnVoid()); - HBasicBlock* exit_block = new (&allocator) HBasicBlock(graph); + HBasicBlock* exit_block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(exit_block); first_block->AddSuccessor(exit_block); - exit_block->AddInstruction(new (&allocator) HExit()); + exit_block->AddInstruction(new (GetAllocator()) HExit()); - HEnvironment* environment = new (&allocator) HEnvironment( - &allocator, 1, graph->GetArtMethod(), 0, null_check); + HEnvironment* environment = new (GetAllocator()) HEnvironment( + GetAllocator(), 1, graph->GetArtMethod(), 0, null_check); null_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, parameter); parameter->AddEnvUseAt(null_check->GetEnvironment(), 0); @@ -69,25 +69,22 @@ TEST(Node, RemoveInstruction) { /** * Test that inserting an instruction in the graph updates user lists. 
*/ -TEST(Node, InsertInstruction) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); +TEST_F(NodeTest, InsertInstruction) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter1 = new (&allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); - HInstruction* parameter2 = new (&allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* parameter1 = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); + HInstruction* parameter2 = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter1); entry->AddInstruction(parameter2); - entry->AddInstruction(new (&allocator) HExit()); + entry->AddInstruction(new (GetAllocator()) HExit()); ASSERT_FALSE(parameter1->HasUses()); - HInstruction* to_insert = new (&allocator) HNullCheck(parameter1, 0); + HInstruction* to_insert = new (GetAllocator()) HNullCheck(parameter1, 0); entry->InsertInstructionBefore(to_insert, parameter2); ASSERT_TRUE(parameter1->HasUses()); @@ -97,72 +94,65 @@ TEST(Node, InsertInstruction) { /** * Test that adding an instruction in the graph updates user lists. */ -TEST(Node, AddInstruction) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); +TEST_F(NodeTest, AddInstruction) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter = new (&allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* parameter = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter); ASSERT_FALSE(parameter->HasUses()); - HInstruction* to_add = new (&allocator) HNullCheck(parameter, 0); + HInstruction* to_add = new (GetAllocator()) HNullCheck(parameter, 0); entry->AddInstruction(to_add); ASSERT_TRUE(parameter->HasUses()); ASSERT_TRUE(parameter->GetUses().HasExactlyOneElement()); } -TEST(Node, ParentEnvironment) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); +TEST_F(NodeTest, ParentEnvironment) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter1 = new (&allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); - HInstruction* with_environment = new (&allocator) HNullCheck(parameter1, 0); + HInstruction* parameter1 = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); + HInstruction* with_environment = new (GetAllocator()) HNullCheck(parameter1, 0); entry->AddInstruction(parameter1); entry->AddInstruction(with_environment); - entry->AddInstruction(new (&allocator) HExit()); + entry->AddInstruction(new (GetAllocator()) HExit()); ASSERT_TRUE(parameter1->HasUses()); 
ASSERT_TRUE(parameter1->GetUses().HasExactlyOneElement()); - HEnvironment* environment = new (&allocator) HEnvironment( - &allocator, 1, graph->GetArtMethod(), 0, with_environment); - ArenaVector<HInstruction*> array(allocator.Adapter()); - array.push_back(parameter1); + HEnvironment* environment = new (GetAllocator()) HEnvironment( + GetAllocator(), 1, graph->GetArtMethod(), 0, with_environment); + HInstruction* const array[] = { parameter1 }; - environment->CopyFrom(array); + environment->CopyFrom(ArrayRef<HInstruction* const>(array)); with_environment->SetRawEnvironment(environment); ASSERT_TRUE(parameter1->HasEnvironmentUses()); ASSERT_TRUE(parameter1->GetEnvUses().HasExactlyOneElement()); - HEnvironment* parent1 = new (&allocator) HEnvironment( - &allocator, 1, graph->GetArtMethod(), 0, nullptr); - parent1->CopyFrom(array); + HEnvironment* parent1 = new (GetAllocator()) HEnvironment( + GetAllocator(), 1, graph->GetArtMethod(), 0, nullptr); + parent1->CopyFrom(ArrayRef<HInstruction* const>(array)); ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 2u); - HEnvironment* parent2 = new (&allocator) HEnvironment( - &allocator, 1, graph->GetArtMethod(), 0, nullptr); - parent2->CopyFrom(array); - parent1->SetAndCopyParentChain(&allocator, parent2); + HEnvironment* parent2 = new (GetAllocator()) HEnvironment( + GetAllocator(), 1, graph->GetArtMethod(), 0, nullptr); + parent2->CopyFrom(ArrayRef<HInstruction* const>(array)); + parent1->SetAndCopyParentChain(GetAllocator(), parent2); // One use for parent2, and one other use for the new parent of parent1. ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 4u); // We have copied the parent chain. So we now have two more uses. - environment->SetAndCopyParentChain(&allocator, parent1); + environment->SetAndCopyParentChain(GetAllocator(), parent1); ASSERT_EQ(parameter1->GetEnvUses().SizeSlow(), 6u); } diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h index 6261171a00..20f6cf01ed 100644 --- a/compiler/optimizing/nodes_vector.h +++ b/compiler/optimizing/nodes_vector.h @@ -34,7 +34,7 @@ class Alignment { DCHECK(IsPowerOfTwo(base)); } - // Returns true if memory is "at least" aligned at the given boundary. + // Returns true if memory is at least aligned at the given boundary. // Assumes requested base is power of two. bool IsAlignedAt(size_t base) const { DCHECK_NE(0u, base); @@ -42,6 +42,10 @@ class Alignment { return ((offset_ | base_) & (base - 1u)) == 0; } + size_t Base() const { return base_; } + + size_t Offset() const { return offset_; } + std::string ToString() const { return "ALIGN(" + std::to_string(base_) + "," + std::to_string(offset_) + ")"; } @@ -63,15 +67,21 @@ class Alignment { // GetVectorLength() x GetPackedType() operations simultaneously. class HVecOperation : public HVariableInputSizeInstruction { public: - HVecOperation(ArenaAllocator* arena, - Primitive::Type packed_type, + // A SIMD operation looks like a FPU location. + // TODO: we could introduce SIMD types in HIR. 
+ static constexpr DataType::Type kSIMDType = DataType::Type::kFloat64; + + HVecOperation(InstructionKind kind, + ArenaAllocator* allocator, + DataType::Type packed_type, SideEffects side_effects, size_t number_of_inputs, size_t vector_length, uint32_t dex_pc) - : HVariableInputSizeInstruction(side_effects, + : HVariableInputSizeInstruction(kind, + side_effects, dex_pc, - arena, + allocator, number_of_inputs, kArenaAllocVectorNode), vector_length_(vector_length) { @@ -86,22 +96,31 @@ class HVecOperation : public HVariableInputSizeInstruction { // Returns the number of bytes in a full vector. size_t GetVectorNumberOfBytes() const { - return vector_length_ * Primitive::ComponentSize(GetPackedType()); + return vector_length_ * DataType::Size(GetPackedType()); } - // Returns the type of the vector operation: a SIMD operation looks like a FPU location. - // TODO: we could introduce SIMD types in HIR. - Primitive::Type GetType() const OVERRIDE { - return Primitive::kPrimDouble; + // Returns the type of the vector operation. + DataType::Type GetType() const OVERRIDE { + return kSIMDType; } // Returns the true component type packed in a vector. - Primitive::Type GetPackedType() const { + DataType::Type GetPackedType() const { return GetPackedField<TypeField>(); } // Assumes vector nodes cannot be moved by default. Each concrete implementation // that can be moved should override this method and return true. + // + // Note: similar approach is used for instruction scheduling (if it is turned on for the target): + // by default HScheduler::IsSchedulable returns false for a particular HVecOperation. + // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see + // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also + // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction. + // + // Note: For newly introduced vector instructions HScheduler${ARCH}::IsSchedulingBarrier must be + // altered to return true if the instruction might reside outside the SIMD loop body since SIMD + // registers are not kept alive across vector loop boundaries (yet). bool CanBeMoved() const OVERRIDE { return false; } // Tests if all data of a vector node (vector length and packed type) is equal. @@ -113,32 +132,83 @@ class HVecOperation : public HVariableInputSizeInstruction { return GetVectorLength() == o->GetVectorLength() && GetPackedType() == o->GetPackedType(); } + // Maps an integral type to the same-size signed type and leaves other types alone. + static DataType::Type ToSignedType(DataType::Type type) { + switch (type) { + case DataType::Type::kBool: // 1-byte storage unit + case DataType::Type::kUint8: + return DataType::Type::kInt8; + case DataType::Type::kUint16: + return DataType::Type::kInt16; + default: + DCHECK(type != DataType::Type::kVoid && type != DataType::Type::kReference) << type; + return type; + } + } + + // Maps an integral type to the same-size unsigned type and leaves other types alone. + static DataType::Type ToUnsignedType(DataType::Type type) { + switch (type) { + case DataType::Type::kBool: // 1-byte storage unit + case DataType::Type::kInt8: + return DataType::Type::kUint8; + case DataType::Type::kInt16: + return DataType::Type::kUint16; + default: + DCHECK(type != DataType::Type::kVoid && type != DataType::Type::kReference) << type; + return type; + } + } + + // Maps an integral type to the same-size (un)signed type. Leaves other types alone. 
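  // For example (an illustrative sketch of the mapping defined by the helpers above):
  //   ToSignedType(kUint8) == kInt8 and ToUnsignedType(kInt8) == kUint8,
  //   ToSignedType(kUint16) == kInt16 and ToUnsignedType(kInt16) == kUint16,
  //   ToProperType(kUint16, /* is_unsigned */ false) == kInt16.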
+ static DataType::Type ToProperType(DataType::Type type, bool is_unsigned) { + return is_unsigned ? ToUnsignedType(type) : ToSignedType(type); + } + + // Helper method to determine if an instruction returns a SIMD value. + // TODO: This method is needed until we introduce SIMD as proper type. + static bool ReturnsSIMDValue(HInstruction* instruction) { + if (instruction->IsVecOperation()) { + return !instruction->IsVecExtractScalar(); // only scalar returning vec op + } else if (instruction->IsPhi()) { + // Vectorizer only uses Phis in reductions, so checking for a 2-way phi + // with a direct vector operand as second argument suffices. + return + instruction->GetType() == kSIMDType && + instruction->InputCount() == 2 && + instruction->InputAt(1)->IsVecOperation(); + } + return false; + } + DECLARE_ABSTRACT_INSTRUCTION(VecOperation); protected: // Additional packed bits. static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits; static constexpr size_t kFieldTypeSize = - MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast)); + MinimumBitsToStore(static_cast<size_t>(DataType::Type::kLast)); static constexpr size_t kNumberOfVectorOpPackedBits = kFieldType + kFieldTypeSize; static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>; + using TypeField = BitField<DataType::Type, kFieldType, kFieldTypeSize>; + + DEFAULT_COPY_CONSTRUCTOR(VecOperation); private: const size_t vector_length_; - - DISALLOW_COPY_AND_ASSIGN(HVecOperation); }; // Abstraction of a unary vector operation. class HVecUnaryOperation : public HVecOperation { public: - HVecUnaryOperation(ArenaAllocator* arena, + HVecUnaryOperation(InstructionKind kind, + ArenaAllocator* allocator, HInstruction* input, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(arena, + : HVecOperation(kind, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 1, @@ -151,20 +221,22 @@ class HVecUnaryOperation : public HVecOperation { DECLARE_ABSTRACT_INSTRUCTION(VecUnaryOperation); - private: - DISALLOW_COPY_AND_ASSIGN(HVecUnaryOperation); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecUnaryOperation); }; // Abstraction of a binary vector operation. class HVecBinaryOperation : public HVecOperation { public: - HVecBinaryOperation(ArenaAllocator* arena, + HVecBinaryOperation(InstructionKind kind, + ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, uint32_t dex_pc) - : HVecOperation(arena, + : HVecOperation(kind, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 2, @@ -179,23 +251,29 @@ class HVecBinaryOperation : public HVecOperation { DECLARE_ABSTRACT_INSTRUCTION(VecBinaryOperation); - private: - DISALLOW_COPY_AND_ASSIGN(HVecBinaryOperation); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecBinaryOperation); }; // Abstraction of a vector operation that references memory, with an alignment. -// The Android runtime guarantees at least "component size" alignment for array -// elements and, thus, vectors. +// The Android runtime guarantees elements have at least natural alignment. 
class HVecMemoryOperation : public HVecOperation { public: - HVecMemoryOperation(ArenaAllocator* arena, - Primitive::Type packed_type, + HVecMemoryOperation(InstructionKind kind, + ArenaAllocator* allocator, + DataType::Type packed_type, SideEffects side_effects, size_t number_of_inputs, size_t vector_length, uint32_t dex_pc) - : HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc), - alignment_(Primitive::ComponentSize(packed_type), 0) { + : HVecOperation(kind, + allocator, + packed_type, + side_effects, + number_of_inputs, + vector_length, + dex_pc), + alignment_(DataType::Size(packed_type), 0) { DCHECK_GE(number_of_inputs, 2u); } @@ -214,28 +292,25 @@ class HVecMemoryOperation : public HVecOperation { DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecMemoryOperation); + private: Alignment alignment_; - - DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation); }; -// Packed type consistency checker (same vector length integral types may mix freely). -inline static bool HasConsistentPackedTypes(HInstruction* input, Primitive::Type type) { - DCHECK(input->IsVecOperation()); - Primitive::Type input_type = input->AsVecOperation()->GetPackedType(); - switch (input_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - return type == Primitive::kPrimBoolean || - type == Primitive::kPrimByte; - case Primitive::kPrimChar: - case Primitive::kPrimShort: - return type == Primitive::kPrimChar || - type == Primitive::kPrimShort; - default: - return type == input_type; +// Packed type consistency checker ("same vector length" integral types may mix freely). +// Tests relaxed type consistency in which packed same-size integral types can co-exist, +// but other type mixes are an error. +inline static bool HasConsistentPackedTypes(HInstruction* input, DataType::Type type) { + if (input->IsPhi()) { + return input->GetType() == HVecOperation::kSIMDType; // carries SIMD } + DCHECK(input->IsVecOperation()); + DataType::Type input_type = input->AsVecOperation()->GetPackedType(); + DCHECK_EQ(HVecOperation::ToUnsignedType(input_type) == HVecOperation::ToUnsignedType(type), + HVecOperation::ToSignedType(input_type) == HVecOperation::ToSignedType(type)); + return HVecOperation::ToSignedType(input_type) == HVecOperation::ToSignedType(type); } // @@ -246,13 +321,14 @@ inline static bool HasConsistentPackedTypes(HInstruction* input, Primitive::Type // viz. replicate(x) = [ x, .. , x ]. class HVecReplicateScalar FINAL : public HVecUnaryOperation { public: - HVecReplicateScalar(ArenaAllocator* arena, + HVecReplicateScalar(ArenaAllocator* allocator, HInstruction* scalar, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecUnaryOperation(arena, scalar, packed_type, vector_length, dex_pc) { - DCHECK(!scalar->IsVecOperation()); + uint32_t dex_pc) + : HVecUnaryOperation( + kVecReplicateScalar, allocator, scalar, packed_type, vector_length, dex_pc) { + DCHECK(!ReturnsSIMDValue(scalar)); } // A replicate needs to stay in place, since SIMD registers are not @@ -261,68 +337,120 @@ class HVecReplicateScalar FINAL : public HVecUnaryOperation { DECLARE_INSTRUCTION(VecReplicateScalar); - private: - DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecReplicateScalar); }; -// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1), -// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j. 
-class HVecSumReduce FINAL : public HVecUnaryOperation { - HVecSumReduce(ArenaAllocator* arena, - HInstruction* input, - Primitive::Type packed_type, - size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { +// Extracts a particular scalar from the given vector, +// viz. extract[ x1, .. , xn ] = x_i. +// +// TODO: for now only i == 1 case supported. +class HVecExtractScalar FINAL : public HVecUnaryOperation { + public: + HVecExtractScalar(ArenaAllocator* allocator, + HInstruction* input, + DataType::Type packed_type, + size_t vector_length, + size_t index, + uint32_t dex_pc) + : HVecUnaryOperation( + kVecExtractScalar, allocator, input, packed_type, vector_length, dex_pc) { + DCHECK(HasConsistentPackedTypes(input, packed_type)); + DCHECK_LT(index, vector_length); + DCHECK_EQ(index, 0u); + } + + // Yields a single component in the vector. + DataType::Type GetType() const OVERRIDE { + return GetPackedType(); + } + + // An extract needs to stay in place, since SIMD registers are not + // kept alive across vector loop boundaries (yet). + bool CanBeMoved() const OVERRIDE { return false; } + + DECLARE_INSTRUCTION(VecExtractScalar); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecExtractScalar); +}; + +// Reduces the given vector into the first element as sum/min/max, +// viz. sum-reduce[ x1, .. , xn ] = [ y, ---- ], where y = sum xi +// and the "-" denotes "don't care" (implementation dependent). +class HVecReduce FINAL : public HVecUnaryOperation { + public: + enum ReductionKind { + kSum = 1, + kMin = 2, + kMax = 3 + }; + + HVecReduce(ArenaAllocator* allocator, + HInstruction* input, + DataType::Type packed_type, + size_t vector_length, + ReductionKind kind, + uint32_t dex_pc) + : HVecUnaryOperation(kVecReduce, allocator, input, packed_type, vector_length, dex_pc), + kind_(kind) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } - // TODO: probably integral promotion - Primitive::Type GetType() const OVERRIDE { return GetPackedType(); } + ReductionKind GetKind() const { return kind_; } bool CanBeMoved() const OVERRIDE { return true; } - DECLARE_INSTRUCTION(VecSumReduce); + bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { + DCHECK(other->IsVecReduce()); + const HVecReduce* o = other->AsVecReduce(); + return HVecOperation::InstructionDataEquals(o) && GetKind() == o->GetKind(); + } + + DECLARE_INSTRUCTION(VecReduce); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecReduce); private: - DISALLOW_COPY_AND_ASSIGN(HVecSumReduce); + const ReductionKind kind_; }; // Converts every component in the vector, // viz. cnv[ x1, .. , xn ] = [ cnv(x1), .. , cnv(xn) ]. 
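// For example, converting packed kInt32 components to kFloat32 (an illustrative sketch):
//   cnv[ 1, 2, 3, 4 ] = [ 1.0f, 2.0f, 3.0f, 4.0f ].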
class HVecCnv FINAL : public HVecUnaryOperation { public: - HVecCnv(ArenaAllocator* arena, + HVecCnv(ArenaAllocator* allocator, HInstruction* input, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecUnaryOperation(kVecCnv, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(input->IsVecOperation()); DCHECK_NE(GetInputType(), GetResultType()); // actual convert } - Primitive::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); } - Primitive::Type GetResultType() const { return GetPackedType(); } + DataType::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); } + DataType::Type GetResultType() const { return GetPackedType(); } bool CanBeMoved() const OVERRIDE { return true; } DECLARE_INSTRUCTION(VecCnv); - private: - DISALLOW_COPY_AND_ASSIGN(HVecCnv); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecCnv); }; // Negates every component in the vector, // viz. neg[ x1, .. , xn ] = [ -x1, .. , -xn ]. class HVecNeg FINAL : public HVecUnaryOperation { public: - HVecNeg(ArenaAllocator* arena, + HVecNeg(ArenaAllocator* allocator, HInstruction* input, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecUnaryOperation(kVecNeg, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } @@ -330,20 +458,21 @@ class HVecNeg FINAL : public HVecUnaryOperation { DECLARE_INSTRUCTION(VecNeg); - private: - DISALLOW_COPY_AND_ASSIGN(HVecNeg); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecNeg); }; // Takes absolute value of every component in the vector, -// viz. abs[ x1, .. , xn ] = [ |x1|, .. , |xn| ]. +// viz. abs[ x1, .. , xn ] = [ |x1|, .. , |xn| ] +// for signed operand x. class HVecAbs FINAL : public HVecUnaryOperation { public: - HVecAbs(ArenaAllocator* arena, + HVecAbs(ArenaAllocator* allocator, HInstruction* input, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecUnaryOperation(kVecAbs, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(input, packed_type)); } @@ -351,8 +480,8 @@ class HVecAbs FINAL : public HVecUnaryOperation { DECLARE_INSTRUCTION(VecAbs); - private: - DISALLOW_COPY_AND_ASSIGN(HVecAbs); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecAbs); }; // Bitwise- or boolean-nots every component in the vector, @@ -360,12 +489,12 @@ class HVecAbs FINAL : public HVecUnaryOperation { // not[ x1, .. , xn ] = [ !x1, .. , !xn ] for boolean. 
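// For example, with kBool components (an illustrative sketch):
//   not[ 1, 0, 1, 0 ] = [ 0, 1, 0, 1 ].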
class HVecNot FINAL : public HVecUnaryOperation { public: - HVecNot(ArenaAllocator* arena, + HVecNot(ArenaAllocator* allocator, HInstruction* input, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecUnaryOperation(arena, input, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecUnaryOperation(kVecNot, allocator, input, packed_type, vector_length, dex_pc) { DCHECK(input->IsVecOperation()); } @@ -373,8 +502,8 @@ class HVecNot FINAL : public HVecUnaryOperation { DECLARE_INSTRUCTION(VecNot); - private: - DISALLOW_COPY_AND_ASSIGN(HVecNot); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecNot); }; // @@ -385,13 +514,13 @@ class HVecNot FINAL : public HVecUnaryOperation { // viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ]. class HVecAdd FINAL : public HVecBinaryOperation { public: - HVecAdd(ArenaAllocator* arena, + HVecAdd(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecAdd, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -400,32 +529,30 @@ class HVecAdd FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecAdd); - private: - DISALLOW_COPY_AND_ASSIGN(HVecAdd); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecAdd); }; // Performs halving add on every component in the two vectors, viz. -// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] -// or [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] -// for signed operands x, y (sign extension) or unsigned operands x, y (zero extension). +// rounded [ x1, .. , xn ] hradd [ y1, .. , yn ] = [ (x1 + y1 + 1) >> 1, .. , (xn + yn + 1) >> 1 ] +// truncated [ x1, .. , xn ] hadd [ y1, .. , yn ] = [ (x1 + y1) >> 1, .. , (xn + yn ) >> 1 ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). 
class HVecHalvingAdd FINAL : public HVecBinaryOperation { public: - HVecHalvingAdd(ArenaAllocator* arena, + HVecHalvingAdd(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - bool is_unsigned, bool is_rounded, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation( + kVecHalvingAdd, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); - SetPackedFlag<kFieldHAddIsUnsigned>(is_unsigned); SetPackedFlag<kFieldHAddIsRounded>(is_rounded); } - bool IsUnsigned() const { return GetPackedFlag<kFieldHAddIsUnsigned>(); } bool IsRounded() const { return GetPackedFlag<kFieldHAddIsRounded>(); } bool CanBeMoved() const OVERRIDE { return true; } @@ -433,34 +560,32 @@ class HVecHalvingAdd FINAL : public HVecBinaryOperation { bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { DCHECK(other->IsVecHalvingAdd()); const HVecHalvingAdd* o = other->AsVecHalvingAdd(); - return HVecOperation::InstructionDataEquals(o) && - IsUnsigned() == o->IsUnsigned() && - IsRounded() == o->IsRounded(); + return HVecOperation::InstructionDataEquals(o) && IsRounded() == o->IsRounded(); } DECLARE_INSTRUCTION(VecHalvingAdd); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecHalvingAdd); + private: // Additional packed bits. - static constexpr size_t kFieldHAddIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; - static constexpr size_t kFieldHAddIsRounded = kFieldHAddIsUnsigned + 1; + static constexpr size_t kFieldHAddIsRounded = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kNumberOfHAddPackedBits = kFieldHAddIsRounded + 1; static_assert(kNumberOfHAddPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - - DISALLOW_COPY_AND_ASSIGN(HVecHalvingAdd); }; // Subtracts every component in the two vectors, // viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ]. class HVecSub FINAL : public HVecBinaryOperation { public: - HVecSub(ArenaAllocator* arena, + HVecSub(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecSub, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -469,21 +594,21 @@ class HVecSub FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecSub); - private: - DISALLOW_COPY_AND_ASSIGN(HVecSub); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSub); }; // Multiplies every component in the two vectors, // viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ]. 
class HVecMul FINAL : public HVecBinaryOperation { public: - HVecMul(ArenaAllocator* arena, + HVecMul(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecMul, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -492,21 +617,21 @@ class HVecMul FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecMul); - private: - DISALLOW_COPY_AND_ASSIGN(HVecMul); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecMul); }; // Divides every component in the two vectors, // viz. [ x1, .. , xn ] / [ y1, .. , yn ] = [ x1 / y1, .. , xn / yn ]. class HVecDiv FINAL : public HVecBinaryOperation { public: - HVecDiv(ArenaAllocator* arena, + HVecDiv(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecDiv, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); } @@ -515,97 +640,69 @@ class HVecDiv FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecDiv); - private: - DISALLOW_COPY_AND_ASSIGN(HVecDiv); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecDiv); }; // Takes minimum of every component in the two vectors, -// viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ]. +// viz. MIN( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ min(x1, y1), .. , min(xn, yn) ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). class HVecMin FINAL : public HVecBinaryOperation { public: - HVecMin(ArenaAllocator* arena, + HVecMin(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - bool is_unsigned, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecMin, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); - SetPackedFlag<kFieldMinOpIsUnsigned>(is_unsigned); } - bool IsUnsigned() const { return GetPackedFlag<kFieldMinOpIsUnsigned>(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { - DCHECK(other->IsVecMin()); - const HVecMin* o = other->AsVecMin(); - return HVecOperation::InstructionDataEquals(o) && IsUnsigned() == o->IsUnsigned(); - } - DECLARE_INSTRUCTION(VecMin); - private: - // Additional packed bits. - static constexpr size_t kFieldMinOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; - static constexpr size_t kNumberOfMinOpPackedBits = kFieldMinOpIsUnsigned + 1; - static_assert(kNumberOfMinOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - - DISALLOW_COPY_AND_ASSIGN(HVecMin); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecMin); }; // Takes maximum of every component in the two vectors, -// viz. MAX( [ x1, .. 
, xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ]. +// viz. MAX( [ x1, .. , xn ] , [ y1, .. , yn ]) = [ max(x1, y1), .. , max(xn, yn) ] +// for either both signed or both unsigned operands x, y (reflected in packed_type). class HVecMax FINAL : public HVecBinaryOperation { public: - HVecMax(ArenaAllocator* arena, + HVecMax(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - bool is_unsigned, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecMax, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); DCHECK(HasConsistentPackedTypes(right, packed_type)); - SetPackedFlag<kFieldMaxOpIsUnsigned>(is_unsigned); } - bool IsUnsigned() const { return GetPackedFlag<kFieldMaxOpIsUnsigned>(); } - bool CanBeMoved() const OVERRIDE { return true; } - bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { - DCHECK(other->IsVecMax()); - const HVecMax* o = other->AsVecMax(); - return HVecOperation::InstructionDataEquals(o) && IsUnsigned() == o->IsUnsigned(); - } - DECLARE_INSTRUCTION(VecMax); - private: - // Additional packed bits. - static constexpr size_t kFieldMaxOpIsUnsigned = HVecOperation::kNumberOfVectorOpPackedBits; - static constexpr size_t kNumberOfMaxOpPackedBits = kFieldMaxOpIsUnsigned + 1; - static_assert(kNumberOfMaxOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - - DISALLOW_COPY_AND_ASSIGN(HVecMax); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecMax); }; // Bitwise-ands every component in the two vectors, // viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ]. class HVecAnd FINAL : public HVecBinaryOperation { public: - HVecAnd(ArenaAllocator* arena, + HVecAnd(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecAnd, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -613,21 +710,22 @@ class HVecAnd FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecAnd); - private: - DISALLOW_COPY_AND_ASSIGN(HVecAnd); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecAnd); }; // Bitwise-and-nots every component in the two vectors, // viz. [ x1, .. , xn ] and-not [ y1, .. , yn ] = [ ~x1 & y1, .. , ~xn & yn ]. class HVecAndNot FINAL : public HVecBinaryOperation { public: - HVecAndNot(ArenaAllocator* arena, + HVecAndNot(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation( + kVecAndNot, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -635,21 +733,21 @@ class HVecAndNot FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecAndNot); - private: - DISALLOW_COPY_AND_ASSIGN(HVecAndNot); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecAndNot); }; // Bitwise-ors every component in the two vectors, // viz. [ x1, .. , xn ] | [ y1, .. 
, yn ] = [ x1 | y1, .. , xn | yn ]. class HVecOr FINAL : public HVecBinaryOperation { public: - HVecOr(ArenaAllocator* arena, + HVecOr(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecOr, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -657,21 +755,21 @@ class HVecOr FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecOr); - private: - DISALLOW_COPY_AND_ASSIGN(HVecOr); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecOr); }; // Bitwise-xors every component in the two vectors, // viz. [ x1, .. , xn ] ^ [ y1, .. , yn ] = [ x1 ^ y1, .. , xn ^ yn ]. class HVecXor FINAL : public HVecBinaryOperation { public: - HVecXor(ArenaAllocator* arena, + HVecXor(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecXor, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(left->IsVecOperation() && right->IsVecOperation()); } @@ -679,21 +777,21 @@ class HVecXor FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecXor); - private: - DISALLOW_COPY_AND_ASSIGN(HVecXor); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecXor); }; // Logically shifts every component in the vector left by the given distance, // viz. [ x1, .. , xn ] << d = [ x1 << d, .. , xn << d ]. class HVecShl FINAL : public HVecBinaryOperation { public: - HVecShl(ArenaAllocator* arena, + HVecShl(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecShl, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); } @@ -701,21 +799,21 @@ class HVecShl FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecShl); - private: - DISALLOW_COPY_AND_ASSIGN(HVecShl); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecShl); }; // Arithmetically shifts every component in the vector right by the given distance, // viz. [ x1, .. , xn ] >> d = [ x1 >> d, .. , xn >> d ]. class HVecShr FINAL : public HVecBinaryOperation { public: - HVecShr(ArenaAllocator* arena, + HVecShr(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecShr, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); } @@ -723,21 +821,21 @@ class HVecShr FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecShr); - private: - DISALLOW_COPY_AND_ASSIGN(HVecShr); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecShr); }; // Logically shifts every component in the vector right by the given distance, // viz. [ x1, .. , xn ] >>> d = [ x1 >>> d, .. , xn >>> d ]. 
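// (For the shift nodes the distance operand is a scalar rather than a vector, which is why only the packed type of the shifted vector is checked for consistency.)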
class HVecUShr FINAL : public HVecBinaryOperation { public: - HVecUShr(ArenaAllocator* arena, + HVecUShr(ArenaAllocator* allocator, HInstruction* left, HInstruction* right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecBinaryOperation(arena, left, right, packed_type, vector_length, dex_pc) { + uint32_t dex_pc) + : HVecBinaryOperation(kVecUShr, allocator, left, right, packed_type, vector_length, dex_pc) { DCHECK(HasConsistentPackedTypes(left, packed_type)); } @@ -745,8 +843,8 @@ class HVecUShr FINAL : public HVecBinaryOperation { DECLARE_INSTRUCTION(VecUShr); - private: - DISALLOW_COPY_AND_ASSIGN(HVecUShr); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecUShr); }; // @@ -754,21 +852,25 @@ class HVecUShr FINAL : public HVecBinaryOperation { // // Assigns the given scalar elements to a vector, -// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ]. +// viz. set( array(x1, .. , xn) ) = [ x1, .. , xn ] if n == m, +// set( array(x1, .. , xm) ) = [ x1, .. , xm, 0, .. , 0 ] if m < n. class HVecSetScalars FINAL : public HVecOperation { - HVecSetScalars(ArenaAllocator* arena, - HInstruction** scalars, // array - Primitive::Type packed_type, + public: + HVecSetScalars(ArenaAllocator* allocator, + HInstruction* scalars[], + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecOperation(arena, + size_t number_of_scalars, + uint32_t dex_pc) + : HVecOperation(kVecSetScalars, + allocator, packed_type, SideEffects::None(), - /* number_of_inputs */ vector_length, + number_of_scalars, vector_length, dex_pc) { - for (size_t i = 0; i < vector_length; i++) { - DCHECK(!scalars[i]->IsVecOperation()); + for (size_t i = 0; i < number_of_scalars; i++) { + DCHECK(!ReturnsSIMDValue(scalars[i])); SetRawInputAt(0, scalars[i]); } } @@ -779,24 +881,24 @@ class HVecSetScalars FINAL : public HVecOperation { DECLARE_INSTRUCTION(VecSetScalars); - private: - DISALLOW_COPY_AND_ASSIGN(HVecSetScalars); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSetScalars); }; -// Multiplies every component in the two vectors, adds the result vector to the accumulator vector. -// viz. [ acc1, .., accn ] + [ x1, .. , xn ] * [ y1, .. , yn ] = -// [ acc1 + x1 * y1, .. , accn + xn * yn ]. +// Multiplies every component in the two vectors, adds the result vector to the accumulator vector, +// viz. [ a1, .. , an ] + [ x1, .. , xn ] * [ y1, .. , yn ] = [ a1 + x1 * y1, .. , an + xn * yn ]. 
class HVecMultiplyAccumulate FINAL : public HVecOperation { public: - HVecMultiplyAccumulate(ArenaAllocator* arena, + HVecMultiplyAccumulate(ArenaAllocator* allocator, InstructionKind op, HInstruction* accumulator, HInstruction* mul_left, HInstruction* mul_right, - Primitive::Type packed_type, + DataType::Type packed_type, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecOperation(arena, + uint32_t dex_pc) + : HVecOperation(kVecMultiplyAccumulate, + allocator, packed_type, SideEffects::None(), /* number_of_inputs */ 3, @@ -807,15 +909,11 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); DCHECK(HasConsistentPackedTypes(mul_left, packed_type)); DCHECK(HasConsistentPackedTypes(mul_right, packed_type)); - SetRawInputAt(kInputAccumulatorIndex, accumulator); - SetRawInputAt(kInputMulLeftIndex, mul_left); - SetRawInputAt(kInputMulRightIndex, mul_right); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, mul_left); + SetRawInputAt(2, mul_right); } - static constexpr int kInputAccumulatorIndex = 0; - static constexpr int kInputMulLeftIndex = 1; - static constexpr int kInputMulRightIndex = 2; - bool CanBeMoved() const OVERRIDE { return true; } bool InstructionDataEquals(const HInstruction* other) const OVERRIDE { @@ -828,27 +926,67 @@ class HVecMultiplyAccumulate FINAL : public HVecOperation { DECLARE_INSTRUCTION(VecMultiplyAccumulate); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecMultiplyAccumulate); + private: // Indicates if this is a MADD or MSUB. const InstructionKind op_kind_; +}; - DISALLOW_COPY_AND_ASSIGN(HVecMultiplyAccumulate); +// Takes the absolute difference of two vectors, and adds the results to +// same-precision or wider-precision components in the accumulator, +// viz. SAD([ a1, .. , am ], [ x1, .. , xn ], [ y1, .. , yn ]) = +// [ a1 + sum abs(xi-yi), .. , am + sum abs(xj-yj) ], +// for m <= n, non-overlapping sums, and signed operands x, y. +class HVecSADAccumulate FINAL : public HVecOperation { + public: + HVecSADAccumulate(ArenaAllocator* allocator, + HInstruction* accumulator, + HInstruction* sad_left, + HInstruction* sad_right, + DataType::Type packed_type, + size_t vector_length, + uint32_t dex_pc) + : HVecOperation(kVecSADAccumulate, + allocator, + packed_type, + SideEffects::None(), + /* number_of_inputs */ 3, + vector_length, + dex_pc) { + DCHECK(HasConsistentPackedTypes(accumulator, packed_type)); + DCHECK(sad_left->IsVecOperation()); + DCHECK(sad_right->IsVecOperation()); + DCHECK_EQ(ToSignedType(sad_left->AsVecOperation()->GetPackedType()), + ToSignedType(sad_right->AsVecOperation()->GetPackedType())); + SetRawInputAt(0, accumulator); + SetRawInputAt(1, sad_left); + SetRawInputAt(2, sad_right); + } + + DECLARE_INSTRUCTION(VecSADAccumulate); + + protected: + DEFAULT_COPY_CONSTRUCTOR(VecSADAccumulate); }; // Loads a vector from memory, viz. load(mem, 1) // yield the vector [ mem(1), .. , mem(n) ]. 
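// (With this change the memory side effects are passed in explicitly by the caller, e.g. SideEffects::ArrayReadOfType(packed_type) for an array load, instead of being derived from the packed type inside the node.)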
class HVecLoad FINAL : public HVecMemoryOperation { public: - HVecLoad(ArenaAllocator* arena, + HVecLoad(ArenaAllocator* allocator, HInstruction* base, HInstruction* index, - Primitive::Type packed_type, + DataType::Type packed_type, + SideEffects side_effects, size_t vector_length, bool is_string_char_at, - uint32_t dex_pc = kNoDexPc) - : HVecMemoryOperation(arena, + uint32_t dex_pc) + : HVecMemoryOperation(kVecLoad, + allocator, packed_type, - SideEffects::ArrayReadOfType(packed_type), + side_effects, /* number_of_inputs */ 2, vector_length, dex_pc) { @@ -869,29 +1007,32 @@ class HVecLoad FINAL : public HVecMemoryOperation { DECLARE_INSTRUCTION(VecLoad); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecLoad); + private: // Additional packed bits. static constexpr size_t kFieldIsStringCharAt = HVecOperation::kNumberOfVectorOpPackedBits; static constexpr size_t kNumberOfVecLoadPackedBits = kFieldIsStringCharAt + 1; static_assert(kNumberOfVecLoadPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields."); - - DISALLOW_COPY_AND_ASSIGN(HVecLoad); }; // Stores a vector to memory, viz. store(m, 1, [x1, .. , xn] ) // sets mem(1) = x1, .. , mem(n) = xn. class HVecStore FINAL : public HVecMemoryOperation { public: - HVecStore(ArenaAllocator* arena, + HVecStore(ArenaAllocator* allocator, HInstruction* base, HInstruction* index, HInstruction* value, - Primitive::Type packed_type, + DataType::Type packed_type, + SideEffects side_effects, size_t vector_length, - uint32_t dex_pc = kNoDexPc) - : HVecMemoryOperation(arena, + uint32_t dex_pc) + : HVecMemoryOperation(kVecStore, + allocator, packed_type, - SideEffects::ArrayWriteOfType(packed_type), + side_effects, /* number_of_inputs */ 3, vector_length, dex_pc) { @@ -906,8 +1047,8 @@ class HVecStore FINAL : public HVecMemoryOperation { DECLARE_INSTRUCTION(VecStore); - private: - DISALLOW_COPY_AND_ASSIGN(HVecStore); + protected: + DEFAULT_COPY_CONSTRUCTOR(VecStore) }; } // namespace art diff --git a/compiler/optimizing/nodes_vector_test.cc b/compiler/optimizing/nodes_vector_test.cc index 0238ea4602..af13449646 100644 --- a/compiler/optimizing/nodes_vector_test.cc +++ b/compiler/optimizing/nodes_vector_test.cc @@ -23,12 +23,10 @@ namespace art { /** * Fixture class for testing vector nodes. 
*/ -class NodesVectorTest : public CommonCompilerTest { +class NodesVectorTest : public OptimizingUnitTest { public: NodesVectorTest() - : pool_(), - allocator_(&pool_), - graph_(CreateGraph(&allocator_)) { + : graph_(CreateGraph()) { BuildGraph(); } @@ -36,28 +34,38 @@ class NodesVectorTest : public CommonCompilerTest { void BuildGraph() { graph_->SetNumberOfVRegs(1); - entry_block_ = new (&allocator_) HBasicBlock(graph_); - exit_block_ = new (&allocator_) HBasicBlock(graph_); + entry_block_ = new (GetAllocator()) HBasicBlock(graph_); + exit_block_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry_block_); graph_->AddBlock(exit_block_); graph_->SetEntryBlock(entry_block_); graph_->SetExitBlock(exit_block_); - parameter_ = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimInt); - entry_block_->AddInstruction(parameter_); + int8_parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(1), + 0, + DataType::Type::kInt8); + entry_block_->AddInstruction(int8_parameter_); + int16_parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(2), + 0, + DataType::Type::kInt16); + entry_block_->AddInstruction(int16_parameter_); + int32_parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); + entry_block_->AddInstruction(int32_parameter_); } // General building fields. - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; HBasicBlock* entry_block_; HBasicBlock* exit_block_; - HInstruction* parameter_; + HInstruction* int8_parameter_; + HInstruction* int16_parameter_; + HInstruction* int32_parameter_; }; // @@ -92,6 +100,10 @@ TEST(NodesVector, Alignment) { EXPECT_FALSE(Alignment(16, 1).IsAlignedAt(16)); EXPECT_FALSE(Alignment(16, 7).IsAlignedAt(16)); EXPECT_FALSE(Alignment(16, 0).IsAlignedAt(32)); + + EXPECT_EQ(16u, Alignment(16, 0).Base()); + EXPECT_EQ(0u, Alignment(16, 0).Offset()); + EXPECT_EQ(4u, Alignment(16, 4).Offset()); } TEST(NodesVector, AlignmentEQ) { @@ -118,16 +130,23 @@ TEST(NodesVector, AlignmentString) { } TEST_F(NodesVectorTest, VectorOperationProperties) { - HVecOperation* v0 = new (&allocator_) - HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4); - HVecOperation* v1 = new (&allocator_) - HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4); - HVecOperation* v2 = new (&allocator_) - HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 2); - HVecOperation* v3 = new (&allocator_) - HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimShort, 4); - HVecOperation* v4 = new (&allocator_) - HVecStore(&allocator_, parameter_, parameter_, v0, Primitive::kPrimInt, 4); + HVecOperation* v0 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); + HVecOperation* v1 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); + HVecOperation* v2 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 2, kNoDexPc); + HVecOperation* v3 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt16, 4, kNoDexPc); + HVecOperation* v4 = new (GetAllocator()) HVecStore( + GetAllocator(), + int32_parameter_, + int32_parameter_, + v0, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); 
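  // The checks below rely on two different notions of type: GetType() returns the single representative SIMD type used by the HIR for every vector value (kFloat64), while GetPackedType() reports the per-lane type requested when each node was constructed above.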
EXPECT_TRUE(v0->Equals(v0)); EXPECT_TRUE(v1->Equals(v1)); @@ -149,17 +168,17 @@ TEST_F(NodesVectorTest, VectorOperationProperties) { EXPECT_EQ(4u, v3->GetVectorLength()); EXPECT_EQ(4u, v4->GetVectorLength()); - EXPECT_EQ(Primitive::kPrimDouble, v0->GetType()); - EXPECT_EQ(Primitive::kPrimDouble, v1->GetType()); - EXPECT_EQ(Primitive::kPrimDouble, v2->GetType()); - EXPECT_EQ(Primitive::kPrimDouble, v3->GetType()); - EXPECT_EQ(Primitive::kPrimDouble, v4->GetType()); + EXPECT_EQ(DataType::Type::kFloat64, v0->GetType()); + EXPECT_EQ(DataType::Type::kFloat64, v1->GetType()); + EXPECT_EQ(DataType::Type::kFloat64, v2->GetType()); + EXPECT_EQ(DataType::Type::kFloat64, v3->GetType()); + EXPECT_EQ(DataType::Type::kFloat64, v4->GetType()); - EXPECT_EQ(Primitive::kPrimInt, v0->GetPackedType()); - EXPECT_EQ(Primitive::kPrimInt, v1->GetPackedType()); - EXPECT_EQ(Primitive::kPrimInt, v2->GetPackedType()); - EXPECT_EQ(Primitive::kPrimShort, v3->GetPackedType()); - EXPECT_EQ(Primitive::kPrimInt, v4->GetPackedType()); + EXPECT_EQ(DataType::Type::kInt32, v0->GetPackedType()); + EXPECT_EQ(DataType::Type::kInt32, v1->GetPackedType()); + EXPECT_EQ(DataType::Type::kInt32, v2->GetPackedType()); + EXPECT_EQ(DataType::Type::kInt16, v3->GetPackedType()); + EXPECT_EQ(DataType::Type::kInt32, v4->GetPackedType()); EXPECT_EQ(16u, v0->GetVectorNumberOfBytes()); EXPECT_EQ(16u, v1->GetVectorNumberOfBytes()); @@ -175,12 +194,30 @@ TEST_F(NodesVectorTest, VectorOperationProperties) { } TEST_F(NodesVectorTest, VectorAlignmentAndStringCharAtMatterOnLoad) { - HVecLoad* v0 = new (&allocator_) - HVecLoad(&allocator_, parameter_, parameter_, Primitive::kPrimInt, 4, /*is_string_char_at*/ false); - HVecLoad* v1 = new (&allocator_) - HVecLoad(&allocator_, parameter_, parameter_, Primitive::kPrimInt, 4, /*is_string_char_at*/ false); - HVecLoad* v2 = new (&allocator_) - HVecLoad(&allocator_, parameter_, parameter_, Primitive::kPrimInt, 4, /*is_string_char_at*/ true); + HVecLoad* v0 = new (GetAllocator()) HVecLoad(GetAllocator(), + int32_parameter_, + int32_parameter_, + DataType::Type::kInt32, + SideEffects::ArrayReadOfType(DataType::Type::kInt32), + 4, + /*is_string_char_at*/ false, + kNoDexPc); + HVecLoad* v1 = new (GetAllocator()) HVecLoad(GetAllocator(), + int32_parameter_, + int32_parameter_, + DataType::Type::kInt32, + SideEffects::ArrayReadOfType(DataType::Type::kInt32), + 4, + /*is_string_char_at*/ false, + kNoDexPc); + HVecLoad* v2 = new (GetAllocator()) HVecLoad(GetAllocator(), + int32_parameter_, + int32_parameter_, + DataType::Type::kInt32, + SideEffects::ArrayReadOfType(DataType::Type::kInt32), + 4, + /*is_string_char_at*/ true, + kNoDexPc); EXPECT_TRUE(v0->CanBeMoved()); EXPECT_TRUE(v1->CanBeMoved()); @@ -195,7 +232,7 @@ TEST_F(NodesVectorTest, VectorAlignmentAndStringCharAtMatterOnLoad) { EXPECT_TRUE(v2->Equals(v2)); EXPECT_TRUE(v0->Equals(v1)); - EXPECT_FALSE(v0->Equals(v2)); + EXPECT_FALSE(v0->Equals(v2)); // different is_string_char_at EXPECT_TRUE(v0->GetAlignment() == Alignment(4, 0)); EXPECT_TRUE(v1->GetAlignment() == Alignment(4, 0)); @@ -208,128 +245,172 @@ TEST_F(NodesVectorTest, VectorAlignmentAndStringCharAtMatterOnLoad) { EXPECT_FALSE(v0->Equals(v1)); // no longer equal } -TEST_F(NodesVectorTest, VectorSignMattersOnMin) { - HVecOperation* v0 = new (&allocator_) - HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4); - - HVecMin* v1 = new (&allocator_) - HVecMin(&allocator_, v0, v0, Primitive::kPrimInt, 4, /*is_unsigned*/ true); - HVecMin* v2 = new (&allocator_) - HVecMin(&allocator_, v0, v0, 
Primitive::kPrimInt, 4, /*is_unsigned*/ false); - HVecMin* v3 = new (&allocator_) - HVecMin(&allocator_, v0, v0, Primitive::kPrimInt, 2, /*is_unsigned*/ true); +TEST_F(NodesVectorTest, VectorAlignmentMattersOnStore) { + HVecOperation* p0 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); + HVecStore* v0 = new (GetAllocator()) HVecStore( + GetAllocator(), + int32_parameter_, + int32_parameter_, + p0, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); + HVecStore* v1 = new (GetAllocator()) HVecStore( + GetAllocator(), + int32_parameter_, + int32_parameter_, + p0, + DataType::Type::kInt32, + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + 4, + kNoDexPc); EXPECT_FALSE(v0->CanBeMoved()); - EXPECT_TRUE(v1->CanBeMoved()); - EXPECT_TRUE(v2->CanBeMoved()); - EXPECT_TRUE(v3->CanBeMoved()); - - EXPECT_TRUE(v1->IsUnsigned()); - EXPECT_FALSE(v2->IsUnsigned()); - EXPECT_TRUE(v3->IsUnsigned()); - - EXPECT_TRUE(v1->Equals(v1)); - EXPECT_TRUE(v2->Equals(v2)); - EXPECT_TRUE(v3->Equals(v3)); + EXPECT_FALSE(v1->CanBeMoved()); - EXPECT_FALSE(v1->Equals(v2)); // different signs - EXPECT_FALSE(v1->Equals(v3)); // different vector lengths -} + EXPECT_TRUE(v0->Equals(v1)); -TEST_F(NodesVectorTest, VectorSignMattersOnMax) { - HVecOperation* v0 = new (&allocator_) - HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4); + EXPECT_TRUE(v0->GetAlignment() == Alignment(4, 0)); + EXPECT_TRUE(v1->GetAlignment() == Alignment(4, 0)); - HVecMax* v1 = new (&allocator_) - HVecMax(&allocator_, v0, v0, Primitive::kPrimInt, 4, /*is_unsigned*/ true); - HVecMax* v2 = new (&allocator_) - HVecMax(&allocator_, v0, v0, Primitive::kPrimInt, 4, /*is_unsigned*/ false); - HVecMax* v3 = new (&allocator_) - HVecMax(&allocator_, v0, v0, Primitive::kPrimInt, 2, /*is_unsigned*/ true); + v1->SetAlignment(Alignment(8, 0)); - EXPECT_FALSE(v0->CanBeMoved()); - EXPECT_TRUE(v1->CanBeMoved()); - EXPECT_TRUE(v2->CanBeMoved()); - EXPECT_TRUE(v3->CanBeMoved()); + EXPECT_TRUE(v1->GetAlignment() == Alignment(8, 0)); - EXPECT_TRUE(v1->IsUnsigned()); - EXPECT_FALSE(v2->IsUnsigned()); - EXPECT_TRUE(v3->IsUnsigned()); + EXPECT_FALSE(v0->Equals(v1)); // no longer equal +} - EXPECT_TRUE(v1->Equals(v1)); - EXPECT_TRUE(v2->Equals(v2)); - EXPECT_TRUE(v3->Equals(v3)); +TEST_F(NodesVectorTest, VectorAttributesMatterOnHalvingAdd) { + HVecOperation* u0 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kUint32, 4, kNoDexPc); + HVecOperation* u1 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kUint16, 8, kNoDexPc); + HVecOperation* u2 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kUint8, 16, kNoDexPc); + + HVecOperation* p0 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); + HVecOperation* p1 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int16_parameter_, DataType::Type::kInt16, 8, kNoDexPc); + HVecOperation* p2 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int8_parameter_, DataType::Type::kInt8, 16, kNoDexPc); + + HVecHalvingAdd* v0 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), u0, u0, DataType::Type::kUint32, 4, /*is_rounded*/ true, kNoDexPc); + HVecHalvingAdd* v1 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), u0, u0, DataType::Type::kUint32, 4, /*is_rounded*/ false, kNoDexPc); 
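  // The same rounded/truncated pairs are repeated below for the signed types and for the narrower lane widths.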
+ HVecHalvingAdd* v2 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ true, kNoDexPc); + HVecHalvingAdd* v3 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), p0, p0, DataType::Type::kInt32, 4, /*is_rounded*/ false, kNoDexPc); + + HVecHalvingAdd* v4 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), u1, u1, DataType::Type::kUint16, 8, /*is_rounded*/ true, kNoDexPc); + HVecHalvingAdd* v5 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), u1, u1, DataType::Type::kUint16, 8, /*is_rounded*/ false, kNoDexPc); + HVecHalvingAdd* v6 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), p1, p1, DataType::Type::kInt16, 8, /*is_rounded*/ true, kNoDexPc); + HVecHalvingAdd* v7 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), p1, p1, DataType::Type::kInt16, 8, /*is_rounded*/ false, kNoDexPc); + + HVecHalvingAdd* v8 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), u2, u2, DataType::Type::kUint8, 16, /*is_rounded*/ true, kNoDexPc); + HVecHalvingAdd* v9 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), u2, u2, DataType::Type::kUint8, 16, /*is_rounded*/ false, kNoDexPc); + HVecHalvingAdd* v10 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), p2, p2, DataType::Type::kInt8, 16, /*is_rounded*/ true, kNoDexPc); + HVecHalvingAdd* v11 = new (GetAllocator()) HVecHalvingAdd( + GetAllocator(), p2, p2, DataType::Type::kInt8, 16, /*is_rounded*/ false, kNoDexPc); + + HVecHalvingAdd* hadd_insns[] = { v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11 }; + + EXPECT_FALSE(u0->CanBeMoved()); + EXPECT_FALSE(u1->CanBeMoved()); + EXPECT_FALSE(u2->CanBeMoved()); + EXPECT_FALSE(p0->CanBeMoved()); + EXPECT_FALSE(p1->CanBeMoved()); + EXPECT_FALSE(p2->CanBeMoved()); + + for (HVecHalvingAdd* hadd_insn : hadd_insns) { + EXPECT_TRUE(hadd_insn->CanBeMoved()); + } - EXPECT_FALSE(v1->Equals(v2)); // different signs - EXPECT_FALSE(v1->Equals(v3)); // different vector lengths + EXPECT_TRUE(v0->IsRounded()); + EXPECT_TRUE(!v1->IsRounded()); + EXPECT_TRUE(v2->IsRounded()); + EXPECT_TRUE(!v3->IsRounded()); + EXPECT_TRUE(v4->IsRounded()); + EXPECT_TRUE(!v5->IsRounded()); + EXPECT_TRUE(v6->IsRounded()); + EXPECT_TRUE(!v7->IsRounded()); + EXPECT_TRUE(v8->IsRounded()); + EXPECT_TRUE(!v9->IsRounded()); + EXPECT_TRUE(v10->IsRounded()); + EXPECT_TRUE(!v11->IsRounded()); + + for (HVecHalvingAdd* hadd_insn1 : hadd_insns) { + for (HVecHalvingAdd* hadd_insn2 : hadd_insns) { + EXPECT_EQ(hadd_insn1 == hadd_insn2, hadd_insn1->Equals(hadd_insn2)); + } + } } -TEST_F(NodesVectorTest, VectorAttributesMatterOnHalvingAdd) { - HVecOperation* v0 = new (&allocator_) - HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4); - - HVecHalvingAdd* v1 = new (&allocator_) HVecHalvingAdd( - &allocator_, v0, v0, Primitive::kPrimInt, 4, /*is_unsigned*/ true, /*is_rounded*/ true); - HVecHalvingAdd* v2 = new (&allocator_) HVecHalvingAdd( - &allocator_, v0, v0, Primitive::kPrimInt, 4, /*is_unsigned*/ true, /*is_rounded*/ false); - HVecHalvingAdd* v3 = new (&allocator_) HVecHalvingAdd( - &allocator_, v0, v0, Primitive::kPrimInt, 4, /*is_unsigned*/ false, /*is_rounded*/ true); - HVecHalvingAdd* v4 = new (&allocator_) HVecHalvingAdd( - &allocator_, v0, v0, Primitive::kPrimInt, 4, /*is_unsigned*/ false, /*is_rounded*/ false); - HVecHalvingAdd* v5 = new (&allocator_) HVecHalvingAdd( - &allocator_, v0, v0, Primitive::kPrimInt, 2, /*is_unsigned*/ true, /*is_rounded*/ true); +TEST_F(NodesVectorTest, VectorOperationMattersOnMultiplyAccumulate) { + HVecOperation* v0 = 
new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); + + HVecMultiplyAccumulate* v1 = new (GetAllocator()) HVecMultiplyAccumulate( + GetAllocator(), HInstruction::kAdd, v0, v0, v0, DataType::Type::kInt32, 4, kNoDexPc); + HVecMultiplyAccumulate* v2 = new (GetAllocator()) HVecMultiplyAccumulate( + GetAllocator(), HInstruction::kSub, v0, v0, v0, DataType::Type::kInt32, 4, kNoDexPc); + HVecMultiplyAccumulate* v3 = new (GetAllocator()) HVecMultiplyAccumulate( + GetAllocator(), HInstruction::kAdd, v0, v0, v0, DataType::Type::kInt32, 2, kNoDexPc); EXPECT_FALSE(v0->CanBeMoved()); EXPECT_TRUE(v1->CanBeMoved()); EXPECT_TRUE(v2->CanBeMoved()); EXPECT_TRUE(v3->CanBeMoved()); - EXPECT_TRUE(v4->CanBeMoved()); - EXPECT_TRUE(v5->CanBeMoved()); + + EXPECT_EQ(HInstruction::kAdd, v1->GetOpKind()); + EXPECT_EQ(HInstruction::kSub, v2->GetOpKind()); + EXPECT_EQ(HInstruction::kAdd, v3->GetOpKind()); EXPECT_TRUE(v1->Equals(v1)); EXPECT_TRUE(v2->Equals(v2)); EXPECT_TRUE(v3->Equals(v3)); - EXPECT_TRUE(v4->Equals(v4)); - EXPECT_TRUE(v5->Equals(v5)); - - EXPECT_TRUE(v1->IsUnsigned() && v1->IsRounded()); - EXPECT_TRUE(v2->IsUnsigned() && !v2->IsRounded()); - EXPECT_TRUE(!v3->IsUnsigned() && v3->IsRounded()); - EXPECT_TRUE(!v4->IsUnsigned() && !v4->IsRounded()); - EXPECT_TRUE(v5->IsUnsigned() && v5->IsRounded()); - - EXPECT_FALSE(v1->Equals(v2)); // different attributes - EXPECT_FALSE(v1->Equals(v3)); // different attributes - EXPECT_FALSE(v1->Equals(v4)); // different attributes - EXPECT_FALSE(v1->Equals(v5)); // different vector lengths + + EXPECT_FALSE(v1->Equals(v2)); // different operators + EXPECT_FALSE(v1->Equals(v3)); // different vector lengths } -TEST_F(NodesVectorTest, VectorOperationMattersOnMultiplyAccumulate) { - HVecOperation* v0 = new (&allocator_) - HVecReplicateScalar(&allocator_, parameter_, Primitive::kPrimInt, 4); +TEST_F(NodesVectorTest, VectorKindMattersOnReduce) { + HVecOperation* v0 = new (GetAllocator()) + HVecReplicateScalar(GetAllocator(), int32_parameter_, DataType::Type::kInt32, 4, kNoDexPc); - HVecMultiplyAccumulate* v1 = new (&allocator_) - HVecMultiplyAccumulate(&allocator_, HInstruction::kAdd, v0, v0, v0, Primitive::kPrimInt, 4); - HVecMultiplyAccumulate* v2 = new (&allocator_) - HVecMultiplyAccumulate(&allocator_, HInstruction::kSub, v0, v0, v0, Primitive::kPrimInt, 4); - HVecMultiplyAccumulate* v3 = new (&allocator_) - HVecMultiplyAccumulate(&allocator_, HInstruction::kAdd, v0, v0, v0, Primitive::kPrimInt, 2); + HVecReduce* v1 = new (GetAllocator()) HVecReduce( + GetAllocator(), v0, DataType::Type::kInt32, 4, HVecReduce::kSum, kNoDexPc); + HVecReduce* v2 = new (GetAllocator()) HVecReduce( + GetAllocator(), v0, DataType::Type::kInt32, 4, HVecReduce::kMin, kNoDexPc); + HVecReduce* v3 = new (GetAllocator()) HVecReduce( + GetAllocator(), v0, DataType::Type::kInt32, 4, HVecReduce::kMax, kNoDexPc); EXPECT_FALSE(v0->CanBeMoved()); EXPECT_TRUE(v1->CanBeMoved()); EXPECT_TRUE(v2->CanBeMoved()); EXPECT_TRUE(v3->CanBeMoved()); - EXPECT_EQ(HInstruction::kAdd, v1->GetOpKind()); - EXPECT_EQ(HInstruction::kSub, v2->GetOpKind()); - EXPECT_EQ(HInstruction::kAdd, v3->GetOpKind()); + EXPECT_EQ(HVecReduce::kSum, v1->GetKind()); + EXPECT_EQ(HVecReduce::kMin, v2->GetKind()); + EXPECT_EQ(HVecReduce::kMax, v3->GetKind()); EXPECT_TRUE(v1->Equals(v1)); EXPECT_TRUE(v2->Equals(v2)); EXPECT_TRUE(v3->Equals(v3)); - EXPECT_FALSE(v1->Equals(v2)); // different operators - EXPECT_FALSE(v1->Equals(v3)); // different vector lengths + 
EXPECT_FALSE(v1->Equals(v2)); // different kinds + EXPECT_FALSE(v1->Equals(v3)); } } // namespace art diff --git a/compiler/optimizing/nodes_x86.h b/compiler/optimizing/nodes_x86.h index 75893c3129..4c32be7d15 100644 --- a/compiler/optimizing/nodes_x86.h +++ b/compiler/optimizing/nodes_x86.h @@ -24,14 +24,18 @@ class HX86ComputeBaseMethodAddress FINAL : public HExpression<0> { public: // Treat the value as an int32_t, but it is really a 32 bit native pointer. HX86ComputeBaseMethodAddress() - : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc) {} + : HExpression(kX86ComputeBaseMethodAddress, + DataType::Type::kInt32, + SideEffects::None(), + kNoDexPc) { + } bool CanBeMoved() const OVERRIDE { return true; } DECLARE_INSTRUCTION(X86ComputeBaseMethodAddress); - private: - DISALLOW_COPY_AND_ASSIGN(HX86ComputeBaseMethodAddress); + protected: + DEFAULT_COPY_CONSTRUCTOR(X86ComputeBaseMethodAddress); }; // Load a constant value from the constant table. @@ -39,7 +43,10 @@ class HX86LoadFromConstantTable FINAL : public HExpression<2> { public: HX86LoadFromConstantTable(HX86ComputeBaseMethodAddress* method_base, HConstant* constant) - : HExpression(constant->GetType(), SideEffects::None(), kNoDexPc) { + : HExpression(kX86LoadFromConstantTable, + constant->GetType(), + SideEffects::None(), + kNoDexPc) { SetRawInputAt(0, method_base); SetRawInputAt(1, constant); } @@ -54,19 +61,19 @@ class HX86LoadFromConstantTable FINAL : public HExpression<2> { DECLARE_INSTRUCTION(X86LoadFromConstantTable); - private: - DISALLOW_COPY_AND_ASSIGN(HX86LoadFromConstantTable); + protected: + DEFAULT_COPY_CONSTRUCTOR(X86LoadFromConstantTable); }; // Version of HNeg with access to the constant table for FP types. class HX86FPNeg FINAL : public HExpression<2> { public: - HX86FPNeg(Primitive::Type result_type, + HX86FPNeg(DataType::Type result_type, HInstruction* input, HX86ComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HExpression(result_type, SideEffects::None(), dex_pc) { - DCHECK(Primitive::IsFloatingPointType(result_type)); + : HExpression(kX86FPNeg, result_type, SideEffects::None(), dex_pc) { + DCHECK(DataType::IsFloatingPointType(result_type)); SetRawInputAt(0, input); SetRawInputAt(1, method_base); } @@ -77,8 +84,8 @@ class HX86FPNeg FINAL : public HExpression<2> { DECLARE_INSTRUCTION(X86FPNeg); - private: - DISALLOW_COPY_AND_ASSIGN(HX86FPNeg); + protected: + DEFAULT_COPY_CONSTRUCTOR(X86FPNeg); }; // X86 version of HPackedSwitch that holds a pointer to the base method address. 
@@ -89,7 +96,7 @@ class HX86PackedSwitch FINAL : public HTemplateInstruction<2> { HInstruction* input, HX86ComputeBaseMethodAddress* method_base, uint32_t dex_pc) - : HTemplateInstruction(SideEffects::None(), dex_pc), + : HTemplateInstruction(kX86PackedSwitch, SideEffects::None(), dex_pc), start_value_(start_value), num_entries_(num_entries) { SetRawInputAt(0, input); @@ -113,11 +120,12 @@ class HX86PackedSwitch FINAL : public HTemplateInstruction<2> { DECLARE_INSTRUCTION(X86PackedSwitch); + protected: + DEFAULT_COPY_CONSTRUCTOR(X86PackedSwitch); + private: const int32_t start_value_; const int32_t num_entries_; - - DISALLOW_COPY_AND_ASSIGN(HX86PackedSwitch); }; } // namespace art diff --git a/compiler/optimizing/optimization.cc b/compiler/optimizing/optimization.cc index 3d769491a5..57db7a634c 100644 --- a/compiler/optimizing/optimization.cc +++ b/compiler/optimizing/optimization.cc @@ -16,12 +16,318 @@ #include "optimization.h" +#ifdef ART_ENABLE_CODEGEN_arm +#include "instruction_simplifier_arm.h" +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 +#include "instruction_simplifier_arm64.h" +#endif +#ifdef ART_ENABLE_CODEGEN_mips +#include "instruction_simplifier_mips.h" +#include "pc_relative_fixups_mips.h" +#endif +#ifdef ART_ENABLE_CODEGEN_x86 +#include "pc_relative_fixups_x86.h" +#endif +#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) +#include "x86_memory_gen.h" +#endif + +#include "bounds_check_elimination.h" +#include "cha_guard_optimization.h" +#include "code_sinking.h" +#include "constant_folding.h" +#include "constructor_fence_redundancy_elimination.h" +#include "dead_code_elimination.h" +#include "dex/code_item_accessors-inl.h" +#include "driver/dex_compilation_unit.h" +#include "gvn.h" +#include "induction_var_analysis.h" +#include "inliner.h" +#include "instruction_simplifier.h" +#include "intrinsics.h" +#include "licm.h" +#include "load_store_analysis.h" +#include "load_store_elimination.h" +#include "loop_optimization.h" +#include "scheduler.h" +#include "select_generator.h" +#include "sharpening.h" +#include "side_effects_analysis.h" + +// Decide between default or alternative pass name. 
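// Each OptimizationDef pairs a pass with an optional alternative name (nullptr selects the default name); the alternative name is what allows the same pass to appear more than once in a pass list, e.g. constant folding rerun after inlining.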
+ namespace art { -void HOptimization::MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count) const { - if (stats_ != nullptr) { - stats_->RecordStat(compilation_stat, count); +const char* OptimizationPassName(OptimizationPass pass) { + switch (pass) { + case OptimizationPass::kSideEffectsAnalysis: + return SideEffectsAnalysis::kSideEffectsAnalysisPassName; + case OptimizationPass::kInductionVarAnalysis: + return HInductionVarAnalysis::kInductionPassName; + case OptimizationPass::kLoadStoreAnalysis: + return LoadStoreAnalysis::kLoadStoreAnalysisPassName; + case OptimizationPass::kGlobalValueNumbering: + return GVNOptimization::kGlobalValueNumberingPassName; + case OptimizationPass::kInvariantCodeMotion: + return LICM::kLoopInvariantCodeMotionPassName; + case OptimizationPass::kLoopOptimization: + return HLoopOptimization::kLoopOptimizationPassName; + case OptimizationPass::kBoundsCheckElimination: + return BoundsCheckElimination::kBoundsCheckEliminationPassName; + case OptimizationPass::kLoadStoreElimination: + return LoadStoreElimination::kLoadStoreEliminationPassName; + case OptimizationPass::kConstantFolding: + return HConstantFolding::kConstantFoldingPassName; + case OptimizationPass::kDeadCodeElimination: + return HDeadCodeElimination::kDeadCodeEliminationPassName; + case OptimizationPass::kInliner: + return HInliner::kInlinerPassName; + case OptimizationPass::kSharpening: + return HSharpening::kSharpeningPassName; + case OptimizationPass::kSelectGenerator: + return HSelectGenerator::kSelectGeneratorPassName; + case OptimizationPass::kInstructionSimplifier: + return InstructionSimplifier::kInstructionSimplifierPassName; + case OptimizationPass::kIntrinsicsRecognizer: + return IntrinsicsRecognizer::kIntrinsicsRecognizerPassName; + case OptimizationPass::kCHAGuardOptimization: + return CHAGuardOptimization::kCHAGuardOptimizationPassName; + case OptimizationPass::kCodeSinking: + return CodeSinking::kCodeSinkingPassName; + case OptimizationPass::kConstructorFenceRedundancyElimination: + return ConstructorFenceRedundancyElimination::kCFREPassName; + case OptimizationPass::kScheduling: + return HInstructionScheduling::kInstructionSchedulingPassName; +#ifdef ART_ENABLE_CODEGEN_arm + case OptimizationPass::kInstructionSimplifierArm: + return arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName; +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 + case OptimizationPass::kInstructionSimplifierArm64: + return arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName; +#endif +#ifdef ART_ENABLE_CODEGEN_mips + case OptimizationPass::kPcRelativeFixupsMips: + return mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName; + case OptimizationPass::kInstructionSimplifierMips: + return mips::InstructionSimplifierMips::kInstructionSimplifierMipsPassName; +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + case OptimizationPass::kPcRelativeFixupsX86: + return x86::PcRelativeFixups::kPcRelativeFixupsX86PassName; +#endif +#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) + case OptimizationPass::kX86MemoryOperandGeneration: + return x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName; +#endif } } +#define X(x) if (name == OptimizationPassName((x))) return (x) + +OptimizationPass OptimizationPassByName(const std::string& name) { + X(OptimizationPass::kBoundsCheckElimination); + X(OptimizationPass::kCHAGuardOptimization); + X(OptimizationPass::kCodeSinking); + X(OptimizationPass::kConstantFolding); + 
X(OptimizationPass::kConstructorFenceRedundancyElimination); + X(OptimizationPass::kDeadCodeElimination); + X(OptimizationPass::kGlobalValueNumbering); + X(OptimizationPass::kInductionVarAnalysis); + X(OptimizationPass::kInliner); + X(OptimizationPass::kInstructionSimplifier); + X(OptimizationPass::kIntrinsicsRecognizer); + X(OptimizationPass::kInvariantCodeMotion); + X(OptimizationPass::kLoadStoreAnalysis); + X(OptimizationPass::kLoadStoreElimination); + X(OptimizationPass::kLoopOptimization); + X(OptimizationPass::kScheduling); + X(OptimizationPass::kSelectGenerator); + X(OptimizationPass::kSharpening); + X(OptimizationPass::kSideEffectsAnalysis); +#ifdef ART_ENABLE_CODEGEN_arm + X(OptimizationPass::kInstructionSimplifierArm); +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 + X(OptimizationPass::kInstructionSimplifierArm64); +#endif +#ifdef ART_ENABLE_CODEGEN_mips + X(OptimizationPass::kPcRelativeFixupsMips); + X(OptimizationPass::kInstructionSimplifierMips); +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + X(OptimizationPass::kPcRelativeFixupsX86); + X(OptimizationPass::kX86MemoryOperandGeneration); +#endif + LOG(FATAL) << "Cannot find optimization " << name; + UNREACHABLE(); +} + +#undef X + +ArenaVector<HOptimization*> ConstructOptimizations( + const OptimizationDef definitions[], + size_t length, + ArenaAllocator* allocator, + HGraph* graph, + OptimizingCompilerStats* stats, + CodeGenerator* codegen, + CompilerDriver* driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles) { + ArenaVector<HOptimization*> optimizations(allocator->Adapter()); + + // Some optimizations require SideEffectsAnalysis or HInductionVarAnalysis + // instances. This method uses the nearest instance preceding it in the pass + // name list or fails fatally if no such analysis can be found. + SideEffectsAnalysis* most_recent_side_effects = nullptr; + HInductionVarAnalysis* most_recent_induction = nullptr; + LoadStoreAnalysis* most_recent_lsa = nullptr; + + // Loop over the requested optimizations. + for (size_t i = 0; i < length; i++) { + OptimizationPass pass = definitions[i].first; + const char* alt_name = definitions[i].second; + const char* name = alt_name != nullptr + ? alt_name + : OptimizationPassName(pass); + HOptimization* opt = nullptr; + + switch (pass) { + // + // Analysis passes (kept in most recent for subsequent passes). + // + case OptimizationPass::kSideEffectsAnalysis: + opt = most_recent_side_effects = new (allocator) SideEffectsAnalysis(graph, name); + break; + case OptimizationPass::kInductionVarAnalysis: + opt = most_recent_induction = new (allocator) HInductionVarAnalysis(graph, name); + break; + case OptimizationPass::kLoadStoreAnalysis: + opt = most_recent_lsa = new (allocator) LoadStoreAnalysis(graph, name); + break; + // + // Passes that need prior analysis. 
+ // + case OptimizationPass::kGlobalValueNumbering: + CHECK(most_recent_side_effects != nullptr); + opt = new (allocator) GVNOptimization(graph, *most_recent_side_effects, name); + break; + case OptimizationPass::kInvariantCodeMotion: + CHECK(most_recent_side_effects != nullptr); + opt = new (allocator) LICM(graph, *most_recent_side_effects, stats, name); + break; + case OptimizationPass::kLoopOptimization: + CHECK(most_recent_induction != nullptr); + opt = new (allocator) HLoopOptimization(graph, driver, most_recent_induction, stats, name); + break; + case OptimizationPass::kBoundsCheckElimination: + CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); + opt = new (allocator) BoundsCheckElimination( + graph, *most_recent_side_effects, most_recent_induction, name); + break; + case OptimizationPass::kLoadStoreElimination: + CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); + opt = new (allocator) LoadStoreElimination( + graph, *most_recent_side_effects, *most_recent_lsa, stats, name); + break; + // + // Regular passes. + // + case OptimizationPass::kConstantFolding: + opt = new (allocator) HConstantFolding(graph, name); + break; + case OptimizationPass::kDeadCodeElimination: + opt = new (allocator) HDeadCodeElimination(graph, stats, name); + break; + case OptimizationPass::kInliner: { + CodeItemDataAccessor accessor(*dex_compilation_unit.GetDexFile(), + dex_compilation_unit.GetCodeItem()); + opt = new (allocator) HInliner(graph, // outer_graph + graph, // outermost_graph + codegen, + dex_compilation_unit, // outer_compilation_unit + dex_compilation_unit, // outermost_compilation_unit + driver, + handles, + stats, + accessor.RegistersSize(), + /* total_number_of_instructions */ 0, + /* parent */ nullptr, + /* depth */ 0, + name); + break; + } + case OptimizationPass::kSharpening: + opt = new (allocator) HSharpening(graph, codegen, driver, name); + break; + case OptimizationPass::kSelectGenerator: + opt = new (allocator) HSelectGenerator(graph, handles, stats, name); + break; + case OptimizationPass::kInstructionSimplifier: + opt = new (allocator) InstructionSimplifier(graph, codegen, driver, stats, name); + break; + case OptimizationPass::kIntrinsicsRecognizer: + opt = new (allocator) IntrinsicsRecognizer(graph, stats, name); + break; + case OptimizationPass::kCHAGuardOptimization: + opt = new (allocator) CHAGuardOptimization(graph, name); + break; + case OptimizationPass::kCodeSinking: + opt = new (allocator) CodeSinking(graph, stats, name); + break; + case OptimizationPass::kConstructorFenceRedundancyElimination: + opt = new (allocator) ConstructorFenceRedundancyElimination(graph, stats, name); + break; + case OptimizationPass::kScheduling: + opt = new (allocator) HInstructionScheduling( + graph, driver->GetInstructionSet(), codegen, name); + break; + // + // Arch-specific passes. 
+ // +#ifdef ART_ENABLE_CODEGEN_arm + case OptimizationPass::kInstructionSimplifierArm: + DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; + opt = new (allocator) arm::InstructionSimplifierArm(graph, stats); + break; +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 + case OptimizationPass::kInstructionSimplifierArm64: + DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; + opt = new (allocator) arm64::InstructionSimplifierArm64(graph, stats); + break; +#endif +#ifdef ART_ENABLE_CODEGEN_mips + case OptimizationPass::kPcRelativeFixupsMips: + DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; + opt = new (allocator) mips::PcRelativeFixups(graph, codegen, stats); + break; + case OptimizationPass::kInstructionSimplifierMips: + DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; + opt = new (allocator) mips::InstructionSimplifierMips(graph, codegen, stats); + break; +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + case OptimizationPass::kPcRelativeFixupsX86: + DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; + opt = new (allocator) x86::PcRelativeFixups(graph, codegen, stats); + break; + case OptimizationPass::kX86MemoryOperandGeneration: + DCHECK(alt_name == nullptr) << "arch-specific pass does not support alternative name"; + opt = new (allocator) x86::X86MemoryOperandGeneration(graph, codegen, stats); + break; +#endif + } // switch + + // Add each next optimization to result vector. + CHECK(opt != nullptr); + DCHECK_STREQ(name, opt->GetPassName()); // sanity + optimizations.push_back(opt); + } + + return optimizations; +} + } // namespace art diff --git a/compiler/optimizing/optimization.h b/compiler/optimizing/optimization.h index 0819fb01ac..c170f155fa 100644 --- a/compiler/optimizing/optimization.h +++ b/compiler/optimizing/optimization.h @@ -23,6 +23,10 @@ namespace art { +class CodeGenerator; +class CompilerDriver; +class DexCompilationUnit; + /** * Abstraction to implement an optimization pass. */ @@ -47,8 +51,6 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> { virtual void Run() = 0; protected: - void MaybeRecordStat(MethodCompilationStat compilation_stat, size_t count = 1) const; - HGraph* const graph_; // Used to record stats about the optimization. OptimizingCompilerStats* const stats_; @@ -60,6 +62,81 @@ class HOptimization : public ArenaObject<kArenaAllocOptimization> { DISALLOW_COPY_AND_ASSIGN(HOptimization); }; +// Optimization passes that can be constructed by the helper method below. An enum +// field is preferred over a string lookup at places where performance matters. +// TODO: generate this table and lookup methods below automatically? 
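// For illustration only, a minimal sketch of how these declarations fit together (assuming an allocator, graph, stats, codegen, driver, dex compilation unit and handle scope are already in scope; the optimizing compiler itself additionally wraps each Run() in pass-observer bookkeeping):
//   OptimizationDef defs[] = {
//     OptDef(OptimizationPass::kConstantFolding),
//     OptDef(OptimizationPass::kInliner),
//     OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_inlining"),
//   };
//   ArenaVector<HOptimization*> opts = ConstructOptimizations(
//       defs, arraysize(defs), allocator, graph, stats, codegen, driver, dex_compilation_unit, handles);
//   for (HOptimization* opt : opts) {
//     opt->Run();
//   }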
+enum class OptimizationPass { + kBoundsCheckElimination, + kCHAGuardOptimization, + kCodeSinking, + kConstantFolding, + kConstructorFenceRedundancyElimination, + kDeadCodeElimination, + kGlobalValueNumbering, + kInductionVarAnalysis, + kInliner, + kInstructionSimplifier, + kIntrinsicsRecognizer, + kInvariantCodeMotion, + kLoadStoreAnalysis, + kLoadStoreElimination, + kLoopOptimization, + kScheduling, + kSelectGenerator, + kSharpening, + kSideEffectsAnalysis, +#ifdef ART_ENABLE_CODEGEN_arm + kInstructionSimplifierArm, +#endif +#ifdef ART_ENABLE_CODEGEN_arm64 + kInstructionSimplifierArm64, +#endif +#ifdef ART_ENABLE_CODEGEN_mips + kPcRelativeFixupsMips, + kInstructionSimplifierMips, +#endif +#ifdef ART_ENABLE_CODEGEN_x86 + kPcRelativeFixupsX86, +#endif +#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) + kX86MemoryOperandGeneration, +#endif +}; + +// Lookup name of optimization pass. +const char* OptimizationPassName(OptimizationPass pass); + +// Lookup optimization pass by name. +OptimizationPass OptimizationPassByName(const std::string& name); + +// Optimization definition consisting of an optimization pass +// and an optional alternative name (nullptr denotes default). +typedef std::pair<OptimizationPass, const char*> OptimizationDef; + +// Helper method for optimization definition array entries. +inline OptimizationDef OptDef(OptimizationPass pass, const char* name = nullptr) { + return std::make_pair(pass, name); +} + +// Helper method to construct series of optimization passes. +// The array should consist of the requested optimizations +// and optional alternative names for repeated passes. +// Example: +// { OptDef(kConstantFolding), +// OptDef(kInliner), +// OptDef(kConstantFolding, "constant_folding$after_inlining") +// } +ArenaVector<HOptimization*> ConstructOptimizations( + const OptimizationDef definitions[], + size_t length, + ArenaAllocator* allocator, + HGraph* graph, + OptimizingCompilerStats* stats, + CodeGenerator* codegen, + CompilerDriver* driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles); + } // namespace art #endif // ART_COMPILER_OPTIMIZING_OPTIMIZATION_H_ diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc index 6cb27b3b1b..d20b681b49 100644 --- a/compiler/optimizing/optimizing_cfi_test.cc +++ b/compiler/optimizing/optimizing_cfi_test.cc @@ -18,13 +18,15 @@ #include <vector> #include "arch/instruction_set.h" +#include "base/runtime_debug.h" #include "cfi_test.h" #include "driver/compiler_options.h" #include "gtest/gtest.h" #include "optimizing/code_generator.h" #include "optimizing/optimizing_unit_test.h" -#include "utils/assembler.h" +#include "read_barrier_config.h" #include "utils/arm/assembler_arm_vixl.h" +#include "utils/assembler.h" #include "utils/mips/assembler_mips.h" #include "utils/mips64/assembler_mips64.h" @@ -39,28 +41,33 @@ namespace art { // Run the tests only on host. #ifndef ART_TARGET_ANDROID -class OptimizingCFITest : public CFITest { +class OptimizingCFITest : public CFITest, public OptimizingUnitTestHelper { public: // Enable this flag to generate the expected outputs.
static constexpr bool kGenerateExpected = false; OptimizingCFITest() - : pool_(), - allocator_(&pool_), + : pool_and_allocator_(), opts_(), isa_features_(), graph_(nullptr), code_gen_(), - blocks_(allocator_.Adapter()) {} + blocks_(GetAllocator()->Adapter()) {} + + ArenaAllocator* GetAllocator() { return pool_and_allocator_.GetAllocator(); } void SetUpFrame(InstructionSet isa) { + // Ensure that slow-debug is off, so that there is no unexpected read-barrier check emitted. + SetRuntimeDebugFlagsEnabled(false); + // Setup simple context. std::string error; isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error); - graph_ = CreateGraph(&allocator_); + graph_ = CreateGraph(); // Generate simple frame with some spills. code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_); code_gen_->GetAssembler()->cfi().SetEnabled(true); + code_gen_->InitializeCodeGenerationData(); const int frame_size = 64; int core_reg = 0; int fp_reg = 0; @@ -141,8 +148,7 @@ class OptimizingCFITest : public CFITest { DISALLOW_COPY_AND_ASSIGN(InternalCodeAllocator); }; - ArenaPool pool_; - ArenaAllocator allocator_; + ArenaPoolAndAllocator pool_and_allocator_; CompilerOptions opts_; std::unique_ptr<const InstructionSetFeatures> isa_features_; HGraph* graph_; @@ -151,15 +157,15 @@ class OptimizingCFITest : public CFITest { InternalCodeAllocator code_allocator_; }; -#define TEST_ISA(isa) \ - TEST_F(OptimizingCFITest, isa) { \ - std::vector<uint8_t> expected_asm( \ - expected_asm_##isa, \ - expected_asm_##isa + arraysize(expected_asm_##isa)); \ - std::vector<uint8_t> expected_cfi( \ - expected_cfi_##isa, \ - expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ - TestImpl(isa, #isa, expected_asm, expected_cfi); \ +#define TEST_ISA(isa) \ + TEST_F(OptimizingCFITest, isa) { \ + std::vector<uint8_t> expected_asm( \ + expected_asm_##isa, \ + expected_asm_##isa + arraysize(expected_asm_##isa)); \ + std::vector<uint8_t> expected_cfi( \ + expected_cfi_##isa, \ + expected_cfi_##isa + arraysize(expected_cfi_##isa)); \ + TestImpl(InstructionSet::isa, #isa, expected_asm, expected_cfi); \ } #ifdef ART_ENABLE_CODEGEN_arm @@ -202,7 +208,7 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) { std::vector<uint8_t> expected_cfi( expected_cfi_kThumb2_adjust, expected_cfi_kThumb2_adjust + arraysize(expected_cfi_kThumb2_adjust)); - SetUpFrame(kThumb2); + SetUpFrame(InstructionSet::kThumb2); #define __ down_cast<arm::ArmVIXLAssembler*>(GetCodeGenerator() \ ->GetAssembler())->GetVIXLAssembler()-> vixl32::Label target; @@ -214,7 +220,7 @@ TEST_F(OptimizingCFITest, kThumb2Adjust) { __ Bind(&target); #undef __ Finish(); - Check(kThumb2, "kThumb2_adjust", expected_asm, expected_cfi); + Check(InstructionSet::kThumb2, "kThumb2_adjust", expected_asm, expected_cfi); } #endif @@ -233,7 +239,7 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { std::vector<uint8_t> expected_cfi( expected_cfi_kMips_adjust, expected_cfi_kMips_adjust + arraysize(expected_cfi_kMips_adjust)); - SetUpFrame(kMips); + SetUpFrame(InstructionSet::kMips); #define __ down_cast<mips::MipsAssembler*>(GetCodeGenerator()->GetAssembler())-> mips::MipsLabel target; __ Beqz(mips::A0, &target); @@ -244,7 +250,7 @@ TEST_F(OptimizingCFITest, kMipsAdjust) { __ Bind(&target); #undef __ Finish(); - Check(kMips, "kMips_adjust", expected_asm, expected_cfi); + Check(InstructionSet::kMips, "kMips_adjust", expected_asm, expected_cfi); } #endif @@ -263,7 +269,7 @@ TEST_F(OptimizingCFITest, kMips64Adjust) { std::vector<uint8_t> expected_cfi( expected_cfi_kMips64_adjust, 
expected_cfi_kMips64_adjust + arraysize(expected_cfi_kMips64_adjust)); - SetUpFrame(kMips64); + SetUpFrame(InstructionSet::kMips64); #define __ down_cast<mips64::Mips64Assembler*>(GetCodeGenerator()->GetAssembler())-> mips64::Mips64Label target; __ Beqc(mips64::A1, mips64::A2, &target); @@ -274,7 +280,7 @@ TEST_F(OptimizingCFITest, kMips64Adjust) { __ Bind(&target); #undef __ Finish(); - Check(kMips64, "kMips64_adjust", expected_asm, expected_cfi); + Check(InstructionSet::kMips64, "kMips64_adjust", expected_asm, expected_cfi); } #endif diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc index 77a63acd18..1e82c4b0f7 100644 --- a/compiler/optimizing/optimizing_cfi_test_expected.inc +++ b/compiler/optimizing/optimizing_cfi_test_expected.inc @@ -148,27 +148,27 @@ static constexpr uint8_t expected_cfi_kMips[] = { 0x48, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40, }; -// 0x00000000: addiu r29, r29, -64 +// 0x00000000: addiu sp, sp, -64 // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sw r31, +60(r29) +// 0x00000004: sw ra, +60(sp) // 0x00000008: .cfi_offset: r31 at cfa-4 -// 0x00000008: sw r17, +56(r29) +// 0x00000008: sw s1, +56(sp) // 0x0000000c: .cfi_offset: r17 at cfa-8 -// 0x0000000c: sw r16, +52(r29) +// 0x0000000c: sw s0, +52(sp) // 0x00000010: .cfi_offset: r16 at cfa-12 -// 0x00000010: sdc1 f22, +40(r29) -// 0x00000014: sdc1 f20, +32(r29) +// 0x00000010: sdc1 f22, +40(sp) +// 0x00000014: sdc1 f20, +32(sp) // 0x00000018: .cfi_remember_state -// 0x00000018: lw r31, +60(r29) +// 0x00000018: lw ra, +60(sp) // 0x0000001c: .cfi_restore: r31 -// 0x0000001c: lw r17, +56(r29) +// 0x0000001c: lw s1, +56(sp) // 0x00000020: .cfi_restore: r17 -// 0x00000020: lw r16, +52(r29) +// 0x00000020: lw s0, +52(sp) // 0x00000024: .cfi_restore: r16 -// 0x00000024: ldc1 f22, +40(r29) -// 0x00000028: ldc1 f20, +32(r29) -// 0x0000002c: jr r31 -// 0x00000030: addiu r29, r29, 64 +// 0x00000024: ldc1 f22, +40(sp) +// 0x00000028: ldc1 f20, +32(sp) +// 0x0000002c: jr ra +// 0x00000030: addiu sp, sp, 64 // 0x00000034: .cfi_def_cfa_offset: 0 // 0x00000034: .cfi_restore_state // 0x00000034: .cfi_def_cfa_offset: 64 @@ -185,32 +185,32 @@ static constexpr uint8_t expected_cfi_kMips64[] = { 0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: daddiu r29, r29, -64 +// 0x00000000: daddiu sp, sp, -64 // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sd r31, +56(r29) +// 0x00000004: sd ra, +56(sp) // 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd r17, +48(r29) +// 0x00000008: sd s1, +48(sp) // 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd r16, +40(r29) +// 0x0000000c: sd s0, +40(sp) // 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +32(r29) +// 0x00000010: sdc1 f25, +32(sp) // 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +24(r29) +// 0x00000014: sdc1 f24, +24(sp) // 0x00000018: .cfi_offset: r56 at cfa-40 // 0x00000018: .cfi_remember_state -// 0x00000018: ld r31, +56(r29) +// 0x00000018: ld ra, +56(sp) // 0x0000001c: .cfi_restore: r31 -// 0x0000001c: ld r17, +48(r29) +// 0x0000001c: ld s1, +48(sp) // 0x00000020: .cfi_restore: r17 -// 0x00000020: ld r16, +40(r29) +// 0x00000020: ld s0, +40(sp) // 0x00000024: .cfi_restore: r16 -// 0x00000024: ldc1 f25, +32(r29) +// 0x00000024: ldc1 f25, +32(sp) // 0x00000028: .cfi_restore: r57 -// 0x00000028: ldc1 f24, 
+24(r29) +// 0x00000028: ldc1 f24, +24(sp) // 0x0000002c: .cfi_restore: r56 -// 0x0000002c: daddiu r29, r29, 64 +// 0x0000002c: daddiu sp, sp, 64 // 0x00000030: .cfi_def_cfa_offset: 0 -// 0x00000030: jic r31, 0 +// 0x00000030: jic ra, 0 // 0x00000034: .cfi_restore_state // 0x00000034: .cfi_def_cfa_offset: 64 @@ -330,10 +330,10 @@ static constexpr uint8_t expected_cfi_kThumb2_adjust[] = { static constexpr uint8_t expected_asm_kMips_adjust_head[] = { 0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF, 0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7, - 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27, + 0x08, 0x00, 0x80, 0x14, 0xF0, 0xFF, 0xBD, 0x27, 0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C, 0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F, - 0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27, + 0x09, 0x00, 0x20, 0x00, 0x10, 0x00, 0xBD, 0x27, }; static constexpr uint8_t expected_asm_kMips_adjust_tail[] = { 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F, 0x34, 0x00, 0xB0, 0x8F, @@ -342,45 +342,45 @@ static constexpr uint8_t expected_asm_kMips_adjust_tail[] = { }; static constexpr uint8_t expected_cfi_kMips_adjust[] = { 0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03, - 0x50, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A, + 0x50, 0x0E, 0x50, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40, }; -// 0x00000000: addiu r29, r29, -64 +// 0x00000000: addiu sp, sp, -64 // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sw r31, +60(r29) +// 0x00000004: sw ra, +60(sp) // 0x00000008: .cfi_offset: r31 at cfa-4 -// 0x00000008: sw r17, +56(r29) +// 0x00000008: sw s1, +56(sp) // 0x0000000c: .cfi_offset: r17 at cfa-8 -// 0x0000000c: sw r16, +52(r29) +// 0x0000000c: sw s0, +52(sp) // 0x00000010: .cfi_offset: r16 at cfa-12 -// 0x00000010: sdc1 f22, +40(r29) -// 0x00000014: sdc1 f20, +32(r29) -// 0x00000018: bne r0, r4, 0x00000040 ; +36 -// 0x0000001c: addiu r29, r29, -4 -// 0x00000020: .cfi_def_cfa_offset: 68 -// 0x00000020: sw r31, +0(r29) -// 0x00000024: bltzal r0, 0x0000002c ; +4 -// 0x00000028: lui r1, 0x20000 -// 0x0000002c: ori r1, r1, 24 -// 0x00000030: addu r1, r1, r31 -// 0x00000034: lw r31, +0(r29) -// 0x00000038: jr r1 -// 0x0000003c: addiu r29, r29, 4 +// 0x00000010: sdc1 f22, +40(sp) +// 0x00000014: sdc1 f20, +32(sp) +// 0x00000018: bnez a0, 0x0000003c ; +36 +// 0x0000001c: addiu sp, sp, -16 +// 0x00000020: .cfi_def_cfa_offset: 80 +// 0x00000020: sw ra, +0(sp) +// 0x00000024: nal +// 0x00000028: lui at, 2 +// 0x0000002c: ori at, at, 24 +// 0x00000030: addu at, at, ra +// 0x00000034: lw ra, +0(sp) +// 0x00000038: jr at +// 0x0000003c: addiu sp, sp, 16 // 0x00000040: .cfi_def_cfa_offset: 64 // 0x00000040: nop // ... 
// 0x00020040: nop // 0x00020044: .cfi_remember_state -// 0x00020044: lw r31, +60(r29) +// 0x00020044: lw ra, +60(sp) // 0x00020048: .cfi_restore: r31 -// 0x00020048: lw r17, +56(r29) +// 0x00020048: lw s1, +56(sp) // 0x0002004c: .cfi_restore: r17 -// 0x0002004c: lw r16, +52(r29) +// 0x0002004c: lw s0, +52(sp) // 0x00020050: .cfi_restore: r16 -// 0x00020050: ldc1 f22, +40(r29) -// 0x00020054: ldc1 f20, +32(r29) -// 0x00020058: jr r31 -// 0x0002005c: addiu r29, r29, 64 +// 0x00020050: ldc1 f22, +40(sp) +// 0x00020054: ldc1 f20, +32(sp) +// 0x00020058: jr ra +// 0x0002005c: addiu sp, sp, 64 // 0x00020060: .cfi_def_cfa_offset: 0 // 0x00020060: .cfi_restore_state // 0x00020060: .cfi_def_cfa_offset: 64 @@ -401,37 +401,37 @@ static constexpr uint8_t expected_cfi_kMips64_adjust[] = { 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40, }; -// 0x00000000: daddiu r29, r29, -64 +// 0x00000000: daddiu sp, sp, -64 // 0x00000004: .cfi_def_cfa_offset: 64 -// 0x00000004: sd r31, +56(r29) +// 0x00000004: sd ra, +56(sp) // 0x00000008: .cfi_offset: r31 at cfa-8 -// 0x00000008: sd r17, +48(r29) +// 0x00000008: sd s1, +48(sp) // 0x0000000c: .cfi_offset: r17 at cfa-16 -// 0x0000000c: sd r16, +40(r29) +// 0x0000000c: sd s0, +40(sp) // 0x00000010: .cfi_offset: r16 at cfa-24 -// 0x00000010: sdc1 f25, +32(r29) +// 0x00000010: sdc1 f25, +32(sp) // 0x00000014: .cfi_offset: r57 at cfa-32 -// 0x00000014: sdc1 f24, +24(r29) +// 0x00000014: sdc1 f24, +24(sp) // 0x00000018: .cfi_offset: r56 at cfa-40 -// 0x00000018: bnec r5, r6, 0x00000024 ; +12 -// 0x0000001c: auipc r1, 2 -// 0x00000020: jic r1, 12 ; bc 0x00020028 ; +131080 +// 0x00000018: bnec a1, a2, 0x00000024 ; +12 +// 0x0000001c: auipc at, 2 +// 0x00000020: jic at, 12 ; bc 0x00020028 ; +131080 // 0x00000024: nop // ... 
// 0x00020024: nop // 0x00020028: .cfi_remember_state -// 0x00020028: ld r31, +56(r29) +// 0x00020028: ld ra, +56(sp) // 0x0002002c: .cfi_restore: r31 -// 0x0002002c: ld r17, +48(r29) +// 0x0002002c: ld s1, +48(sp) // 0x00020030: .cfi_restore: r17 -// 0x00020030: ld r16, +40(r29) +// 0x00020030: ld s0, +40(sp) // 0x00020034: .cfi_restore: r16 -// 0x00020034: ldc1 f25, +32(r29) +// 0x00020034: ldc1 f25, +32(sp) // 0x00020038: .cfi_restore: r57 -// 0x00020038: ldc1 f24, +24(r29) +// 0x00020038: ldc1 f24, +24(sp) // 0x0002003c: .cfi_restore: r56 -// 0x0002003c: daddiu r29, r29, 64 +// 0x0002003c: daddiu sp, sp, 64 // 0x00020040: .cfi_def_cfa_offset: 0 -// 0x00020040: jic r31, 0 +// 0x00020040: jic ra, 0 // 0x00020044: .cfi_restore_state // 0x00020044: .cfi_def_cfa_offset: 64 diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index b76a0df861..e42dfc10ba 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -22,75 +22,41 @@ #include <stdint.h> -#include "android-base/strings.h" - -#ifdef ART_ENABLE_CODEGEN_arm64 -#include "instruction_simplifier_arm64.h" -#endif - -#ifdef ART_ENABLE_CODEGEN_mips -#include "pc_relative_fixups_mips.h" -#endif - -#ifdef ART_ENABLE_CODEGEN_x86 -#include "pc_relative_fixups_x86.h" -#endif - -#if defined(ART_ENABLE_CODEGEN_x86) || defined(ART_ENABLE_CODEGEN_x86_64) -#include "x86_memory_gen.h" -#endif - #include "art_method-inl.h" #include "base/arena_allocator.h" #include "base/arena_containers.h" #include "base/dumpable.h" #include "base/macros.h" #include "base/mutex.h" +#include "base/scoped_arena_allocator.h" #include "base/timing_logger.h" -#include "bounds_check_elimination.h" #include "builder.h" -#include "cha_guard_optimization.h" #include "code_generator.h" -#include "code_sinking.h" #include "compiled_method.h" #include "compiler.h" -#include "constant_folding.h" -#include "dead_code_elimination.h" #include "debug/elf_debug_writer.h" #include "debug/method_debug_info.h" +#include "dex/dex_file_types.h" #include "dex/verification_results.h" #include "dex/verified_method.h" -#include "dex_file_types.h" #include "driver/compiler_driver-inl.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" -#include "elf_writer_quick.h" #include "graph_checker.h" #include "graph_visualizer.h" -#include "gvn.h" -#include "induction_var_analysis.h" #include "inliner.h" -#include "instruction_simplifier.h" -#include "instruction_simplifier_arm.h" -#include "intrinsics.h" #include "jit/debugger_interface.h" #include "jit/jit.h" #include "jit/jit_code_cache.h" #include "jit/jit_logger.h" #include "jni/quick/jni_compiler.h" -#include "licm.h" -#include "load_store_analysis.h" -#include "load_store_elimination.h" -#include "loop_optimization.h" +#include "linker/linker_patch.h" #include "nodes.h" #include "oat_quick_method_header.h" #include "prepare_for_register_allocation.h" #include "reference_type_propagation.h" #include "register_allocator_linear_scan.h" #include "select_generator.h" -#include "scheduler.h" -#include "sharpening.h" -#include "side_effects_analysis.h" #include "ssa_builder.h" #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" @@ -108,8 +74,8 @@ static constexpr const char* kPassNameSeparator = "$"; */ class CodeVectorAllocator FINAL : public CodeAllocator { public: - explicit CodeVectorAllocator(ArenaAllocator* arena) - : memory_(arena->Adapter(kArenaAllocCodeBuffer)), + explicit 
CodeVectorAllocator(ArenaAllocator* allocator) + : memory_(allocator->Adapter(kArenaAllocCodeBuffer)), size_(0) {} virtual uint8_t* Allocate(size_t size) { @@ -146,9 +112,9 @@ class PassObserver : public ValueObject { Mutex& dump_mutex) : graph_(graph), cached_method_name_(), - timing_logger_enabled_(compiler_driver->GetDumpPasses()), + timing_logger_enabled_(compiler_driver->GetCompilerOptions().GetDumpTimings()), timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true), - disasm_info_(graph->GetArena()), + disasm_info_(graph->GetAllocator()), visualizer_oss_(), visualizer_output_(visualizer_output), visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()), @@ -311,13 +277,7 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* JniCompile(uint32_t access_flags, uint32_t method_idx, const DexFile& dex_file, - JniOptimizationFlags optimization_flags) const OVERRIDE { - return ArtQuickJniCompileMethod(GetCompilerDriver(), - access_flags, - method_idx, - dex_file, - optimization_flags); - } + Handle<mirror::DexCache> dex_cache) const OVERRIDE; uintptr_t GetEntryPointOf(ArtMethod* method) const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) { @@ -329,12 +289,6 @@ class OptimizingCompiler FINAL : public Compiler { void UnInit() const OVERRIDE; - void MaybeRecordStat(MethodCompilationStat compilation_stat) const { - if (compilation_stats_.get() != nullptr) { - compilation_stats_->RecordStat(compilation_stat); - } - } - bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, @@ -346,21 +300,52 @@ class OptimizingCompiler FINAL : public Compiler { private: void RunOptimizations(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, - VariableSizedHandleScope* handles) const; + VariableSizedHandleScope* handles, + const OptimizationDef definitions[], + size_t length) const { + // Convert definitions to optimization passes. + ArenaVector<HOptimization*> optimizations = ConstructOptimizations( + definitions, + length, + graph->GetAllocator(), + graph, + compilation_stats_.get(), + codegen, + GetCompilerDriver(), + dex_compilation_unit, + handles); + DCHECK_EQ(length, optimizations.size()); + // Run the optimization passes one by one. + for (size_t i = 0; i < length; ++i) { + PassScope scope(optimizations[i]->GetPassName(), pass_observer); + optimizations[i]->Run(); + } + } + + template <size_t length> void RunOptimizations( + HGraph* graph, + CodeGenerator* codegen, + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + VariableSizedHandleScope* handles, + const OptimizationDef (&definitions)[length]) const { + RunOptimizations( + graph, codegen, dex_compilation_unit, pass_observer, handles, definitions, length); + } - void RunOptimizations(HOptimization* optimizations[], - size_t length, - PassObserver* pass_observer) const; + void RunOptimizations(HGraph* graph, + CodeGenerator* codegen, + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + VariableSizedHandleScope* handles) const; private: // Create a 'CompiledMethod' for an optimized graph. 
- CompiledMethod* Emit(ArenaAllocator* arena, + CompiledMethod* Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - CompilerDriver* driver, const DexFile::CodeItem* item) const; // Try compiling a method and return the code generator used for @@ -370,31 +355,35 @@ class OptimizingCompiler FINAL : public Compiler { // 2) Transforms the graph to SSA. Returns null if it failed. // 3) Runs optimizations on the graph, including register allocator. // 4) Generates code with the `code_allocator` provided. - CodeGenerator* TryCompile(ArenaAllocator* arena, + CodeGenerator* TryCompile(ArenaAllocator* allocator, + ArenaStack* arena_stack, CodeVectorAllocator* code_allocator, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache, + const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, bool osr, VariableSizedHandleScope* handles) const; + CodeGenerator* TryCompileIntrinsic(ArenaAllocator* allocator, + ArenaStack* arena_stack, + CodeVectorAllocator* code_allocator, + const DexCompilationUnit& dex_compilation_unit, + ArtMethod* method, + VariableSizedHandleScope* handles) const; + void MaybeRunInliner(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const; - void RunArchOptimizations(InstructionSet instruction_set, - HGraph* graph, + void RunArchOptimizations(HGraph* graph, CodeGenerator* codegen, - PassObserver* pass_observer) const; + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + VariableSizedHandleScope* handles) const; + + void GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo method_debug_info) + REQUIRES_SHARED(Locks::mutator_lock_); std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -421,7 +410,7 @@ void OptimizingCompiler::Init() { driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out; visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode)); } - if (driver->GetDumpStats()) { + if (driver->GetCompilerOptions().GetDumpStats()) { compilation_stats_.reset(new OptimizingCompilerStats()); } } @@ -441,269 +430,139 @@ bool OptimizingCompiler::CanCompileMethod(uint32_t method_idx ATTRIBUTE_UNUSED, } static bool IsInstructionSetSupported(InstructionSet instruction_set) { - return (instruction_set == kArm && !kArm32QuickCodeUseSoftFloat) - || instruction_set == kArm64 - || (instruction_set == kThumb2 && !kArm32QuickCodeUseSoftFloat) - || instruction_set == kMips - || instruction_set == kMips64 - || instruction_set == kX86 - || instruction_set == kX86_64; -} - -// Strip pass name suffix to get optimization name. -static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) { - size_t pos = pass_name.find(kPassNameSeparator); - return pos == std::string::npos ? 
pass_name : pass_name.substr(0, pos); -} - -static HOptimization* BuildOptimization( - const std::string& pass_name, - ArenaAllocator* arena, - HGraph* graph, - OptimizingCompilerStats* stats, - CodeGenerator* codegen, - CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit, - VariableSizedHandleScope* handles, - SideEffectsAnalysis* most_recent_side_effects, - HInductionVarAnalysis* most_recent_induction, - LoadStoreAnalysis* most_recent_lsa) { - std::string opt_name = ConvertPassNameToOptimizationName(pass_name); - if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) { - CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr); - return new (arena) BoundsCheckElimination(graph, - *most_recent_side_effects, - most_recent_induction); - } else if (opt_name == GVNOptimization::kGlobalValueNumberingPassName) { - CHECK(most_recent_side_effects != nullptr); - return new (arena) GVNOptimization(graph, *most_recent_side_effects, pass_name.c_str()); - } else if (opt_name == HConstantFolding::kConstantFoldingPassName) { - return new (arena) HConstantFolding(graph, pass_name.c_str()); - } else if (opt_name == HDeadCodeElimination::kDeadCodeEliminationPassName) { - return new (arena) HDeadCodeElimination(graph, stats, pass_name.c_str()); - } else if (opt_name == HInliner::kInlinerPassName) { - size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_; - return new (arena) HInliner(graph, // outer_graph - graph, // outermost_graph - codegen, - dex_compilation_unit, // outer_compilation_unit - dex_compilation_unit, // outermost_compilation_unit - driver, - handles, - stats, - number_of_dex_registers, - /* total_number_of_instructions */ 0, - /* parent */ nullptr); - } else if (opt_name == HSharpening::kSharpeningPassName) { - return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles); - } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) { - return new (arena) HSelectGenerator(graph, stats); - } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { - return new (arena) HInductionVarAnalysis(graph); - } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) { - return new (arena) InstructionSimplifier(graph, codegen, driver, stats, pass_name.c_str()); - } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) { - return new (arena) IntrinsicsRecognizer(graph, stats); - } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) { - CHECK(most_recent_side_effects != nullptr); - return new (arena) LICM(graph, *most_recent_side_effects, stats); - } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) { - return new (arena) LoadStoreAnalysis(graph); - } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) { - CHECK(most_recent_side_effects != nullptr); - CHECK(most_recent_lsa != nullptr); - return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa); - } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { - return new (arena) SideEffectsAnalysis(graph); - } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) { - return new (arena) HLoopOptimization(graph, driver, most_recent_induction); - } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) { - return new (arena) CHAGuardOptimization(graph); - } else if (opt_name == CodeSinking::kCodeSinkingPassName) { - return new (arena) CodeSinking(graph, stats); 
-#ifdef ART_ENABLE_CODEGEN_arm - } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) { - return new (arena) arm::InstructionSimplifierArm(graph, stats); -#endif -#ifdef ART_ENABLE_CODEGEN_arm64 - } else if (opt_name == arm64::InstructionSimplifierArm64::kInstructionSimplifierArm64PassName) { - return new (arena) arm64::InstructionSimplifierArm64(graph, stats); -#endif -#ifdef ART_ENABLE_CODEGEN_mips - } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) { - return new (arena) mips::PcRelativeFixups(graph, codegen, stats); -#endif -#ifdef ART_ENABLE_CODEGEN_x86 - } else if (opt_name == x86::PcRelativeFixups::kPcRelativeFixupsX86PassName) { - return new (arena) x86::PcRelativeFixups(graph, codegen, stats); - } else if (opt_name == x86::X86MemoryOperandGeneration::kX86MemoryOperandGenerationPassName) { - return new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats); -#endif - } - return nullptr; -} - -static ArenaVector<HOptimization*> BuildOptimizations( - const std::vector<std::string>& pass_names, - ArenaAllocator* arena, - HGraph* graph, - OptimizingCompilerStats* stats, - CodeGenerator* codegen, - CompilerDriver* driver, - const DexCompilationUnit& dex_compilation_unit, - VariableSizedHandleScope* handles) { - // Few HOptimizations constructors require SideEffectsAnalysis or HInductionVarAnalysis - // instances. This method assumes that each of them expects the nearest instance preceeding it - // in the pass name list. - SideEffectsAnalysis* most_recent_side_effects = nullptr; - HInductionVarAnalysis* most_recent_induction = nullptr; - LoadStoreAnalysis* most_recent_lsa = nullptr; - ArenaVector<HOptimization*> ret(arena->Adapter()); - for (const std::string& pass_name : pass_names) { - HOptimization* opt = BuildOptimization( - pass_name, - arena, - graph, - stats, - codegen, - driver, - dex_compilation_unit, - handles, - most_recent_side_effects, - most_recent_induction, - most_recent_lsa); - CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\""; - ret.push_back(opt); - - std::string opt_name = ConvertPassNameToOptimizationName(pass_name); - if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) { - most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt); - } else if (opt_name == HInductionVarAnalysis::kInductionPassName) { - most_recent_induction = down_cast<HInductionVarAnalysis*>(opt); - } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) { - most_recent_lsa = down_cast<LoadStoreAnalysis*>(opt); - } - } - return ret; -} - -void OptimizingCompiler::RunOptimizations(HOptimization* optimizations[], - size_t length, - PassObserver* pass_observer) const { - for (size_t i = 0; i < length; ++i) { - PassScope scope(optimizations[i]->GetPassName(), pass_observer); - optimizations[i]->Run(); - } + return instruction_set == InstructionSet::kArm + || instruction_set == InstructionSet::kArm64 + || instruction_set == InstructionSet::kThumb2 + || instruction_set == InstructionSet::kMips + || instruction_set == InstructionSet::kMips64 + || instruction_set == InstructionSet::kX86 + || instruction_set == InstructionSet::kX86_64; } void OptimizingCompiler::MaybeRunInliner(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const { - OptimizingCompilerStats* stats = compilation_stats_.get(); - const CompilerOptions& compiler_options = 
driver->GetCompilerOptions(); + const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0); if (!should_inline) { return; } - size_t number_of_dex_registers = dex_compilation_unit.GetCodeItem()->registers_size_; - HInliner* inliner = new (graph->GetArena()) HInliner( - graph, // outer_graph - graph, // outermost_graph - codegen, - dex_compilation_unit, // outer_compilation_unit - dex_compilation_unit, // outermost_compilation_unit - driver, - handles, - stats, - number_of_dex_registers, - /* total_number_of_instructions */ 0, - /* parent */ nullptr); - HOptimization* optimizations[] = { inliner }; - - RunOptimizations(optimizations, arraysize(optimizations), pass_observer); + OptimizationDef optimizations[] = { + OptDef(OptimizationPass::kInliner) + }; + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + optimizations); } -void OptimizingCompiler::RunArchOptimizations(InstructionSet instruction_set, - HGraph* graph, +void OptimizingCompiler::RunArchOptimizations(HGraph* graph, CodeGenerator* codegen, - PassObserver* pass_observer) const { - UNUSED(codegen); // To avoid compilation error when compiling for svelte - OptimizingCompilerStats* stats = compilation_stats_.get(); - ArenaAllocator* arena = graph->GetArena(); - switch (instruction_set) { + const DexCompilationUnit& dex_compilation_unit, + PassObserver* pass_observer, + VariableSizedHandleScope* handles) const { + switch (GetCompilerDriver()->GetInstructionSet()) { #if defined(ART_ENABLE_CODEGEN_arm) - case kThumb2: - case kArm: { - arm::InstructionSimplifierArm* simplifier = - new (arena) arm::InstructionSimplifierArm(graph, stats); - SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); - GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch"); - HInstructionScheduling* scheduling = - new (arena) HInstructionScheduling(graph, instruction_set, codegen); - HOptimization* arm_optimizations[] = { - simplifier, - side_effects, - gvn, - scheduling, + case InstructionSet::kThumb2: + case InstructionSet::kArm: { + OptimizationDef arm_optimizations[] = { + OptDef(OptimizationPass::kInstructionSimplifierArm), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kScheduling) }; - RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer); + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + arm_optimizations); break; } #endif #ifdef ART_ENABLE_CODEGEN_arm64 - case kArm64: { - arm64::InstructionSimplifierArm64* simplifier = - new (arena) arm64::InstructionSimplifierArm64(graph, stats); - SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph); - GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects, "GVN$after_arch"); - HInstructionScheduling* scheduling = - new (arena) HInstructionScheduling(graph, instruction_set); - HOptimization* arm64_optimizations[] = { - simplifier, - side_effects, - gvn, - scheduling, + case InstructionSet::kArm64: { + OptimizationDef arm64_optimizations[] = { + OptDef(OptimizationPass::kInstructionSimplifierArm64), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kScheduling) }; - RunOptimizations(arm64_optimizations, arraysize(arm64_optimizations), 
pass_observer); + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + arm64_optimizations); break; } #endif #ifdef ART_ENABLE_CODEGEN_mips - case kMips: { - mips::PcRelativeFixups* pc_relative_fixups = - new (arena) mips::PcRelativeFixups(graph, codegen, stats); - HOptimization* mips_optimizations[] = { - pc_relative_fixups, + case InstructionSet::kMips: { + OptimizationDef mips_optimizations[] = { + OptDef(OptimizationPass::kInstructionSimplifierMips), + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kPcRelativeFixupsMips) + }; + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + mips_optimizations); + break; + } +#endif +#ifdef ART_ENABLE_CODEGEN_mips64 + case InstructionSet::kMips64: { + OptimizationDef mips64_optimizations[] = { + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch") }; - RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer); + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + mips64_optimizations); break; } #endif #ifdef ART_ENABLE_CODEGEN_x86 - case kX86: { - x86::PcRelativeFixups* pc_relative_fixups = - new (arena) x86::PcRelativeFixups(graph, codegen, stats); - x86::X86MemoryOperandGeneration* memory_gen = - new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats); - HOptimization* x86_optimizations[] = { - pc_relative_fixups, - memory_gen + case InstructionSet::kX86: { + OptimizationDef x86_optimizations[] = { + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kPcRelativeFixupsX86), + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; - RunOptimizations(x86_optimizations, arraysize(x86_optimizations), pass_observer); + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + x86_optimizations); break; } #endif #ifdef ART_ENABLE_CODEGEN_x86_64 - case kX86_64: { - x86::X86MemoryOperandGeneration* memory_gen = - new (arena) x86::X86MemoryOperandGeneration(graph, codegen, stats); - HOptimization* x86_64_optimizations[] = { - memory_gen + case InstructionSet::kX86_64: { + OptimizationDef x86_64_optimizations[] = { + OptDef(OptimizationPass::kSideEffectsAnalysis), + OptDef(OptimizationPass::kGlobalValueNumbering, "GVN$after_arch"), + OptDef(OptimizationPass::kX86MemoryOperandGeneration) }; - RunOptimizations(x86_64_optimizations, arraysize(x86_64_optimizations), pass_observer); + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + x86_64_optimizations); break; } #endif @@ -716,144 +575,138 @@ NO_INLINE // Avoid increasing caller's frame size by large stack-allocated obje static void AllocateRegisters(HGraph* graph, CodeGenerator* codegen, PassObserver* pass_observer, - RegisterAllocator::Strategy strategy) { + RegisterAllocator::Strategy strategy, + OptimizingCompilerStats* stats) { { PassScope scope(PrepareForRegisterAllocation::kPrepareForRegisterAllocationPassName, pass_observer); - PrepareForRegisterAllocation(graph).Run(); + PrepareForRegisterAllocation(graph, stats).Run(); } - SsaLivenessAnalysis liveness(graph, codegen); + // Use local allocator shared by SSA liveness analysis and register allocator. + // (Register allocator creates new objects in the liveness data.) 
+ ScopedArenaAllocator local_allocator(graph->GetArenaStack()); + SsaLivenessAnalysis liveness(graph, codegen, &local_allocator); { PassScope scope(SsaLivenessAnalysis::kLivenessPassName, pass_observer); liveness.Analyze(); } { PassScope scope(RegisterAllocator::kRegisterAllocatorPassName, pass_observer); - RegisterAllocator::Create(graph->GetArena(), codegen, liveness, strategy)->AllocateRegisters(); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(&local_allocator, codegen, liveness, strategy); + register_allocator->AllocateRegisters(); } } +// Strip pass name suffix to get optimization name. +static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) { + size_t pos = pass_name.find(kPassNameSeparator); + return pos == std::string::npos ? pass_name : pass_name.substr(0, pos); +} + void OptimizingCompiler::RunOptimizations(HGraph* graph, CodeGenerator* codegen, - CompilerDriver* driver, const DexCompilationUnit& dex_compilation_unit, PassObserver* pass_observer, VariableSizedHandleScope* handles) const { - OptimizingCompilerStats* stats = compilation_stats_.get(); - ArenaAllocator* arena = graph->GetArena(); - if (driver->GetCompilerOptions().GetPassesToRun() != nullptr) { - ArenaVector<HOptimization*> optimizations = BuildOptimizations( - *driver->GetCompilerOptions().GetPassesToRun(), - arena, - graph, - stats, - codegen, - driver, - dex_compilation_unit, - handles); - RunOptimizations(&optimizations[0], optimizations.size(), pass_observer); + const std::vector<std::string>* pass_names = + GetCompilerDriver()->GetCompilerOptions().GetPassesToRun(); + if (pass_names != nullptr) { + // If passes were defined on command-line, build the optimization + // passes and run these instead of the built-in optimizations. 
+ const size_t length = pass_names->size(); + std::vector<OptimizationDef> optimizations; + for (const std::string& pass_name : *pass_names) { + std::string opt_name = ConvertPassNameToOptimizationName(pass_name); + optimizations.push_back(OptDef(OptimizationPassByName(opt_name.c_str()), pass_name.c_str())); + } + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + optimizations.data(), + length); return; } - HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination( - graph, stats, "dead_code_elimination$initial"); - HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination( - graph, stats, "dead_code_elimination$after_inlining"); - HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination( - graph, stats, "dead_code_elimination$final"); - HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding"); - InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier( - graph, codegen, driver, stats); - HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats); - HConstantFolding* fold2 = new (arena) HConstantFolding( - graph, "constant_folding$after_inlining"); - HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce"); - SideEffectsAnalysis* side_effects1 = new (arena) SideEffectsAnalysis( - graph, "side_effects$before_gvn"); - SideEffectsAnalysis* side_effects2 = new (arena) SideEffectsAnalysis( - graph, "side_effects$before_lse"); - GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects1); - LICM* licm = new (arena) LICM(graph, *side_effects1, stats); - HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph); - BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction); - HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction); - LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph); - LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa); - HSharpening* sharpening = new (arena) HSharpening( - graph, codegen, dex_compilation_unit, driver, handles); - InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier( - graph, codegen, driver, stats, "instruction_simplifier$after_inlining"); - InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier( - graph, codegen, driver, stats, "instruction_simplifier$after_bce"); - InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier( - graph, codegen, driver, stats, "instruction_simplifier$before_codegen"); - IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats); - CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph); - CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats); - - HOptimization* optimizations1[] = { - intrinsics, - sharpening, - fold1, - simplify1, - dce1, + OptimizationDef optimizations1[] = { + OptDef(OptimizationPass::kIntrinsicsRecognizer), + OptDef(OptimizationPass::kSharpening), + OptDef(OptimizationPass::kConstantFolding), + OptDef(OptimizationPass::kInstructionSimplifier), + OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$initial") }; - RunOptimizations(optimizations1, arraysize(optimizations1), pass_observer); + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + optimizations1); - MaybeRunInliner(graph, codegen, driver, dex_compilation_unit, pass_observer, handles); + MaybeRunInliner(graph, codegen, 
dex_compilation_unit, pass_observer, handles); - HOptimization* optimizations2[] = { + OptimizationDef optimizations2[] = { // SelectGenerator depends on the InstructionSimplifier removing // redundant suspend checks to recognize empty blocks. - select_generator, - fold2, // TODO: if we don't inline we can also skip fold2. - simplify2, - dce2, - side_effects1, - gvn, - licm, - induction, - bce, - loop, - fold3, // evaluates code generated by dynamic bce - simplify3, - side_effects2, - lsa, - lse, - cha_guard, - dce3, - code_sinking, + OptDef(OptimizationPass::kSelectGenerator), + // TODO: if we don't inline we can also skip fold2. + OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_inlining"), + OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_inlining"), + OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$after_inlining"), + OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_gvn"), + OptDef(OptimizationPass::kGlobalValueNumbering), + OptDef(OptimizationPass::kInvariantCodeMotion), + OptDef(OptimizationPass::kInductionVarAnalysis), + OptDef(OptimizationPass::kBoundsCheckElimination), + OptDef(OptimizationPass::kLoopOptimization), + // Evaluates code generated by dynamic bce. + OptDef(OptimizationPass::kConstantFolding, "constant_folding$after_bce"), + OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$after_bce"), + OptDef(OptimizationPass::kSideEffectsAnalysis, "side_effects$before_lse"), + OptDef(OptimizationPass::kLoadStoreAnalysis), + OptDef(OptimizationPass::kLoadStoreElimination), + OptDef(OptimizationPass::kCHAGuardOptimization), + OptDef(OptimizationPass::kDeadCodeElimination, "dead_code_elimination$final"), + OptDef(OptimizationPass::kCodeSinking), // The codegen has a few assumptions that only the instruction simplifier // can satisfy. For example, the code generator does not expect to see a // HTypeConversion from a type to the same type. - simplify4, + OptDef(OptimizationPass::kInstructionSimplifier, "instruction_simplifier$before_codegen"), + // Eliminate constructor fences after code sinking to avoid + // complicated sinking logic to split a fence with many inputs. + OptDef(OptimizationPass::kConstructorFenceRedundancyElimination) }; - RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer); + RunOptimizations(graph, + codegen, + dex_compilation_unit, + pass_observer, + handles, + optimizations2); - RunArchOptimizations(driver->GetInstructionSet(), graph, codegen, pass_observer); + RunArchOptimizations(graph, codegen, dex_compilation_unit, pass_observer, handles); } -static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) { - ArenaVector<LinkerPatch> linker_patches(codegen->GetGraph()->GetArena()->Adapter()); +static ArenaVector<linker::LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) { + ArenaVector<linker::LinkerPatch> linker_patches(codegen->GetGraph()->GetAllocator()->Adapter()); codegen->EmitLinkerPatches(&linker_patches); // Sort patches by literal offset. Required for .oat_patches encoding. 
std::sort(linker_patches.begin(), linker_patches.end(), - [](const LinkerPatch& lhs, const LinkerPatch& rhs) { + [](const linker::LinkerPatch& lhs, const linker::LinkerPatch& rhs) { return lhs.LiteralOffset() < rhs.LiteralOffset(); }); return linker_patches; } -CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena, +CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* allocator, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexFile::CodeItem* code_item) const { - ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); - ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps)); - ArenaVector<uint8_t> method_info(arena->Adapter(kArenaAllocStackMaps)); + const DexFile::CodeItem* code_item_for_osr_check) const { + ArenaVector<linker::LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); + ArenaVector<uint8_t> stack_map(allocator->Adapter(kArenaAllocStackMaps)); + ArenaVector<uint8_t> method_info(allocator->Adapter(kArenaAllocStackMaps)); size_t stack_map_size = 0; size_t method_info_size = 0; codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size); @@ -861,10 +714,10 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena, method_info.resize(method_info_size); codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), MemoryRegion(method_info.data(), method_info.size()), - *code_item); + code_item_for_osr_check); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( - compiler_driver, + GetCompilerDriver(), codegen->GetInstructionSet(), ArrayRef<const uint8_t>(code_allocator->GetMemory()), // Follow Quick's behavior and set the frame size to zero if it is @@ -876,40 +729,38 @@ CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena, ArrayRef<const uint8_t>(method_info), ArrayRef<const uint8_t>(stack_map), ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()), - ArrayRef<const LinkerPatch>(linker_patches)); + ArrayRef<const linker::LinkerPatch>(linker_patches)); return compiled_method; } -CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, +CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, + ArenaStack* arena_stack, CodeVectorAllocator* code_allocator, - const DexFile::CodeItem* code_item, - uint32_t access_flags, - InvokeType invoke_type, - uint16_t class_def_idx, - uint32_t method_idx, - Handle<mirror::ClassLoader> class_loader, - const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache, + const DexCompilationUnit& dex_compilation_unit, ArtMethod* method, bool osr, VariableSizedHandleScope* handles) const { - MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptBytecodeCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); + const DexFile& dex_file = *dex_compilation_unit.GetDexFile(); + uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex(); + const DexFile::CodeItem* code_item = dex_compilation_unit.GetCodeItem(); // Always use the Thumb-2 assembler: some runtime functionality // (like implicit stack overflow checks) assume Thumb-2. - DCHECK_NE(instruction_set, kArm); + DCHECK_NE(instruction_set, InstructionSet::kArm); // Do not attempt to compile on architectures we do not support. 
if (!IsInstructionSetSupported(instruction_set)) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledUnsupportedIsa); + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledUnsupportedIsa); return nullptr; } if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological); + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledPathological); return nullptr; } @@ -918,25 +769,16 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, static constexpr size_t kSpaceFilterOptimizingThreshold = 128; const CompilerOptions& compiler_options = compiler_driver->GetCompilerOptions(); if ((compiler_options.GetCompilerFilter() == CompilerFilter::kSpace) - && (code_item->insns_size_in_code_units_ > kSpaceFilterOptimizingThreshold)) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledSpaceFilter); + && (CodeItemInstructionAccessor(dex_file, code_item).InsnsSizeInCodeUnits() > + kSpaceFilterOptimizingThreshold)) { + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledSpaceFilter); return nullptr; } - ClassLinker* class_linker = Runtime::Current()->GetClassLinker(); - DexCompilationUnit dex_compilation_unit( - class_loader, - class_linker, - dex_file, - code_item, - class_def_idx, - method_idx, - access_flags, - /* verified_method */ nullptr, - dex_cache); - - HGraph* graph = new (arena) HGraph( - arena, + CodeItemDebugInfoAccessor code_item_accessor(dex_file, code_item, method_idx); + HGraph* graph = new (allocator) HGraph( + allocator, + arena_stack, dex_file, method_idx, compiler_driver->GetInstructionSet(), @@ -944,19 +786,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, compiler_driver->GetCompilerOptions().GetDebuggable(), osr); - const uint8_t* interpreter_metadata = nullptr; - if (method == nullptr) { - ScopedObjectAccess soa(Thread::Current()); - method = compiler_driver->ResolveMethod( - soa, dex_cache, class_loader, &dex_compilation_unit, method_idx, invoke_type); - } + ArrayRef<const uint8_t> interpreter_metadata; // For AOT compilation, we may not get a method, for example if its class is erroneous. // JIT should always have a method. 
DCHECK(Runtime::Current()->IsAotCompiler() || method != nullptr); if (method != nullptr) { graph->SetArtMethod(method); ScopedObjectAccess soa(Thread::Current()); - interpreter_metadata = method->GetQuickenedInfo(class_linker->GetImagePointerSize()); + interpreter_metadata = method->GetQuickenedInfo(); } std::unique_ptr<CodeGenerator> codegen( @@ -966,7 +803,7 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, compiler_driver->GetCompilerOptions(), compilation_stats_.get())); if (codegen.get() == nullptr) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledNoCodegen); + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kNotCompiledNoCodegen); return nullptr; } codegen->GetAssembler()->cfi().SetEnabled( @@ -982,30 +819,36 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, VLOG(compiler) << "Building " << pass_observer.GetMethodName(); PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer); HGraphBuilder builder(graph, + code_item_accessor, &dex_compilation_unit, &dex_compilation_unit, - &dex_file, - *code_item, compiler_driver, codegen.get(), compilation_stats_.get(), interpreter_metadata, - dex_cache, handles); GraphAnalysisResult result = builder.BuildGraph(); if (result != kAnalysisSuccess) { switch (result) { - case kAnalysisSkipped: - MaybeRecordStat(MethodCompilationStat::kNotCompiledSkipped); + case kAnalysisSkipped: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledSkipped); + } break; - case kAnalysisInvalidBytecode: - MaybeRecordStat(MethodCompilationStat::kNotCompiledInvalidBytecode); + case kAnalysisInvalidBytecode: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledInvalidBytecode); + } break; - case kAnalysisFailThrowCatchLoop: - MaybeRecordStat(MethodCompilationStat::kNotCompiledThrowCatchLoop); + case kAnalysisFailThrowCatchLoop: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledThrowCatchLoop); + } break; - case kAnalysisFailAmbiguousArrayOp: - MaybeRecordStat(MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + case kAnalysisFailAmbiguousArrayOp: { + MaybeRecordStat(compilation_stats_.get(), + MethodCompilationStat::kNotCompiledAmbiguousArrayOp); + } break; case kAnalysisSuccess: UNREACHABLE(); @@ -1017,18 +860,126 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, RunOptimizations(graph, codegen.get(), - compiler_driver, dex_compilation_unit, &pass_observer, handles); RegisterAllocator::Strategy regalloc_strategy = compiler_options.GetRegisterAllocationStrategy(); - AllocateRegisters(graph, codegen.get(), &pass_observer, regalloc_strategy); + AllocateRegisters(graph, + codegen.get(), + &pass_observer, + regalloc_strategy, + compilation_stats_.get()); codegen->Compile(code_allocator); pass_observer.DumpDisassembly(); + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledBytecode); + return codegen.release(); +} + +CodeGenerator* OptimizingCompiler::TryCompileIntrinsic( + ArenaAllocator* allocator, + ArenaStack* arena_stack, + CodeVectorAllocator* code_allocator, + const DexCompilationUnit& dex_compilation_unit, + ArtMethod* method, + VariableSizedHandleScope* handles) const { + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kAttemptIntrinsicCompilation); + CompilerDriver* compiler_driver = GetCompilerDriver(); + InstructionSet instruction_set = compiler_driver->GetInstructionSet(); + const DexFile& dex_file = *dex_compilation_unit.GetDexFile(); 
+ uint32_t method_idx = dex_compilation_unit.GetDexMethodIndex(); + + // Always use the Thumb-2 assembler: some runtime functionality + // (like implicit stack overflow checks) assume Thumb-2. + DCHECK_NE(instruction_set, InstructionSet::kArm); + + // Do not attempt to compile on architectures we do not support. + if (!IsInstructionSetSupported(instruction_set)) { + return nullptr; + } + + HGraph* graph = new (allocator) HGraph( + allocator, + arena_stack, + dex_file, + method_idx, + compiler_driver->GetInstructionSet(), + kInvalidInvokeType, + compiler_driver->GetCompilerOptions().GetDebuggable(), + /* osr */ false); + + DCHECK(Runtime::Current()->IsAotCompiler()); + DCHECK(method != nullptr); + graph->SetArtMethod(method); + + std::unique_ptr<CodeGenerator> codegen( + CodeGenerator::Create(graph, + instruction_set, + *compiler_driver->GetInstructionSetFeatures(), + compiler_driver->GetCompilerOptions(), + compilation_stats_.get())); + if (codegen.get() == nullptr) { + return nullptr; + } + codegen->GetAssembler()->cfi().SetEnabled( + compiler_driver->GetCompilerOptions().GenerateAnyDebugInfo()); + + PassObserver pass_observer(graph, + codegen.get(), + visualizer_output_.get(), + compiler_driver, + dump_mutex_); + + { + VLOG(compiler) << "Building intrinsic graph " << pass_observer.GetMethodName(); + PassScope scope(HGraphBuilder::kBuilderPassName, &pass_observer); + HGraphBuilder builder(graph, + CodeItemDebugInfoAccessor(), // Null code item. + &dex_compilation_unit, + &dex_compilation_unit, + compiler_driver, + codegen.get(), + compilation_stats_.get(), + /* interpreter_metadata */ ArrayRef<const uint8_t>(), + handles); + builder.BuildIntrinsicGraph(method); + } + + OptimizationDef optimizations[] = { + OptDef(OptimizationPass::kIntrinsicsRecognizer), + // Some intrinsics are converted to HIR by the simplifier and the codegen also + // has a few assumptions that only the instruction simplifier can satisfy. 
+ OptDef(OptimizationPass::kInstructionSimplifier), + }; + RunOptimizations(graph, + codegen.get(), + dex_compilation_unit, + &pass_observer, + handles, + optimizations); + + RunArchOptimizations(graph, codegen.get(), dex_compilation_unit, &pass_observer, handles); + + AllocateRegisters(graph, + codegen.get(), + &pass_observer, + compiler_driver->GetCompilerOptions().GetRegisterAllocationStrategy(), + compilation_stats_.get()); + if (!codegen->IsLeafMethod()) { + VLOG(compiler) << "Intrinsic method is not leaf: " << method->GetIntrinsic() + << " " << graph->PrettyMethod(); + return nullptr; + } + + codegen->Compile(code_allocator); + pass_observer.DumpDisassembly(); + + VLOG(compiler) << "Compiled intrinsic: " << method->GetIntrinsic() + << " " << graph->PrettyMethod(); + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledIntrinsic); return codegen.release(); } @@ -1041,53 +992,90 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, const DexFile& dex_file, Handle<mirror::DexCache> dex_cache) const { CompilerDriver* compiler_driver = GetCompilerDriver(); - CompiledMethod* method = nullptr; - DCHECK(Runtime::Current()->IsAotCompiler()); + CompiledMethod* compiled_method = nullptr; + Runtime* runtime = Runtime::Current(); + DCHECK(runtime->IsAotCompiler()); const VerifiedMethod* verified_method = compiler_driver->GetVerifiedMethod(&dex_file, method_idx); DCHECK(!verified_method->HasRuntimeThrow()); - if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) - || verifier::CanCompilerHandleVerificationFailure( - verified_method->GetEncounteredVerificationFailures())) { - ArenaAllocator arena(Runtime::Current()->GetArenaPool()); - CodeVectorAllocator code_allocator(&arena); + if (compiler_driver->IsMethodVerifiedWithoutFailures(method_idx, class_def_idx, dex_file) || + verifier::CanCompilerHandleVerificationFailure( + verified_method->GetEncounteredVerificationFailures())) { + ArenaAllocator allocator(runtime->GetArenaPool()); + ArenaStack arena_stack(runtime->GetArenaPool()); + CodeVectorAllocator code_allocator(&allocator); std::unique_ptr<CodeGenerator> codegen; + bool compiled_intrinsic = false; { + DexCompilationUnit dex_compilation_unit( + jclass_loader, + runtime->GetClassLinker(), + dex_file, + code_item, + class_def_idx, + method_idx, + access_flags, + /* verified_method */ nullptr, // Not needed by the Optimizing compiler. + dex_cache); ScopedObjectAccess soa(Thread::Current()); + ArtMethod* method = compiler_driver->ResolveMethod( + soa, dex_cache, jclass_loader, &dex_compilation_unit, method_idx, invoke_type); VariableSizedHandleScope handles(soa.Self()); // Go to native so that we don't block GC during compilation. 
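The intrinsic path above runs a deliberately short, data-driven pass list (OptDef entries executed in order) instead of the full optimization pipeline. A rough sketch of that shape, with placeholder Pass and Graph types; ART's OptimizationDef/OptDef and RunOptimizations carry more information than this:

    #include <cstddef>
    #include <cstdio>

    enum class Pass { kIntrinsicsRecognizer, kInstructionSimplifier };

    struct Graph {};

    void RunPass(Graph* /* graph */, Pass pass) {
      switch (pass) {
        case Pass::kIntrinsicsRecognizer:
          std::puts("intrinsics recognizer");
          break;
        case Pass::kInstructionSimplifier:
          std::puts("instruction simplifier");
          break;
      }
    }

    // The intrinsic path runs only the handful of passes the code generator relies on,
    // in the order they appear in the list.
    void RunOptimizations(Graph* graph, const Pass* passes, size_t count) {
      for (size_t i = 0; i != count; ++i) {
        RunPass(graph, passes[i]);
      }
    }

    int main() {
      Graph graph;
      const Pass intrinsic_passes[] = { Pass::kIntrinsicsRecognizer, Pass::kInstructionSimplifier };
      RunOptimizations(&graph, intrinsic_passes, 2);
    }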
ScopedThreadSuspension sts(soa.Self(), kNative); - codegen.reset( - TryCompile(&arena, - &code_allocator, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - jclass_loader, - dex_file, - dex_cache, - nullptr, - /* osr */ false, - &handles)); + if (method != nullptr && UNLIKELY(method->IsIntrinsic())) { + DCHECK(compiler_driver->GetCompilerOptions().IsBootImage()); + codegen.reset( + TryCompileIntrinsic(&allocator, + &arena_stack, + &code_allocator, + dex_compilation_unit, + method, + &handles)); + if (codegen != nullptr) { + compiled_intrinsic = true; + } + } + if (codegen == nullptr) { + codegen.reset( + TryCompile(&allocator, + &arena_stack, + &code_allocator, + dex_compilation_unit, + method, + /* osr */ false, + &handles)); + } } if (codegen.get() != nullptr) { - MaybeRecordStat(MethodCompilationStat::kCompiled); - method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item); + compiled_method = Emit(&allocator, + &code_allocator, + codegen.get(), + compiled_intrinsic ? nullptr : code_item); + if (compiled_intrinsic) { + compiled_method->MarkAsIntrinsic(); + } if (kArenaAllocatorCountAllocations) { - if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) { - MemStats mem_stats(arena.GetMemStats()); - LOG(INFO) << dex_file.PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats); + codegen.reset(); // Release codegen's ScopedArenaAllocator for memory accounting. + size_t total_allocated = allocator.BytesAllocated() + arena_stack.PeakBytesAllocated(); + if (total_allocated > kArenaAllocatorMemoryReportThreshold) { + MemStats mem_stats(allocator.GetMemStats()); + MemStats peak_stats(arena_stack.GetPeakStats()); + LOG(INFO) << "Used " << total_allocated << " bytes of arena memory for compiling " + << dex_file.PrettyMethod(method_idx) + << "\n" << Dumpable<MemStats>(mem_stats) + << "\n" << Dumpable<MemStats>(peak_stats); } } } } else { + MethodCompilationStat method_stat; if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVerifyAtRuntime); + method_stat = MethodCompilationStat::kNotCompiledVerifyAtRuntime; } else { - MaybeRecordStat(MethodCompilationStat::kNotCompiledVerificationError); + method_stat = MethodCompilationStat::kNotCompiledVerificationError; } + MaybeRecordStat(compilation_stats_.get(), method_stat); } if (kIsDebugBuild && @@ -1099,10 +1087,71 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, // regressing. std::string method_name = dex_file.PrettyMethod(method_idx); bool shouldCompile = method_name.find("$opt$") != std::string::npos; - DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name; + DCHECK((compiled_method != nullptr) || !shouldCompile) << "Didn't compile " << method_name; } - return method; + return compiled_method; +} + +CompiledMethod* OptimizingCompiler::JniCompile(uint32_t access_flags, + uint32_t method_idx, + const DexFile& dex_file, + Handle<mirror::DexCache> dex_cache) const { + if (GetCompilerDriver()->GetCompilerOptions().IsBootImage()) { + ScopedObjectAccess soa(Thread::Current()); + Runtime* runtime = Runtime::Current(); + ArtMethod* method = runtime->GetClassLinker()->LookupResolvedMethod( + method_idx, dex_cache.Get(), /* class_loader */ nullptr); + if (method != nullptr && UNLIKELY(method->IsIntrinsic())) { + ScopedNullHandle<mirror::ClassLoader> class_loader; // null means boot class path loader. 
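Both Compile() and JniCompile() follow the same control flow: try the specialized intrinsic code generator first, and fall back to the regular path only when it declines. A schematic sketch of that flow; Codegen, TryCompileIntrinsic, and TryCompileBytecode are placeholders with far fewer parameters than the real functions:

    #include <memory>

    struct Codegen {
      bool is_intrinsic;
    };

    std::unique_ptr<Codegen> TryCompileIntrinsic() {
      return nullptr;  // May refuse, e.g. when the generated intrinsic code is not a leaf.
    }

    std::unique_ptr<Codegen> TryCompileBytecode() {
      return std::unique_ptr<Codegen>(new Codegen{false});
    }

    std::unique_ptr<Codegen> CompileMethod(bool method_is_intrinsic, bool* compiled_intrinsic) {
      *compiled_intrinsic = false;
      std::unique_ptr<Codegen> codegen;
      if (method_is_intrinsic) {
        codegen = TryCompileIntrinsic();   // Specialized path first.
        if (codegen != nullptr) {
          *compiled_intrinsic = true;      // Emit() then drops the code item for the OSR check.
        }
      }
      if (codegen == nullptr) {
        codegen = TryCompileBytecode();    // Fall back to the regular pipeline.
      }
      return codegen;
    }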
+ DexCompilationUnit dex_compilation_unit( + class_loader, + runtime->GetClassLinker(), + dex_file, + /* code_item */ nullptr, + /* class_def_idx */ DexFile::kDexNoIndex16, + method_idx, + access_flags, + /* verified_method */ nullptr, + dex_cache); + ArenaAllocator allocator(runtime->GetArenaPool()); + ArenaStack arena_stack(runtime->GetArenaPool()); + CodeVectorAllocator code_allocator(&allocator); + VariableSizedHandleScope handles(soa.Self()); + // Go to native so that we don't block GC during compilation. + ScopedThreadSuspension sts(soa.Self(), kNative); + std::unique_ptr<CodeGenerator> codegen( + TryCompileIntrinsic(&allocator, + &arena_stack, + &code_allocator, + dex_compilation_unit, + method, + &handles)); + if (codegen != nullptr) { + CompiledMethod* compiled_method = Emit(&allocator, + &code_allocator, + codegen.get(), + /* code_item_for_osr_check */ nullptr); + compiled_method->MarkAsIntrinsic(); + return compiled_method; + } + } + } + + JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( + GetCompilerDriver(), access_flags, method_idx, dex_file); + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kCompiledNativeStub); + return CompiledMethod::SwapAllocCompiledMethod( + GetCompilerDriver(), + jni_compiled_method.GetInstructionSet(), + jni_compiled_method.GetCode(), + jni_compiled_method.GetFrameSize(), + jni_compiled_method.GetCoreSpillMask(), + jni_compiled_method.GetFpSpillMask(), + /* method_info */ ArrayRef<const uint8_t>(), + /* vmap_table */ ArrayRef<const uint8_t>(), + jni_compiled_method.GetCfi(), + /* patches */ ArrayRef<const linker::LinkerPatch>()); } Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { @@ -1111,12 +1160,7 @@ Compiler* CreateOptimizingCompiler(CompilerDriver* driver) { bool IsCompilingWithCoreImage() { const std::string& image = Runtime::Current()->GetImageLocation(); - // TODO: This is under-approximating... 
- if (android::base::EndsWith(image, "core.art") || - android::base::EndsWith(image, "core-optimizing.art")) { - return true; - } - return false; + return CompilerDriver::IsCoreImageFilename(image); } bool EncodeArtMethodInInlineInfo(ArtMethod* method ATTRIBUTE_UNUSED) { @@ -1154,40 +1198,97 @@ bool OptimizingCompiler::JitCompile(Thread* self, const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset()); const uint32_t method_idx = method->GetDexMethodIndex(); const uint32_t access_flags = method->GetAccessFlags(); - const InvokeType invoke_type = method->GetInvokeType(); - ArenaAllocator arena(Runtime::Current()->GetJitArenaPool()); - CodeVectorAllocator code_allocator(&arena); + Runtime* runtime = Runtime::Current(); + ArenaAllocator allocator(runtime->GetJitArenaPool()); + + if (UNLIKELY(method->IsNative())) { + JniCompiledMethod jni_compiled_method = ArtQuickJniCompileMethod( + GetCompilerDriver(), access_flags, method_idx, *dex_file); + ScopedNullHandle<mirror::ObjectArray<mirror::Object>> roots; + ArenaSet<ArtMethod*, std::less<ArtMethod*>> cha_single_implementation_list( + allocator.Adapter(kArenaAllocCHA)); + const void* code = code_cache->CommitCode( + self, + method, + /* stack_map_data */ nullptr, + /* method_info_data */ nullptr, + /* roots_data */ nullptr, + jni_compiled_method.GetFrameSize(), + jni_compiled_method.GetCoreSpillMask(), + jni_compiled_method.GetFpSpillMask(), + jni_compiled_method.GetCode().data(), + jni_compiled_method.GetCode().size(), + /* data_size */ 0u, + osr, + roots, + /* has_should_deoptimize_flag */ false, + cha_single_implementation_list); + if (code == nullptr) { + return false; + } + + const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); + if (compiler_options.GenerateAnyDebugInfo()) { + const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); + const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); + debug::MethodDebugInfo info = {}; + DCHECK(info.custom_name.empty()); + info.dex_file = dex_file; + info.class_def_index = class_def_idx; + info.dex_method_index = method_idx; + info.access_flags = access_flags; + info.code_item = code_item; + info.isa = jni_compiled_method.GetInstructionSet(); + info.deduped = false; + info.is_native_debuggable = compiler_options.GetNativeDebuggable(); + info.is_optimized = true; + info.is_code_address_text_relative = false; + info.code_address = code_address; + info.code_size = jni_compiled_method.GetCode().size(); + info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); + info.code_info = nullptr; + info.cfi = jni_compiled_method.GetCfi(); + GenerateJitDebugInfo(method, info); + } + + Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); + if (jit_logger != nullptr) { + jit_logger->WriteLog(code, jni_compiled_method.GetCode().size(), method); + } + return true; + } + + ArenaStack arena_stack(runtime->GetJitArenaPool()); + CodeVectorAllocator code_allocator(&allocator); VariableSizedHandleScope handles(self); std::unique_ptr<CodeGenerator> codegen; { + DexCompilationUnit dex_compilation_unit( + class_loader, + runtime->GetClassLinker(), + *dex_file, + code_item, + class_def_idx, + method_idx, + access_flags, + /* verified_method */ nullptr, + dex_cache); + // Go to native so that we don't block GC during compilation. 
ScopedThreadSuspension sts(self, kNative); codegen.reset( - TryCompile(&arena, + TryCompile(&allocator, + &arena_stack, &code_allocator, - code_item, - access_flags, - invoke_type, - class_def_idx, - method_idx, - class_loader, - *dex_file, - dex_cache, + dex_compilation_unit, method, osr, &handles)); if (codegen.get() == nullptr) { return false; } - - if (kArenaAllocatorCountAllocations) { - if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) { - MemStats mem_stats(arena.GetMemStats()); - LOG(INFO) << dex_file->PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats); - } - } } size_t stack_map_size = 0; @@ -1203,6 +1304,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, self, class_linker->GetClassRoot(ClassLinker::kObjectArrayClass), number_of_roots))); if (roots == nullptr) { // Out of memory, just clear the exception to avoid any Java exception uncaught problems. + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); DCHECK(self->IsExceptionPending()); self->ClearException(); return false; @@ -1219,12 +1321,12 @@ bool OptimizingCompiler::JitCompile(Thread* self, &method_info_data, &roots_data); if (stack_map_data == nullptr || roots_data == nullptr) { + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); return false; } - MaybeRecordStat(MethodCompilationStat::kCompiled); codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), MemoryRegion(method_info_data, method_info_size), - *code_item); + code_item); codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data); const void* code = code_cache->CommitCode( @@ -1245,16 +1347,17 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen->GetGraph()->GetCHASingleImplementationList()); if (code == nullptr) { + MaybeRecordStat(compilation_stats_.get(), MethodCompilationStat::kJitOutOfMemoryForCommit); code_cache->ClearData(self, stack_map_data, roots_data); return false; } const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); - if (compiler_options.GetGenerateDebugInfo()) { + if (compiler_options.GenerateAnyDebugInfo()) { const auto* method_header = reinterpret_cast<const OatQuickMethodHeader*>(code); const uintptr_t code_address = reinterpret_cast<uintptr_t>(method_header->GetCode()); - debug::MethodDebugInfo info = debug::MethodDebugInfo(); - info.trampoline_name = nullptr; + debug::MethodDebugInfo info = {}; + DCHECK(info.custom_name.empty()); info.dex_file = dex_file; info.class_def_index = class_def_idx; info.dex_method_index = method_idx; @@ -1270,19 +1373,50 @@ bool OptimizingCompiler::JitCompile(Thread* self, info.frame_size_in_bytes = method_header->GetFrameSizeInBytes(); info.code_info = stack_map_size == 0 ? 
nullptr : stack_map_data; info.cfi = ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()); - std::vector<uint8_t> elf_file = debug::WriteDebugElfFileForMethods( - GetCompilerDriver()->GetInstructionSet(), - GetCompilerDriver()->GetInstructionSetFeatures(), - ArrayRef<const debug::MethodDebugInfo>(&info, 1)); - CreateJITCodeEntryForAddress(code_address, std::move(elf_file)); + GenerateJitDebugInfo(method, info); } - Runtime::Current()->GetJit()->AddMemoryUsage(method, arena.BytesUsed()); + Runtime::Current()->GetJit()->AddMemoryUsage(method, allocator.BytesUsed()); if (jit_logger != nullptr) { jit_logger->WriteLog(code, code_allocator.GetSize(), method); } + if (kArenaAllocatorCountAllocations) { + codegen.reset(); // Release codegen's ScopedArenaAllocator for memory accounting. + size_t total_allocated = allocator.BytesAllocated() + arena_stack.PeakBytesAllocated(); + if (total_allocated > kArenaAllocatorMemoryReportThreshold) { + MemStats mem_stats(allocator.GetMemStats()); + MemStats peak_stats(arena_stack.GetPeakStats()); + LOG(INFO) << "Used " << total_allocated << " bytes of arena memory for compiling " + << dex_file->PrettyMethod(method_idx) + << "\n" << Dumpable<MemStats>(mem_stats) + << "\n" << Dumpable<MemStats>(peak_stats); + } + } + return true; } +void OptimizingCompiler::GenerateJitDebugInfo(ArtMethod* method, debug::MethodDebugInfo info) { + const CompilerOptions& compiler_options = GetCompilerDriver()->GetCompilerOptions(); + DCHECK(compiler_options.GenerateAnyDebugInfo()); + + // If both flags are passed, generate full debug info. + const bool mini_debug_info = !compiler_options.GetGenerateDebugInfo(); + + // Create entry for the single method that we just compiled. + std::vector<uint8_t> elf_file = debug::MakeElfFileForJIT( + GetCompilerDriver()->GetInstructionSet(), + GetCompilerDriver()->GetInstructionSetFeatures(), + mini_debug_info, + ArrayRef<const debug::MethodDebugInfo>(&info, 1)); + MutexLock mu(Thread::Current(), *Locks::native_debug_interface_lock_); + AddNativeDebugInfoForJit(reinterpret_cast<const void*>(info.code_address), elf_file); + + VLOG(jit) + << "JIT mini-debug-info added for " << ArtMethod::PrettyMethod(method) + << " size=" << PrettySize(elf_file.size()) + << " total_size=" << PrettySize(GetJitNativeDebugInfoMemUsage()); +} + } // namespace art diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h index a211c5472a..00194ff1fe 100644 --- a/compiler/optimizing/optimizing_compiler_stats.h +++ b/compiler/optimizing/optimizing_compiler_stats.h @@ -22,14 +22,19 @@ #include <string> #include <type_traits> -#include "atomic.h" +#include "base/atomic.h" +#include "base/globals.h" +#include "base/logging.h" // For VLOG_IS_ON. 
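The arena accounting added above sums the retained allocator bytes with the peak of the scoped-arena stack and only logs when a threshold is exceeded. A generic sketch of that reporting pattern; AllocTracker and kReportThreshold are stand-ins, not ART's ArenaAllocator, ArenaStack, or kArenaAllocatorMemoryReportThreshold:

    #include <cstddef>
    #include <cstdio>

    constexpr size_t kReportThreshold = 8 * 1024 * 1024;  // Placeholder threshold.

    struct AllocTracker {
      size_t bytes_allocated = 0;
      size_t peak_bytes = 0;
    };

    void MaybeReportArenaUsage(const AllocTracker& arena,
                               const AllocTracker& arena_stack,
                               const char* method_name) {
      size_t total_allocated = arena.bytes_allocated + arena_stack.peak_bytes;
      if (total_allocated > kReportThreshold) {
        std::printf("Used %zu bytes of arena memory for compiling %s\n",
                    total_allocated, method_name);
      }
    }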
namespace art { -enum MethodCompilationStat { - kAttemptCompilation = 0, +enum class MethodCompilationStat { + kAttemptBytecodeCompilation = 0, + kAttemptIntrinsicCompilation, + kCompiledNativeStub, + kCompiledIntrinsic, + kCompiledBytecode, kCHAInline, - kCompiled, kInlinedInvoke, kReplacedInvokeWithSimplePattern, kInstructionSimplifications, @@ -62,12 +67,15 @@ enum MethodCompilationStat { kBooleanSimplified, kIntrinsicRecognized, kLoopInvariantMoved, + kLoopVectorized, + kLoopVectorizedIdiom, kSelectGenerated, kRemovedInstanceOf, kInlinedInvokeVirtualOrInterface, kImplicitNullCheckGenerated, kExplicitNullCheckGenerated, kSimplifyIf, + kSimplifyThrowingInvoke, kInstructionSunk, kNotInlinedUnresolvedEntrypoint, kNotInlinedDexCache, @@ -86,8 +94,15 @@ enum MethodCompilationStat { kNotInlinedWont, kNotInlinedRecursiveBudget, kNotInlinedProxy, + kConstructorFenceGeneratedNew, + kConstructorFenceGeneratedFinal, + kConstructorFenceRemovedLSE, + kConstructorFenceRemovedPFRA, + kConstructorFenceRemovedCFRE, + kJitOutOfMemoryForCommit, kLastStat }; +std::ostream& operator<<(std::ostream& os, const MethodCompilationStat& rhs); class OptimizingCompilerStats { public: @@ -97,7 +112,15 @@ class OptimizingCompilerStats { } void RecordStat(MethodCompilationStat stat, uint32_t count = 1) { - compile_stats_[stat] += count; + size_t stat_index = static_cast<size_t>(stat); + DCHECK_LT(stat_index, arraysize(compile_stats_)); + compile_stats_[stat_index] += count; + } + + uint32_t GetStat(MethodCompilationStat stat) const { + size_t stat_index = static_cast<size_t>(stat); + DCHECK_LT(stat_index, arraysize(compile_stats_)); + return compile_stats_[stat_index]; } void Log() const { @@ -106,18 +129,29 @@ class OptimizingCompilerStats { return; } - if (compile_stats_[kAttemptCompilation] == 0) { + uint32_t compiled_intrinsics = GetStat(MethodCompilationStat::kCompiledIntrinsic); + uint32_t compiled_native_stubs = GetStat(MethodCompilationStat::kCompiledNativeStub); + uint32_t bytecode_attempts = + GetStat(MethodCompilationStat::kAttemptBytecodeCompilation); + if (compiled_intrinsics == 0u && compiled_native_stubs == 0u && bytecode_attempts == 0u) { LOG(INFO) << "Did not compile any method."; } else { - float compiled_percent = - compile_stats_[kCompiled] * 100.0f / compile_stats_[kAttemptCompilation]; - LOG(INFO) << "Attempted compilation of " << compile_stats_[kAttemptCompilation] - << " methods: " << std::fixed << std::setprecision(2) - << compiled_percent << "% (" << compile_stats_[kCompiled] << ") compiled."; - - for (size_t i = 0; i < kLastStat; i++) { + uint32_t compiled_bytecode_methods = + GetStat(MethodCompilationStat::kCompiledBytecode); + // Successful intrinsic compilation preempts other compilation attempts but failed intrinsic + // compilation shall still count towards bytecode or native stub compilation attempts. 
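The stats storage that follows is a scoped enum indexing a fixed array of atomic counters, so adding a stat only means adding an enumerator before kLastStat. A self-contained sketch of that layout with a made-up three-entry enum:

    #include <atomic>
    #include <cstddef>
    #include <cstdint>

    enum class Stat : size_t {
      kCompiledBytecode,
      kCompiledIntrinsic,
      kCompiledNativeStub,
      kLastStat
    };

    class Counters {
     public:
      void Record(Stat stat, uint32_t count = 1) {
        counters_[static_cast<size_t>(stat)] += count;
      }

      uint32_t Get(Stat stat) const {
        return counters_[static_cast<size_t>(stat)].load();
      }

     private:
      std::atomic<uint32_t> counters_[static_cast<size_t>(Stat::kLastStat)] = {};
    };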
+ uint32_t num_compilation_attempts = + compiled_intrinsics + compiled_native_stubs + bytecode_attempts; + uint32_t num_successful_compilations = + compiled_intrinsics + compiled_native_stubs + compiled_bytecode_methods; + float compiled_percent = num_successful_compilations * 100.0f / num_compilation_attempts; + LOG(INFO) << "Attempted compilation of " + << num_compilation_attempts << " methods: " << std::fixed << std::setprecision(2) + << compiled_percent << "% (" << num_successful_compilations << ") compiled."; + + for (size_t i = 0; i < arraysize(compile_stats_); ++i) { if (compile_stats_[i] != 0) { - LOG(INFO) << PrintMethodCompilationStat(static_cast<MethodCompilationStat>(i)) << ": " + LOG(INFO) << "OptStat#" << static_cast<MethodCompilationStat>(i) << ": " << compile_stats_[i]; } } @@ -125,7 +159,7 @@ class OptimizingCompilerStats { } void AddTo(OptimizingCompilerStats* other_stats) { - for (size_t i = 0; i != kLastStat; ++i) { + for (size_t i = 0; i != arraysize(compile_stats_); ++i) { uint32_t count = compile_stats_[i]; if (count != 0) { other_stats->RecordStat(static_cast<MethodCompilationStat>(i), count); @@ -134,88 +168,25 @@ class OptimizingCompilerStats { } void Reset() { - for (size_t i = 0; i != kLastStat; ++i) { - compile_stats_[i] = 0u; + for (std::atomic<uint32_t>& stat : compile_stats_) { + stat = 0u; } } private: - std::string PrintMethodCompilationStat(MethodCompilationStat stat) const { - std::string name; - switch (stat) { - case kAttemptCompilation : name = "AttemptCompilation"; break; - case kCHAInline : name = "CHAInline"; break; - case kCompiled : name = "Compiled"; break; - case kInlinedInvoke : name = "InlinedInvoke"; break; - case kReplacedInvokeWithSimplePattern: name = "ReplacedInvokeWithSimplePattern"; break; - case kInstructionSimplifications: name = "InstructionSimplifications"; break; - case kInstructionSimplificationsArch: name = "InstructionSimplificationsArch"; break; - case kUnresolvedMethod : name = "UnresolvedMethod"; break; - case kUnresolvedField : name = "UnresolvedField"; break; - case kUnresolvedFieldNotAFastAccess : name = "UnresolvedFieldNotAFastAccess"; break; - case kRemovedCheckedCast: name = "RemovedCheckedCast"; break; - case kRemovedDeadInstruction: name = "RemovedDeadInstruction"; break; - case kRemovedNullCheck: name = "RemovedNullCheck"; break; - case kNotCompiledSkipped: name = "NotCompiledSkipped"; break; - case kNotCompiledInvalidBytecode: name = "NotCompiledInvalidBytecode"; break; - case kNotCompiledThrowCatchLoop : name = "NotCompiledThrowCatchLoop"; break; - case kNotCompiledAmbiguousArrayOp : name = "NotCompiledAmbiguousArrayOp"; break; - case kNotCompiledHugeMethod : name = "NotCompiledHugeMethod"; break; - case kNotCompiledLargeMethodNoBranches : name = "NotCompiledLargeMethodNoBranches"; break; - case kNotCompiledMalformedOpcode : name = "NotCompiledMalformedOpcode"; break; - case kNotCompiledNoCodegen : name = "NotCompiledNoCodegen"; break; - case kNotCompiledPathological : name = "NotCompiledPathological"; break; - case kNotCompiledSpaceFilter : name = "NotCompiledSpaceFilter"; break; - case kNotCompiledUnhandledInstruction : name = "NotCompiledUnhandledInstruction"; break; - case kNotCompiledUnsupportedIsa : name = "NotCompiledUnsupportedIsa"; break; - case kNotCompiledVerificationError : name = "NotCompiledVerificationError"; break; - case kNotCompiledVerifyAtRuntime : name = "NotCompiledVerifyAtRuntime"; break; - case kInlinedMonomorphicCall: name = "InlinedMonomorphicCall"; break; - case kInlinedPolymorphicCall: 
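Printing now goes through an operator<< declared for the enum, with the "OptStat#" prefix moved into the log message, instead of the removed name switch. The real operator is only declared in this header and defined elsewhere in the build; a small illustrative version with a hypothetical name table:

    #include <cstddef>
    #include <iostream>

    enum class Stat { kCompiledBytecode, kCompiledIntrinsic, kLastStat };  // Made-up subset.

    std::ostream& operator<<(std::ostream& os, Stat stat) {
      static const char* const kNames[] = { "CompiledBytecode", "CompiledIntrinsic" };
      size_t index = static_cast<size_t>(stat);
      return os << (index < 2u ? kNames[index] : "Unknown");
    }

    int main() {
      // Mirrors the new logging format at the call sites.
      std::cout << "OptStat#" << Stat::kCompiledIntrinsic << ": " << 42 << std::endl;
    }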
name = "InlinedPolymorphicCall"; break; - case kMonomorphicCall: name = "MonomorphicCall"; break; - case kPolymorphicCall: name = "PolymorphicCall"; break; - case kMegamorphicCall: name = "MegamorphicCall"; break; - case kBooleanSimplified : name = "BooleanSimplified"; break; - case kIntrinsicRecognized : name = "IntrinsicRecognized"; break; - case kLoopInvariantMoved : name = "LoopInvariantMoved"; break; - case kSelectGenerated : name = "SelectGenerated"; break; - case kRemovedInstanceOf: name = "RemovedInstanceOf"; break; - case kInlinedInvokeVirtualOrInterface: name = "InlinedInvokeVirtualOrInterface"; break; - case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break; - case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break; - case kSimplifyIf: name = "SimplifyIf"; break; - case kInstructionSunk: name = "InstructionSunk"; break; - case kNotInlinedUnresolvedEntrypoint: name = "NotInlinedUnresolvedEntrypoint"; break; - case kNotInlinedDexCache: name = "NotInlinedDexCache"; break; - case kNotInlinedStackMaps: name = "NotInlinedStackMaps"; break; - case kNotInlinedEnvironmentBudget: name = "NotInlinedEnvironmentBudget"; break; - case kNotInlinedInstructionBudget: name = "NotInlinedInstructionBudget"; break; - case kNotInlinedLoopWithoutExit: name = "NotInlinedLoopWithoutExit"; break; - case kNotInlinedIrreducibleLoop: name = "NotInlinedIrreducibleLoop"; break; - case kNotInlinedAlwaysThrows: name = "NotInlinedAlwaysThrows"; break; - case kNotInlinedInfiniteLoop: name = "NotInlinedInfiniteLoop"; break; - case kNotInlinedTryCatch: name = "NotInlinedTryCatch"; break; - case kNotInlinedRegisterAllocator: name = "NotInlinedRegisterAllocator"; break; - case kNotInlinedCannotBuild: name = "NotInlinedCannotBuild"; break; - case kNotInlinedNotVerified: name = "NotInlinedNotVerified"; break; - case kNotInlinedCodeItem: name = "NotInlinedCodeItem"; break; - case kNotInlinedWont: name = "NotInlinedWont"; break; - case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break; - case kNotInlinedProxy: name = "NotInlinedProxy"; break; - - case kLastStat: - LOG(FATAL) << "invalid stat " - << static_cast<std::underlying_type<MethodCompilationStat>::type>(stat); - UNREACHABLE(); - } - return "OptStat#" + name; - } - - std::atomic<uint32_t> compile_stats_[kLastStat]; + std::atomic<uint32_t> compile_stats_[static_cast<size_t>(MethodCompilationStat::kLastStat)]; DISALLOW_COPY_AND_ASSIGN(OptimizingCompilerStats); }; +inline void MaybeRecordStat(OptimizingCompilerStats* compiler_stats, + MethodCompilationStat stat, + uint32_t count = 1) { + if (compiler_stats != nullptr) { + compiler_stats->RecordStat(stat, count); + } +} + } // namespace art #endif // ART_COMPILER_OPTIMIZING_OPTIMIZING_COMPILER_STATS_H_ diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h index 1cdcbd2e9b..6dcbadba6e 100644 --- a/compiler/optimizing/optimizing_unit_test.h +++ b/compiler/optimizing/optimizing_unit_test.h @@ -17,12 +17,21 @@ #ifndef ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ #define ART_COMPILER_OPTIMIZING_OPTIMIZING_UNIT_TEST_H_ -#include "nodes.h" +#include <memory> +#include <vector> + +#include "base/scoped_arena_allocator.h" #include "builder.h" #include "common_compiler_test.h" -#include "dex_file.h" -#include "dex_instruction.h" -#include "handle_scope.h" +#include "dex/code_item_accessors-inl.h" +#include "dex/dex_file.h" +#include "dex/dex_instruction.h" +#include "dex/standard_dex_file.h" +#include 
"driver/dex_compilation_unit.h" +#include "handle_scope-inl.h" +#include "mirror/class_loader.h" +#include "mirror/dex_cache.h" +#include "nodes.h" #include "scoped_thread_state_change.h" #include "ssa_builder.h" #include "ssa_liveness_analysis.h" @@ -47,10 +56,11 @@ namespace art { LiveInterval* BuildInterval(const size_t ranges[][2], size_t number_of_ranges, - ArenaAllocator* allocator, + ScopedArenaAllocator* allocator, int reg = -1, HInstruction* defined_by = nullptr) { - LiveInterval* interval = LiveInterval::MakeInterval(allocator, Primitive::kPrimInt, defined_by); + LiveInterval* interval = + LiveInterval::MakeInterval(allocator, DataType::Type::kInt32, defined_by); if (defined_by != nullptr) { defined_by->SetLiveInterval(interval); } @@ -77,37 +87,111 @@ void RemoveSuspendChecks(HGraph* graph) { } } -inline HGraph* CreateGraph(ArenaAllocator* allocator) { - return new (allocator) HGraph( - allocator, - *reinterpret_cast<DexFile*>(allocator->Alloc(sizeof(DexFile))), - /*method_idx*/-1, - kRuntimeISA); -} +class ArenaPoolAndAllocator { + public: + ArenaPoolAndAllocator() + : pool_(), allocator_(&pool_), arena_stack_(&pool_), scoped_allocator_(&arena_stack_) { } + + ArenaAllocator* GetAllocator() { return &allocator_; } + ArenaStack* GetArenaStack() { return &arena_stack_; } + ScopedArenaAllocator* GetScopedAllocator() { return &scoped_allocator_; } + + private: + ArenaPool pool_; + ArenaAllocator allocator_; + ArenaStack arena_stack_; + ScopedArenaAllocator scoped_allocator_; +}; + +// Have a separate helper so the OptimizingCFITest can inherit it without causing +// multiple inheritance errors from having two gtest as a parent twice. +class OptimizingUnitTestHelper { + public: + OptimizingUnitTestHelper() : pool_and_allocator_(new ArenaPoolAndAllocator()) { } + + ArenaAllocator* GetAllocator() { return pool_and_allocator_->GetAllocator(); } + ArenaStack* GetArenaStack() { return pool_and_allocator_->GetArenaStack(); } + ScopedArenaAllocator* GetScopedAllocator() { return pool_and_allocator_->GetScopedAllocator(); } + + void ResetPoolAndAllocator() { + pool_and_allocator_.reset(new ArenaPoolAndAllocator()); + handles_.reset(); // When getting rid of the old HGraph, we can also reset handles_. + } -// Create a control-flow graph from Dex instructions. -inline HGraph* CreateCFG(ArenaAllocator* allocator, - const uint16_t* data, - Primitive::Type return_type = Primitive::kPrimInt) { - const DexFile::CodeItem* item = - reinterpret_cast<const DexFile::CodeItem*>(data); - HGraph* graph = CreateGraph(allocator); - - { - ScopedObjectAccess soa(Thread::Current()); - VariableSizedHandleScope handles(soa.Self()); - HGraphBuilder builder(graph, *item, &handles, return_type); - bool graph_built = (builder.BuildGraph() == kAnalysisSuccess); - return graph_built ? graph : nullptr; + HGraph* CreateGraph() { + ArenaAllocator* const allocator = pool_and_allocator_->GetAllocator(); + + // Reserve a big array of 0s so the dex file constructor can offsets from the header. + static constexpr size_t kDexDataSize = 4 * KB; + const uint8_t* dex_data = reinterpret_cast<uint8_t*>(allocator->Alloc(kDexDataSize)); + + // Create the dex file based on the fake data. Call the constructor so that we can use virtual + // functions. Don't use the arena for the StandardDexFile otherwise the dex location leaks. 
+ dex_files_.emplace_back(new StandardDexFile( + dex_data, + sizeof(StandardDexFile::Header), + "no_location", + /*location_checksum*/ 0, + /*oat_dex_file*/ nullptr, + /*container*/ nullptr)); + + return new (allocator) HGraph( + allocator, + pool_and_allocator_->GetArenaStack(), + *dex_files_.back(), + /*method_idx*/-1, + kRuntimeISA); } -} + + // Create a control-flow graph from Dex instructions. + HGraph* CreateCFG(const std::vector<uint16_t>& data, + DataType::Type return_type = DataType::Type::kInt32) { + HGraph* graph = CreateGraph(); + + // The code item data might not aligned to 4 bytes, copy it to ensure that. + const size_t code_item_size = data.size() * sizeof(data.front()); + void* aligned_data = GetAllocator()->Alloc(code_item_size); + memcpy(aligned_data, &data[0], code_item_size); + CHECK_ALIGNED(aligned_data, StandardDexFile::CodeItem::kAlignment); + const DexFile::CodeItem* code_item = reinterpret_cast<const DexFile::CodeItem*>(aligned_data); + + { + ScopedObjectAccess soa(Thread::Current()); + if (handles_ == nullptr) { + handles_.reset(new VariableSizedHandleScope(soa.Self())); + } + const DexCompilationUnit* dex_compilation_unit = + new (graph->GetAllocator()) DexCompilationUnit( + handles_->NewHandle<mirror::ClassLoader>(nullptr), + /* class_linker */ nullptr, + graph->GetDexFile(), + code_item, + /* class_def_index */ DexFile::kDexNoIndex16, + /* method_idx */ dex::kDexNoIndex, + /* access_flags */ 0u, + /* verified_method */ nullptr, + handles_->NewHandle<mirror::DexCache>(nullptr)); + CodeItemDebugInfoAccessor accessor(graph->GetDexFile(), code_item, /*dex_method_idx*/ 0u); + HGraphBuilder builder(graph, dex_compilation_unit, accessor, handles_.get(), return_type); + bool graph_built = (builder.BuildGraph() == kAnalysisSuccess); + return graph_built ? graph : nullptr; + } + } + + private: + std::vector<std::unique_ptr<const StandardDexFile>> dex_files_; + std::unique_ptr<ArenaPoolAndAllocator> pool_and_allocator_; + std::unique_ptr<VariableSizedHandleScope> handles_; +}; + +class OptimizingUnitTest : public CommonCompilerTest, public OptimizingUnitTestHelper {}; // Naive string diff data type. typedef std::list<std::pair<std::string, std::string>> diff_t; // An alias for the empty string used to make it clear that a line is // removed in a diff. -static const std::string removed = ""; +static const std::string removed = ""; // NOLINT [runtime/string] [4] // Naive patch command: apply a diff to a string. inline std::string Patch(const std::string& original, const diff_t& diff) { diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index be470ccb7d..2036b4a370 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -457,7 +457,7 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { DCHECK_NE(kind, Location::kConstant); Location scratch = AllocateScratchLocationFor(kind); // We only care about the move size. - Primitive::Type type = move->Is64BitMove() ? Primitive::kPrimLong : Primitive::kPrimInt; + DataType::Type type = move->Is64BitMove() ? 
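CreateCFG above copies the instruction words into freshly allocated storage before reinterpreting them as a code item, because the incoming buffer only guarantees 2-byte alignment while the code item needs 4. A standalone sketch of that alignment-safe copy; CodeItemView and kAlignment are placeholders for the real StandardDexFile::CodeItem and its alignment constant:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    constexpr size_t kAlignment = 4;            // Placeholder alignment requirement.

    struct alignas(kAlignment) CodeItemView {   // Placeholder for the real code item header.
      uint16_t first_code_unit;
    };

    // Copy the 16-bit code units into over-allocated storage and hand back a pointer
    // rounded up to the required alignment.
    const CodeItemView* MakeAlignedCodeItem(const std::vector<uint16_t>& data,
                                            std::vector<uint8_t>* storage) {
      size_t size_in_bytes = data.size() * sizeof(uint16_t);
      storage->assign(size_in_bytes + kAlignment, 0u);
      uintptr_t base = reinterpret_cast<uintptr_t>(storage->data());
      uintptr_t aligned = (base + kAlignment - 1) & ~static_cast<uintptr_t>(kAlignment - 1);
      if (size_in_bytes != 0) {
        std::memcpy(reinterpret_cast<void*>(aligned), data.data(), size_in_bytes);
      }
      assert(aligned % kAlignment == 0);
      return reinterpret_cast<const CodeItemView*>(aligned);
    }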
DataType::Type::kInt64 : DataType::Type::kInt32; // Perform (C -> scratch) move->SetDestination(scratch); EmitMove(index); @@ -521,7 +521,8 @@ void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { } void ParallelMoveResolverNoSwap::AddPendingMove(Location source, - Location destination, Primitive::Type type) { + Location destination, + DataType::Type type) { pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr)); } diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 4278861690..e6e069f96e 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -19,8 +19,8 @@ #include "base/arena_containers.h" #include "base/value_object.h" +#include "data_type.h" #include "locations.h" -#include "primitive.h" namespace art { @@ -177,7 +177,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { void UpdateMoveSource(Location from, Location to); - void AddPendingMove(Location source, Location destination, Primitive::Type type); + void AddPendingMove(Location source, Location destination, DataType::Type type); void DeletePendingMove(MoveOperands* move); diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index 5e8fe37669..cb87cabe1c 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -18,8 +18,8 @@ #include "nodes.h" #include "parallel_move_resolver.h" -#include "gtest/gtest.h" #include "gtest/gtest-typed-test.h" +#include "gtest/gtest.h" namespace art { @@ -158,7 +158,7 @@ static HParallelMove* BuildParallelMove(ArenaAllocator* allocator, moves->AddMove( Location::RegisterLocation(operands[i][0]), Location::RegisterLocation(operands[i][1]), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); } return moves; @@ -264,12 +264,12 @@ TYPED_TEST(ParallelMoveTest, ConstantLast) { moves->AddMove( Location::ConstantLocation(new (&allocator) HIntConstant(0)), Location::RegisterLocation(0), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterLocation(1), Location::RegisterLocation(2), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(1 -> 2) (C -> 0)", resolver.GetMessage().c_str()); @@ -285,12 +285,12 @@ TYPED_TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(4), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); @@ -302,12 +302,12 @@ TYPED_TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(4), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); resolver.EmitNativeCode(moves); ASSERT_STREQ("(2 -> 4) (0,1 -> 2,3)", resolver.GetMessage().c_str()); @@ -319,12 +319,12 @@ TYPED_TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterLocation(2), 
Location::RegisterLocation(0), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -339,17 +339,17 @@ TYPED_TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(7), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterLocation(7), Location::RegisterLocation(1), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -365,17 +365,17 @@ TYPED_TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(7), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterLocation(7), Location::RegisterLocation(1), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -391,17 +391,17 @@ TYPED_TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(7), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterLocation(7), Location::RegisterLocation(1), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -416,12 +416,12 @@ TYPED_TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterPairLocation(2, 3), Location::RegisterPairLocation(0, 1), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -436,12 +436,12 @@ TYPED_TEST(ParallelMoveTest, Pairs) { moves->AddMove( Location::RegisterPairLocation(2, 3), Location::RegisterPairLocation(0, 1), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -473,17 +473,17 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(0), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterLocation(3), Location::RegisterLocation(1), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -499,17 +499,17 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) { moves->AddMove( Location::RegisterLocation(2), Location::RegisterLocation(0), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterLocation(3), Location::RegisterLocation(1), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( 
Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -527,17 +527,17 @@ TYPED_TEST(ParallelMoveTest, MultiCycles) { moves->AddMove( Location::RegisterLocation(10), Location::RegisterLocation(5), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterPairLocation(4, 5), Location::DoubleStackSlot(32), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::DoubleStackSlot(32), Location::RegisterPairLocation(10, 11), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -560,17 +560,17 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { moves->AddMove( Location::RegisterLocation(0), Location::RegisterLocation(1), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterLocation(1), Location::StackSlot(48), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::StackSlot(48), Location::RegisterLocation(0), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -587,17 +587,17 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves) { moves->AddMove( Location::RegisterPairLocation(0, 1), Location::RegisterPairLocation(2, 3), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterPairLocation(2, 3), Location::DoubleStackSlot(32), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::DoubleStackSlot(32), Location::RegisterPairLocation(0, 1), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { @@ -619,17 +619,17 @@ TYPED_TEST(ParallelMoveTest, CyclesWith64BitsMoves2) { moves->AddMove( Location::RegisterLocation(0), Location::RegisterLocation(3), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); moves->AddMove( Location::RegisterPairLocation(2, 3), Location::RegisterPairLocation(0, 1), - Primitive::kPrimLong, + DataType::Type::kInt64, nullptr); moves->AddMove( Location::RegisterLocation(7), Location::RegisterLocation(2), - Primitive::kPrimInt, + DataType::Type::kInt32, nullptr); resolver.EmitNativeCode(moves); if (TestFixture::has_swap) { diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc index 21b645279e..9d5358514e 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.cc +++ b/compiler/optimizing/pc_relative_fixups_mips.cc @@ -52,7 +52,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } // Insert the base at the start of the entry block, move it to a better // position later in MoveBaseIfNeeded(). - base_ = new (GetGraph()->GetArena()) HMipsComputeBaseMethodAddress(); + base_ = new (GetGraph()->GetAllocator()) HMipsComputeBaseMethodAddress(); HBasicBlock* entry_block = GetGraph()->GetEntryBlock(); entry_block->InsertInstructionBefore(base_, entry_block->GetFirstInstruction()); DCHECK(base_ != nullptr); @@ -75,6 +75,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { switch (load_kind) { case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: case HLoadClass::LoadKind::kBootImageAddress: + case HLoadClass::LoadKind::kBootImageClassTable: case HLoadClass::LoadKind::kBssEntry: // Add a base register for PC-relative literals on R2. 
InitializePCRelativeBasePointer(); @@ -88,8 +89,9 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitLoadString(HLoadString* load_string) OVERRIDE { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); switch (load_kind) { - case HLoadString::LoadKind::kBootImageAddress: case HLoadString::LoadKind::kBootImageLinkTimePcRelative: + case HLoadString::LoadKind::kBootImageAddress: + case HLoadString::LoadKind::kBootImageInternTable: case HLoadString::LoadKind::kBssEntry: // Add a base register for PC-relative literals on R2. InitializePCRelativeBasePointer(); @@ -110,7 +112,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { InitializePCRelativeBasePointer(); HGraph* graph = GetGraph(); HBasicBlock* block = switch_insn->GetBlock(); - HMipsPackedSwitch* mips_switch = new (graph->GetArena()) HMipsPackedSwitch( + HMipsPackedSwitch* mips_switch = new (graph->GetAllocator()) HMipsPackedSwitch( switch_insn->GetStartValue(), switch_insn->GetNumEntries(), switch_insn->InputAt(0), diff --git a/compiler/optimizing/pc_relative_fixups_mips.h b/compiler/optimizing/pc_relative_fixups_mips.h index 5a7397bf9d..ec2c711f8d 100644 --- a/compiler/optimizing/pc_relative_fixups_mips.h +++ b/compiler/optimizing/pc_relative_fixups_mips.h @@ -29,7 +29,7 @@ namespace mips { class PcRelativeFixups : public HOptimization { public: PcRelativeFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats) - : HOptimization(graph, "pc_relative_fixups_mips", stats), + : HOptimization(graph, kPcRelativeFixupsMipsPassName, stats), codegen_(codegen) {} static constexpr const char* kPcRelativeFixupsMipsPassName = "pc_relative_fixups_mips"; diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc index 2743df9dcf..f92f4b274a 100644 --- a/compiler/optimizing/pc_relative_fixups_x86.cc +++ b/compiler/optimizing/pc_relative_fixups_x86.cc @@ -63,7 +63,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitReturn(HReturn* ret) OVERRIDE { HConstant* value = ret->InputAt(0)->AsConstant(); - if ((value != nullptr && Primitive::IsFloatingPointType(value->GetType()))) { + if ((value != nullptr && DataType::IsFloatingPointType(value->GetType()))) { ReplaceInput(ret, value, 0, true); } } @@ -83,6 +83,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitLoadClass(HLoadClass* load_class) OVERRIDE { HLoadClass::LoadKind load_kind = load_class->GetLoadKind(); if (load_kind == HLoadClass::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadClass::LoadKind::kBootImageClassTable || load_kind == HLoadClass::LoadKind::kBssEntry) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_class); load_class->AddSpecialInput(method_address); @@ -92,6 +93,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void VisitLoadString(HLoadString* load_string) OVERRIDE { HLoadString::LoadKind load_kind = load_string->GetLoadKind(); if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative || + load_kind == HLoadString::LoadKind::kBootImageInternTable || load_kind == HLoadString::LoadKind::kBssEntry) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(load_string); load_string->AddSpecialInput(method_address); @@ -100,7 +102,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void BinaryFP(HBinaryOperation* bin) { HConstant* rhs = bin->InputAt(1)->AsConstant(); - if (rhs != nullptr && Primitive::IsFloatingPointType(rhs->GetType())) { + if (rhs != nullptr 
&& DataType::IsFloatingPointType(rhs->GetType())) { ReplaceInput(bin, rhs, 1, false); } } @@ -130,12 +132,12 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { } void VisitNeg(HNeg* neg) OVERRIDE { - if (Primitive::IsFloatingPointType(neg->GetType())) { + if (DataType::IsFloatingPointType(neg->GetType())) { // We need to replace the HNeg with a HX86FPNeg in order to address the constant area. HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(neg); HGraph* graph = GetGraph(); HBasicBlock* block = neg->GetBlock(); - HX86FPNeg* x86_fp_neg = new (graph->GetArena()) HX86FPNeg( + HX86FPNeg* x86_fp_neg = new (graph->GetAllocator()) HX86FPNeg( neg->GetType(), neg->InputAt(0), method_address, @@ -154,7 +156,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(switch_insn); HGraph* graph = GetGraph(); HBasicBlock* block = switch_insn->GetBlock(); - HX86PackedSwitch* x86_switch = new (graph->GetArena()) HX86PackedSwitch( + HX86PackedSwitch* x86_switch = new (graph->GetAllocator()) HX86PackedSwitch( switch_insn->GetStartValue(), switch_insn->GetNumEntries(), switch_insn->InputAt(0), @@ -174,7 +176,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { // Insert the base at the start of the entry block, move it to a better // position later in MoveBaseIfNeeded(). HX86ComputeBaseMethodAddress* method_address = - new (GetGraph()->GetArena()) HX86ComputeBaseMethodAddress(); + new (GetGraph()->GetAllocator()) HX86ComputeBaseMethodAddress(); if (has_irreducible_loops) { cursor->GetBlock()->InsertInstructionBefore(method_address, cursor); } else { @@ -188,7 +190,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { void ReplaceInput(HInstruction* insn, HConstant* value, int input_index, bool materialize) { HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(insn); HX86LoadFromConstantTable* load_constant = - new (GetGraph()->GetArena()) HX86LoadFromConstantTable(method_address, value); + new (GetGraph()->GetAllocator()) HX86LoadFromConstantTable(method_address, value); if (!materialize) { load_constant->MarkEmittedAtUseSite(); } @@ -223,7 +225,7 @@ class PCRelativeHandlerVisitor : public HGraphVisitor { HInputsRef inputs = invoke->GetInputs(); for (size_t i = 0; i < inputs.size(); i++) { HConstant* input = inputs[i]->AsConstant(); - if (input != nullptr && Primitive::IsFloatingPointType(input->GetType())) { + if (input != nullptr && DataType::IsFloatingPointType(input->GetType())) { ReplaceInput(invoke, input, i, true); } } diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc index aa42fd647b..f843c008d8 100644 --- a/compiler/optimizing/prepare_for_register_allocation.cc +++ b/compiler/optimizing/prepare_for_register_allocation.cc @@ -16,7 +16,9 @@ #include "prepare_for_register_allocation.h" +#include "dex/dex_file_types.h" #include "jni_internal.h" +#include "optimizing_compiler_stats.h" #include "well_known_classes.h" namespace art { @@ -51,16 +53,18 @@ void PrepareForRegisterAllocation::VisitDeoptimize(HDeoptimize* deoptimize) { void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) { check->ReplaceWith(check->InputAt(0)); if (check->IsStringCharAt()) { - // Add a fake environment for String.charAt() inline info as we want - // the exception to appear as being thrown from there. 
+ // Add a fake environment for String.charAt() inline info as we want the exception + // to appear as being thrown from there. Skip if we're compiling String.charAt() itself. ArtMethod* char_at_method = jni::DecodeArtMethod(WellKnownClasses::java_lang_String_charAt); - ArenaAllocator* arena = GetGraph()->GetArena(); - HEnvironment* environment = new (arena) HEnvironment(arena, - /* number_of_vregs */ 0u, - char_at_method, - /* dex_pc */ DexFile::kDexNoIndex, - check); - check->InsertRawEnvironment(environment); + if (GetGraph()->GetArtMethod() != char_at_method) { + ArenaAllocator* allocator = GetGraph()->GetAllocator(); + HEnvironment* environment = new (allocator) HEnvironment(allocator, + /* number_of_vregs */ 0u, + char_at_method, + /* dex_pc */ dex::kDexNoIndex, + check); + check->InsertRawEnvironment(environment); + } } } @@ -75,7 +79,7 @@ void PrepareForRegisterAllocation::VisitArraySet(HArraySet* instruction) { // BoundType (as value input of this ArraySet) with a NullConstant. // If so, this ArraySet no longer needs a type check. if (value->IsNullConstant()) { - DCHECK_EQ(value->GetType(), Primitive::kPrimNot); + DCHECK_EQ(value->GetType(), DataType::Type::kReference); if (instruction->NeedsTypeCheck()) { instruction->ClearNeedsTypeCheck(); } @@ -190,8 +194,9 @@ void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* cons // TODO: GetAssociatedAllocation should not care about multiple inputs // if we are in prepare_for_register_allocation pass only. constructor_fence->GetBlock()->RemoveInstruction(constructor_fence); + MaybeRecordStat(stats_, + MethodCompilationStat::kConstructorFenceRemovedPFRA); return; - // TODO: actually remove the dmb from the .S entrypoints (initialized variants only). } // HNewArray does not need this check because the art_quick_alloc_array does not itself @@ -208,8 +213,8 @@ void PrepareForRegisterAllocation::VisitConstructorFence(HConstructorFence* cons void PrepareForRegisterAllocation::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) { if (invoke->IsStaticWithExplicitClinitCheck()) { - HLoadClass* last_input = invoke->GetInputs().back()->AsLoadClass(); - DCHECK(last_input != nullptr) + HInstruction* last_input = invoke->GetInputs().back(); + DCHECK(last_input->IsLoadClass()) << "Last input is not HLoadClass. It is " << last_input->DebugName(); // Detach the explicit class initialization check from the invoke. diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h index 395d4ba2ee..2c64f016c1 100644 --- a/compiler/optimizing/prepare_for_register_allocation.h +++ b/compiler/optimizing/prepare_for_register_allocation.h @@ -21,6 +21,8 @@ namespace art { +class OptimizingCompilerStats; + /** * A simplification pass over the graph before doing register allocation. 
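The VisitInvokeStaticOrDirect change above keeps the un-downcast input pointer and asserts IsLoadClass() on it, so the failure message can call DebugName() on a valid object instead of on the null result of a failed AsLoadClass(). A small illustration of that pattern with a toy hierarchy, not ART's HInstruction classes:

    #include <iostream>

    struct Node {
      virtual ~Node() {}
      virtual const char* DebugName() const { return "Node"; }
      virtual bool IsLoadClass() const { return false; }
    };

    struct LoadClass : Node {
      const char* DebugName() const override { return "LoadClass"; }
      bool IsLoadClass() const override { return true; }
    };

    bool CheckLastInput(const Node* last_input) {
      // Keep the base pointer and test the predicate on it: when the check fails, the
      // message still names the real instruction, whereas the old pattern reported on a
      // downcast result that was null exactly when the message was needed.
      if (!last_input->IsLoadClass()) {
        std::cerr << "Last input is not LoadClass. It is " << last_input->DebugName() << "\n";
        return false;
      }
      return true;
    }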
* For example it changes uses of null checks and bounds checks to the original @@ -28,7 +30,9 @@ namespace art { */ class PrepareForRegisterAllocation : public HGraphDelegateVisitor { public: - explicit PrepareForRegisterAllocation(HGraph* graph) : HGraphDelegateVisitor(graph) {} + explicit PrepareForRegisterAllocation(HGraph* graph, + OptimizingCompilerStats* stats = nullptr) + : HGraphDelegateVisitor(graph, stats) {} void Run(); diff --git a/compiler/optimizing/pretty_printer_test.cc b/compiler/optimizing/pretty_printer_test.cc index 1af94f3445..6ef386b4a5 100644 --- a/compiler/optimizing/pretty_printer_test.cc +++ b/compiler/optimizing/pretty_printer_test.cc @@ -14,31 +14,33 @@ * limitations under the License. */ +#include "pretty_printer.h" + #include "base/arena_allocator.h" #include "builder.h" -#include "dex_file.h" -#include "dex_instruction.h" +#include "dex/dex_file.h" +#include "dex/dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" -#include "pretty_printer.h" #include "gtest/gtest.h" namespace art { -static void TestCode(const uint16_t* data, const char* expected) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); +class PrettyPrinterTest : public OptimizingUnitTest { + protected: + void TestCode(const std::vector<uint16_t>& data, const char* expected); +}; + +void PrettyPrinterTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { + HGraph* graph = CreateCFG(data); StringPrettyPrinter printer(graph); printer.VisitInsertionOrder(); ASSERT_STREQ(expected, printer.str().c_str()); } -class PrettyPrinterTest : public CommonCompilerTest {}; - TEST_F(PrettyPrinterTest, ReturnVoid) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID); const char* expected = @@ -65,7 +67,7 @@ TEST_F(PrettyPrinterTest, CFG1) { "BasicBlock 3, pred: 2\n" " 4: Exit\n"; - const uint16_t data[] = + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -87,7 +89,7 @@ TEST_F(PrettyPrinterTest, CFG2) { "BasicBlock 4, pred: 3\n" " 5: Exit\n"; - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x100, Instruction::GOTO | 0x100, Instruction::RETURN_VOID); @@ -109,21 +111,21 @@ TEST_F(PrettyPrinterTest, CFG3) { "BasicBlock 4, pred: 2\n" " 5: Exit\n"; - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO | 0x200, Instruction::RETURN_VOID, Instruction::GOTO | 0xFF00); TestCode(data1, expected); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_16, 3, Instruction::RETURN_VOID, Instruction::GOTO_16, 0xFFFF); TestCode(data2, expected); - const uint16_t data3[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data3 = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 4, 0, Instruction::RETURN_VOID, Instruction::GOTO_32, 0xFFFF, 0xFFFF); @@ -142,13 +144,13 @@ TEST_F(PrettyPrinterTest, CFG4) { "BasicBlock 3, pred: 0, succ: 1\n" " 0: Goto 1\n"; - const uint16_t data1[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data1 = ZERO_REGISTER_CODE_ITEM( Instruction::NOP, Instruction::GOTO | 0xFF00); TestCode(data1, expected); - const uint16_t data2[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data2 = ZERO_REGISTER_CODE_ITEM( 
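The pretty-printer tests above move TestCode into a fixture method and switch the bytecode arrays to std::vector, so the helper can reach the fixture's per-test state. A minimal gtest sketch of that fixture shape; PrettyPrinterStyleTest and BuildOutput are invented stand-ins that only mimic the structure:

    #include <string>
    #include <vector>
    #include <gtest/gtest.h>

    class PrettyPrinterStyleTest : public ::testing::Test {
     protected:
      void TestCode(const std::vector<uint16_t>& data, const char* expected) {
        std::string actual = BuildOutput(data);
        ASSERT_STREQ(expected, actual.c_str());
      }

     private:
      std::string BuildOutput(const std::vector<uint16_t>& data) {
        return data.empty() ? "" : "printed";  // Stand-in for building and printing a graph.
      }
    };

    TEST_F(PrettyPrinterStyleTest, EmptyInput) {
      TestCode({}, "");
    }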
Instruction::GOTO_32, 0, 0); TestCode(data2, expected); @@ -164,7 +166,7 @@ TEST_F(PrettyPrinterTest, CFG5) { "BasicBlock 3, pred: 1\n" " 3: Exit\n"; - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::RETURN_VOID, Instruction::GOTO | 0x100, Instruction::GOTO | 0xFE00); @@ -190,7 +192,7 @@ TEST_F(PrettyPrinterTest, CFG6) { "BasicBlock 5, pred: 1, succ: 3\n" " 0: Goto 3\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -218,7 +220,7 @@ TEST_F(PrettyPrinterTest, CFG7) { "BasicBlock 6, pred: 1, succ: 2\n" " 1: Goto 2\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -238,7 +240,7 @@ TEST_F(PrettyPrinterTest, IntConstant) { "BasicBlock 2, pred: 1\n" " 4: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc index f172e16ff9..67a61fc01d 100644 --- a/compiler/optimizing/reference_type_propagation.cc +++ b/compiler/optimizing/reference_type_propagation.cc @@ -18,8 +18,11 @@ #include "art_field-inl.h" #include "art_method-inl.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "base/enums.h" #include "class_linker-inl.h" +#include "handle_scope-inl.h" #include "mirror/class-inl.h" #include "mirror/dex_cache.h" #include "scoped_thread_state_change-inl.h" @@ -70,14 +73,16 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> hint_dex_cache, HandleCache* handle_cache, - ArenaVector<HInstruction*>* worklist, bool is_first_run) : HGraphDelegateVisitor(graph), class_loader_(class_loader), hint_dex_cache_(hint_dex_cache), handle_cache_(handle_cache), - worklist_(worklist), - is_first_run_(is_first_run) {} + allocator_(graph->GetArenaStack()), + worklist_(allocator_.Adapter(kArenaAllocReferenceTypePropagation)), + is_first_run_(is_first_run) { + worklist_.reserve(kDefaultWorklistSize); + } void VisitDeoptimize(HDeoptimize* deopt) OVERRIDE; void VisitNewInstance(HNewInstance* new_instance) OVERRIDE; @@ -87,9 +92,6 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { void VisitLoadException(HLoadException* instr) OVERRIDE; void VisitNewArray(HNewArray* instr) OVERRIDE; void VisitParameterValue(HParameterValue* instr) OVERRIDE; - void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); - void SetClassAsTypeInfo(HInstruction* instr, ObjPtr<mirror::Class> klass, bool is_exact) - REQUIRES_SHARED(Locks::mutator_lock_); void VisitInstanceFieldGet(HInstanceFieldGet* instr) OVERRIDE; void VisitStaticFieldGet(HStaticFieldGet* instr) OVERRIDE; void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instr) OVERRIDE; @@ -99,16 +101,39 @@ class ReferenceTypePropagation::RTPVisitor : public HGraphDelegateVisitor { void VisitCheckCast(HCheckCast* instr) OVERRIDE; void VisitBoundType(HBoundType* instr) OVERRIDE; void VisitNullCheck(HNullCheck* instr) OVERRIDE; + void VisitPhi(HPhi* phi); + + void VisitBasicBlock(HBasicBlock* block); + 
void ProcessWorklist(); + + private: + void UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info); + void SetClassAsTypeInfo(HInstruction* instr, ObjPtr<mirror::Class> klass, bool is_exact) + REQUIRES_SHARED(Locks::mutator_lock_); + void BoundTypeForIfNotNull(HBasicBlock* block); + static void BoundTypeForIfInstanceOf(HBasicBlock* block); + static bool UpdateNullability(HInstruction* instr); + static void UpdateBoundType(HBoundType* bound_type) REQUIRES_SHARED(Locks::mutator_lock_); + void UpdateArrayGet(HArrayGet* instr) REQUIRES_SHARED(Locks::mutator_lock_); + void UpdatePhi(HPhi* phi) REQUIRES_SHARED(Locks::mutator_lock_); + bool UpdateReferenceTypeInfo(HInstruction* instr); void UpdateReferenceTypeInfo(HInstruction* instr, dex::TypeIndex type_idx, const DexFile& dex_file, bool is_exact); - private: + void AddToWorklist(HInstruction* instruction); + void AddDependentInstructionsToWorklist(HInstruction* instruction); + + static constexpr size_t kDefaultWorklistSize = 8; + Handle<mirror::ClassLoader> class_loader_; Handle<mirror::DexCache> hint_dex_cache_; - HandleCache* handle_cache_; - ArenaVector<HInstruction*>* worklist_; + HandleCache* const handle_cache_; + + // Use local allocator for allocating memory. + ScopedArenaAllocator allocator_; + ScopedArenaVector<HInstruction*> worklist_; const bool is_first_run_; }; @@ -122,7 +147,6 @@ ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph, class_loader_(class_loader), hint_dex_cache_(hint_dex_cache), handle_cache_(handles), - worklist_(graph->GetArena()->Adapter(kArenaAllocReferenceTypePropagation)), is_first_run_(is_first_run) { } @@ -133,7 +157,7 @@ void ReferenceTypePropagation::ValidateTypes() { for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) { HInstruction* instr = iti.Current(); - if (instr->GetType() == Primitive::kPrimNot) { + if (instr->GetType() == DataType::Type::kReference) { DCHECK(instr->GetReferenceTypeInfo().IsValid()) << "Invalid RTI for instruction: " << instr->DebugName(); if (instr->IsBoundType()) { @@ -158,7 +182,6 @@ void ReferenceTypePropagation::Visit(HInstruction* instruction) { class_loader_, hint_dex_cache_, &handle_cache_, - &worklist_, is_first_run_); instruction->Accept(&visitor); } @@ -235,7 +258,7 @@ static void BoundTypeIn(HInstruction* receiver, : start_block->GetFirstInstruction(); if (ShouldCreateBoundType( insert_point, receiver, class_rti, start_instruction, start_block)) { - bound_type = new (receiver->GetBlock()->GetGraph()->GetArena()) HBoundType(receiver); + bound_type = new (receiver->GetBlock()->GetGraph()->GetAllocator()) HBoundType(receiver); bound_type->SetUpperBound(class_rti, /* bound_can_be_null */ false); start_block->InsertInstructionBefore(bound_type, insert_point); // To comply with the RTP algorithm, don't type the bound type just yet, it will @@ -319,26 +342,20 @@ static void BoundTypeForClassCheck(HInstruction* check) { } void ReferenceTypePropagation::Run() { - worklist_.reserve(kDefaultWorklistSize); + RTPVisitor visitor(graph_, class_loader_, hint_dex_cache_, &handle_cache_, is_first_run_); // To properly propagate type info we need to visit in the dominator-based order. // Reverse post order guarantees a node's dominators are visited first. // We take advantage of this order in `VisitBasicBlock`. 
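The comment above relies on reverse post order visiting a block only after the blocks that dominate it. A minimal standalone sketch of computing such an order with an iterative DFS; `Block` and `ReversePostOrder` are illustrative stand-ins, not ART's HBasicBlock/HGraph API:

#include <algorithm>
#include <unordered_set>
#include <utility>
#include <vector>

struct Block {
  std::vector<Block*> successors;
};

// Post-order DFS, then reverse. In the reversed order every block appears
// before its forward-edge successors, so a block's dominators are always
// visited before the block itself.
std::vector<Block*> ReversePostOrder(Block* entry) {
  std::vector<Block*> post_order;
  std::vector<std::pair<Block*, size_t>> stack;
  std::unordered_set<Block*> visited;
  stack.emplace_back(entry, 0u);
  visited.insert(entry);
  while (!stack.empty()) {
    Block* block = stack.back().first;
    size_t index = stack.back().second;
    if (index < block->successors.size()) {
      stack.back().second = index + 1u;
      Block* successor = block->successors[index];
      if (visited.insert(successor).second) {
        stack.emplace_back(successor, 0u);
      }
    } else {
      post_order.push_back(block);
      stack.pop_back();
    }
  }
  std::reverse(post_order.begin(), post_order.end());
  return post_order;
}

Visiting in this order lets most instructions be typed in a single pass, with the worklist only needed for values whose inputs (e.g. loop phis) are not yet typed.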
for (HBasicBlock* block : graph_->GetReversePostOrder()) { - VisitBasicBlock(block); + visitor.VisitBasicBlock(block); } - ProcessWorklist(); + visitor.ProcessWorklist(); ValidateTypes(); } -void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { - RTPVisitor visitor(graph_, - class_loader_, - hint_dex_cache_, - &handle_cache_, - &worklist_, - is_first_run_); +void ReferenceTypePropagation::RTPVisitor::VisitBasicBlock(HBasicBlock* block) { // Handle Phis first as there might be instructions in the same block who depend on them. for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { VisitPhi(it.Current()->AsPhi()); @@ -348,7 +365,7 @@ void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { // last visited instruction, use `HInstructionIteratorHandleChanges` iterator. for (HInstructionIteratorHandleChanges it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instr = it.Current(); - instr->Accept(&visitor); + instr->Accept(this); } // Add extra nodes to bound types. @@ -357,7 +374,7 @@ void ReferenceTypePropagation::VisitBasicBlock(HBasicBlock* block) { BoundTypeForClassCheck(block->GetLastInstruction()); } -void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { +void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfNotNull(HBasicBlock* block) { HIf* ifInstruction = block->GetLastInstruction()->AsIf(); if (ifInstruction == nullptr) { return; @@ -391,7 +408,7 @@ void ReferenceTypePropagation::BoundTypeForIfNotNull(HBasicBlock* block) { : ifInstruction->IfFalseSuccessor(); ReferenceTypeInfo object_rti = ReferenceTypeInfo::Create( - handle_cache_.GetObjectClassHandle(), /* is_exact */ false); + handle_cache_->GetObjectClassHandle(), /* is_exact */ false); BoundTypeIn(obj, notNullBlock, /* start_instruction */ nullptr, object_rti); } @@ -469,7 +486,7 @@ static bool MatchIfInstanceOf(HIf* ifInstruction, // `if (x instanceof ClassX) { }` // If that's the case insert an HBoundType instruction to bound the type of `x` // to `ClassX` in the scope of the dominated blocks. -void ReferenceTypePropagation::BoundTypeForIfInstanceOf(HBasicBlock* block) { +void ReferenceTypePropagation::RTPVisitor::BoundTypeForIfInstanceOf(HBasicBlock* block) { HIf* ifInstruction = block->GetLastInstruction()->AsIf(); if (ifInstruction == nullptr) { return; @@ -520,14 +537,13 @@ void ReferenceTypePropagation::RTPVisitor::SetClassAsTypeInfo(HInstruction* inst Thread* self = Thread::Current(); StackHandleScope<2> hs(self); const DexFile& dex_file = *invoke->GetTargetMethod().dex_file; + uint32_t dex_method_index = invoke->GetTargetMethod().index; Handle<mirror::DexCache> dex_cache( hs.NewHandle(FindDexCacheWithHint(self, dex_file, hint_dex_cache_))); - // Use a null loader. We should probably use the compiling method's class loader, - // but then we would need to pass it to RTPVisitor just for this debug check. Since - // the method is from the String class, the null loader is good enough. + // Use a null loader, the target method is in a boot classpath dex file. 
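The BoundTypeForIfInstanceOf hunk above narrows the type of `x` only in the blocks dominated by the true branch of the check. A standalone sketch of that idea with made-up types (`Value`, `TypeBound`, `Block` are illustrative, not ART classes):

#include <string>
#include <vector>

// Illustrative stand-ins for HInstruction / HBoundType.
struct Value {
  std::string declared_type;
};

struct TypeBound {
  const Value* receiver;    // Value whose type is narrowed.
  std::string upper_bound;  // E.g. "ClassX" from `x instanceof ClassX`.
};

struct Block {
  std::vector<TypeBound> bounds;  // Facts valid in this block and the blocks it dominates.
};

// After `if (x instanceof ClassX)`, record a narrowed upper bound for x in
// the true successor; this mirrors inserting an HBoundType at the start of
// the dominated region.
void BoundTypeForInstanceOf(const Value* x,
                            const std::string& checked_class,
                            Block* true_successor) {
  true_successor->bounds.push_back(TypeBound{x, checked_class});
}

// Uses inside the dominated region consult the bound before falling back to
// the declared type.
std::string EffectiveType(const Value* x, const Block* block) {
  for (const TypeBound& bound : block->bounds) {
    if (bound.receiver == x) {
      return bound.upper_bound;
    }
  }
  return x->declared_type;
}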
Handle<mirror::ClassLoader> loader(hs.NewHandle<mirror::ClassLoader>(nullptr)); ArtMethod* method = cl->ResolveMethod<ClassLinker::ResolveMode::kNoChecks>( - dex_file, invoke->GetDexMethodIndex(), dex_cache, loader, nullptr, kDirect); + dex_method_index, dex_cache, loader, /* referrer */ nullptr, kDirect); DCHECK(method != nullptr); mirror::Class* declaring_class = method->GetDeclaringClass(); DCHECK(declaring_class != nullptr); @@ -555,12 +571,12 @@ void ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* dex::TypeIndex type_idx, const DexFile& dex_file, bool is_exact) { - DCHECK_EQ(instr->GetType(), Primitive::kPrimNot); + DCHECK_EQ(instr->GetType(), DataType::Type::kReference); ScopedObjectAccess soa(Thread::Current()); ObjPtr<mirror::DexCache> dex_cache = FindDexCacheWithHint(soa.Self(), dex_file, hint_dex_cache_); - ObjPtr<mirror::Class> klass = - ClassLinker::LookupResolvedType(type_idx, dex_cache, class_loader_.Get()); + ObjPtr<mirror::Class> klass = Runtime::Current()->GetClassLinker()->LookupResolvedType( + type_idx, dex_cache, class_loader_.Get()); SetClassAsTypeInfo(instr, klass, is_exact); } @@ -576,7 +592,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitNewArray(HNewArray* instr) { void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* instr) { // We check if the existing type is valid: the inliner may have set it. - if (instr->GetType() == Primitive::kPrimNot && !instr->GetReferenceTypeInfo().IsValid()) { + if (instr->GetType() == DataType::Type::kReference && !instr->GetReferenceTypeInfo().IsValid()) { UpdateReferenceTypeInfo(instr, instr->GetTypeIndex(), instr->GetDexFile(), @@ -586,7 +602,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitParameterValue(HParameterValue* void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstruction* instr, const FieldInfo& info) { - if (instr->GetType() != Primitive::kPrimNot) { + if (instr->GetType() != DataType::Type::kReference) { return; } @@ -595,7 +611,7 @@ void ReferenceTypePropagation::RTPVisitor::UpdateFieldAccessTypeInfo(HInstructio // The field is unknown only during tests. if (info.GetField() != nullptr) { - klass = info.GetField()->GetType<false>(); + klass = info.GetField()->LookupResolvedType(); } SetClassAsTypeInfo(instr, klass, /* is_exact */ false); @@ -612,7 +628,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitStaticFieldGet(HStaticFieldGet* void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedInstanceFieldGet( HUnresolvedInstanceFieldGet* instr) { // TODO: Use descriptor to get the actual type. - if (instr->GetFieldType() == Primitive::kPrimNot) { + if (instr->GetFieldType() == DataType::Type::kReference) { instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti()); } } @@ -620,7 +636,7 @@ void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedInstanceFieldGet( void ReferenceTypePropagation::RTPVisitor::VisitUnresolvedStaticFieldGet( HUnresolvedStaticFieldGet* instr) { // TODO: Use descriptor to get the actual type. 
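The unresolved field getters in this file fall back to the graph's inexact Object type when the field's class cannot be resolved. A minimal sketch of that fallback with a simplified reference-type record; `TypeInfo` and its fields are illustrative assumptions, not ReferenceTypeInfo itself:

#include <string>

// Simplified stand-in for ReferenceTypeInfo: a class name plus exactness.
struct TypeInfo {
  bool valid;
  std::string klass;
  bool is_exact;

  static TypeInfo Create(const std::string& name, bool exact) {
    return {true, name, exact};
  }
};

// When resolution fails, the safest answer is the top of the reference type
// lattice: java.lang.Object, not exact. Every reference value conforms to it.
TypeInfo TypeForFieldGet(const std::string* resolved_class_or_null) {
  if (resolved_class_or_null == nullptr) {
    return TypeInfo::Create("java.lang.Object", /* exact */ false);
  }
  return TypeInfo::Create(*resolved_class_or_null, /* exact */ false);
}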
- if (instr->GetFieldType() == Primitive::kPrimNot) { + if (instr->GetFieldType() == DataType::Type::kReference) { instr->SetReferenceTypeInfo(instr->GetBlock()->GetGraph()->GetInexactObjectRti()); } } @@ -728,8 +744,8 @@ void ReferenceTypePropagation::RTPVisitor::VisitCheckCast(HCheckCast* check_cast } } -void ReferenceTypePropagation::VisitPhi(HPhi* phi) { - if (phi->IsDead() || phi->GetType() != Primitive::kPrimNot) { +void ReferenceTypePropagation::RTPVisitor::VisitPhi(HPhi* phi) { + if (phi->IsDead() || phi->GetType() != DataType::Type::kReference) { return; } @@ -754,8 +770,23 @@ void ReferenceTypePropagation::VisitPhi(HPhi* phi) { } } +void ReferenceTypePropagation::FixUpInstructionType(HInstruction* instruction, + VariableSizedHandleScope* handle_scope) { + if (instruction->IsSelect()) { + ScopedObjectAccess soa(Thread::Current()); + HandleCache handle_cache(handle_scope); + HSelect* select = instruction->AsSelect(); + ReferenceTypeInfo false_rti = select->GetFalseValue()->GetReferenceTypeInfo(); + ReferenceTypeInfo true_rti = select->GetTrueValue()->GetReferenceTypeInfo(); + select->SetReferenceTypeInfo(MergeTypes(false_rti, true_rti, &handle_cache)); + } else { + LOG(FATAL) << "Invalid instruction in FixUpInstructionType"; + } +} + ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& a, - const ReferenceTypeInfo& b) { + const ReferenceTypeInfo& b, + HandleCache* handle_cache) { if (!b.IsValid()) { return a; } @@ -780,7 +811,7 @@ ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& is_exact = false; } else if (!a_is_interface && !b_is_interface) { result_type_handle = - handle_cache_.NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle)); + handle_cache->NewHandle(a_type_handle->GetCommonSuperClass(b_type_handle)); is_exact = false; } else { // This can happen if: @@ -790,15 +821,15 @@ ReferenceTypeInfo ReferenceTypePropagation::MergeTypes(const ReferenceTypeInfo& // void foo(Interface i, boolean cond) { // Object o = cond ? 
i : new Object(); // } - result_type_handle = handle_cache_.GetObjectClassHandle(); + result_type_handle = handle_cache->GetObjectClassHandle(); is_exact = false; } return ReferenceTypeInfo::Create(result_type_handle, is_exact); } -void ReferenceTypePropagation::UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache) { - DCHECK_EQ(Primitive::kPrimNot, instr->GetType()); +void ReferenceTypePropagation::RTPVisitor::UpdateArrayGet(HArrayGet* instr) { + DCHECK_EQ(DataType::Type::kReference, instr->GetType()); ReferenceTypeInfo parent_rti = instr->InputAt(0)->GetReferenceTypeInfo(); if (!parent_rti.IsValid()) { @@ -808,7 +839,7 @@ void ReferenceTypePropagation::UpdateArrayGet(HArrayGet* instr, HandleCache* han Handle<mirror::Class> handle = parent_rti.GetTypeHandle(); if (handle->IsObjectArrayClass() && IsAdmissible(handle->GetComponentType())) { ReferenceTypeInfo::TypeHandle component_handle = - handle_cache->NewHandle(handle->GetComponentType()); + handle_cache_->NewHandle(handle->GetComponentType()); bool is_exact = component_handle->CannotBeAssignedFromOtherTypes(); instr->SetReferenceTypeInfo(ReferenceTypeInfo::Create(component_handle, is_exact)); } else { @@ -817,7 +848,7 @@ void ReferenceTypePropagation::UpdateArrayGet(HArrayGet* instr, HandleCache* han } } -bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { +bool ReferenceTypePropagation::RTPVisitor::UpdateReferenceTypeInfo(HInstruction* instr) { ScopedObjectAccess soa(Thread::Current()); ReferenceTypeInfo previous_rti = instr->GetReferenceTypeInfo(); @@ -833,7 +864,7 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { } else if (instr->IsArrayGet()) { // TODO: consider if it's worth "looking back" and binding the input object // to an array type. - UpdateArrayGet(instr->AsArrayGet(), &handle_cache_); + UpdateArrayGet(instr->AsArrayGet()); } else { LOG(FATAL) << "Invalid instruction (should not get here)"; } @@ -842,29 +873,29 @@ bool ReferenceTypePropagation::UpdateReferenceTypeInfo(HInstruction* instr) { } void ReferenceTypePropagation::RTPVisitor::VisitInvoke(HInvoke* instr) { - if (instr->GetType() != Primitive::kPrimNot) { + if (instr->GetType() != DataType::Type::kReference) { return; } ScopedObjectAccess soa(Thread::Current()); ArtMethod* method = instr->GetResolvedMethod(); - mirror::Class* klass = (method == nullptr) ? nullptr : method->GetReturnType(/* resolve */ false); + ObjPtr<mirror::Class> klass = (method == nullptr) ? nullptr : method->LookupResolvedReturnType(); SetClassAsTypeInfo(instr, klass, /* is_exact */ false); } void ReferenceTypePropagation::RTPVisitor::VisitArrayGet(HArrayGet* instr) { - if (instr->GetType() != Primitive::kPrimNot) { + if (instr->GetType() != DataType::Type::kReference) { return; } ScopedObjectAccess soa(Thread::Current()); - UpdateArrayGet(instr, handle_cache_); + UpdateArrayGet(instr); if (!instr->GetReferenceTypeInfo().IsValid()) { - worklist_->push_back(instr); + worklist_.push_back(instr); } } -void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { +void ReferenceTypePropagation::RTPVisitor::UpdateBoundType(HBoundType* instr) { ReferenceTypeInfo input_rti = instr->InputAt(0)->GetReferenceTypeInfo(); if (!input_rti.IsValid()) { return; // No new info yet. @@ -888,7 +919,7 @@ void ReferenceTypePropagation::UpdateBoundType(HBoundType* instr) { // NullConstant inputs are ignored during merging as they do not provide any useful information. 
// If all the inputs are NullConstants then the type of the phi will be set to Object. -void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { +void ReferenceTypePropagation::RTPVisitor::UpdatePhi(HPhi* instr) { DCHECK(instr->IsLive()); HInputsRef inputs = instr->GetInputs(); @@ -916,7 +947,7 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { if (inputs[i]->IsNullConstant()) { continue; } - new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo()); + new_rti = MergeTypes(new_rti, inputs[i]->GetReferenceTypeInfo(), handle_cache_); if (new_rti.IsValid() && new_rti.IsObjectClass()) { if (!new_rti.IsExact()) { break; @@ -933,7 +964,7 @@ void ReferenceTypePropagation::UpdatePhi(HPhi* instr) { // Re-computes and updates the nullability of the instruction. Returns whether or // not the nullability was changed. -bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { +bool ReferenceTypePropagation::RTPVisitor::UpdateNullability(HInstruction* instr) { DCHECK((instr->IsPhi() && instr->AsPhi()->IsLive()) || instr->IsBoundType() || instr->IsNullCheck() @@ -961,7 +992,7 @@ bool ReferenceTypePropagation::UpdateNullability(HInstruction* instr) { return existing_can_be_null != instr->CanBeNull(); } -void ReferenceTypePropagation::ProcessWorklist() { +void ReferenceTypePropagation::RTPVisitor::ProcessWorklist() { while (!worklist_.empty()) { HInstruction* instruction = worklist_.back(); worklist_.pop_back(); @@ -973,19 +1004,20 @@ void ReferenceTypePropagation::ProcessWorklist() { } } -void ReferenceTypePropagation::AddToWorklist(HInstruction* instruction) { - DCHECK_EQ(instruction->GetType(), Primitive::kPrimNot) +void ReferenceTypePropagation::RTPVisitor::AddToWorklist(HInstruction* instruction) { + DCHECK_EQ(instruction->GetType(), DataType::Type::kReference) << instruction->DebugName() << ":" << instruction->GetType(); worklist_.push_back(instruction); } -void ReferenceTypePropagation::AddDependentInstructionsToWorklist(HInstruction* instruction) { +void ReferenceTypePropagation::RTPVisitor::AddDependentInstructionsToWorklist( + HInstruction* instruction) { for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) { HInstruction* user = use.GetUser(); if ((user->IsPhi() && user->AsPhi()->IsLive()) || user->IsBoundType() || user->IsNullCheck() - || (user->IsArrayGet() && (user->GetType() == Primitive::kPrimNot))) { + || (user->IsArrayGet() && (user->GetType() == DataType::Type::kReference))) { AddToWorklist(user); } } diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h index 215e96786b..fd4dad2b45 100644 --- a/compiler/optimizing/reference_type_propagation.h +++ b/compiler/optimizing/reference_type_propagation.h @@ -18,12 +18,10 @@ #define ART_COMPILER_OPTIMIZING_REFERENCE_TYPE_PROPAGATION_H_ #include "base/arena_containers.h" -#include "driver/dex_compilation_unit.h" -#include "handle_scope-inl.h" +#include "mirror/class-inl.h" #include "nodes.h" #include "obj_ptr.h" #include "optimization.h" -#include "optimizing_compiler_stats.h" namespace art { @@ -46,7 +44,7 @@ class ReferenceTypePropagation : public HOptimization { // Returns true if klass is admissible to the propagation: non-null and resolved. // For an array type, we also check if the component type is admissible. 
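UpdatePhi and MergeTypes above combine the types of all non-null-constant inputs, and for two non-interface classes the merge is their common superclass (never exact). A standalone sketch of that walk over a toy hierarchy; `Klass` is an illustrative stand-in for mirror::Class:

#include <unordered_set>

// Toy class hierarchy: each class knows only its superclass.
struct Klass {
  const Klass* super = nullptr;  // nullptr for java.lang.Object.
};

// Remember all ancestors of `a` (including `a`), then walk up from `b`
// until one of them is found. Terminates at the root, so at worst the
// result is Object, matching the interface cases in MergeTypes.
const Klass* CommonSuperClass(const Klass* a, const Klass* b) {
  std::unordered_set<const Klass*> ancestors_of_a;
  for (const Klass* k = a; k != nullptr; k = k->super) {
    ancestors_of_a.insert(k);
  }
  for (const Klass* k = b; k != nullptr; k = k->super) {
    if (ancestors_of_a.count(k) != 0) {
      return k;
    }
  }
  return nullptr;  // Unreachable when both chains end at Object.
}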
- static bool IsAdmissible(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) { + static bool IsAdmissible(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) { return klass != nullptr && klass->IsResolved() && (!klass->IsArrayClass() || IsAdmissible(klass->GetComponentType())); @@ -54,6 +52,12 @@ class ReferenceTypePropagation : public HOptimization { static constexpr const char* kReferenceTypePropagationPassName = "reference_type_propagation"; + // Fix the reference type for an instruction whose inputs have changed. + // For a select instruction, the reference types of the inputs are merged + // and the resulting reference type is set on the select instruction. + static void FixUpInstructionType(HInstruction* instruction, + VariableSizedHandleScope* handle_scope); + private: class HandleCache { public: @@ -85,23 +89,9 @@ class ReferenceTypePropagation : public HOptimization { class RTPVisitor; - void VisitPhi(HPhi* phi); - void VisitBasicBlock(HBasicBlock* block); - void UpdateBoundType(HBoundType* bound_type) REQUIRES_SHARED(Locks::mutator_lock_); - void UpdatePhi(HPhi* phi) REQUIRES_SHARED(Locks::mutator_lock_); - void BoundTypeForIfNotNull(HBasicBlock* block); - void BoundTypeForIfInstanceOf(HBasicBlock* block); - void ProcessWorklist(); - void AddToWorklist(HInstruction* instr); - void AddDependentInstructionsToWorklist(HInstruction* instr); - - bool UpdateNullability(HInstruction* instr); - bool UpdateReferenceTypeInfo(HInstruction* instr); - - static void UpdateArrayGet(HArrayGet* instr, HandleCache* handle_cache) - REQUIRES_SHARED(Locks::mutator_lock_); - - ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) + static ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, + const ReferenceTypeInfo& b, + HandleCache* handle_cache) REQUIRES_SHARED(Locks::mutator_lock_); void ValidateTypes(); @@ -114,13 +104,9 @@ class ReferenceTypePropagation : public HOptimization { Handle<mirror::DexCache> hint_dex_cache_; HandleCache handle_cache_; - ArenaVector<HInstruction*> worklist_; - // Whether this reference type propagation is the first run we are doing. const bool is_first_run_; - static constexpr size_t kDefaultWorklistSize = 8; - friend class ReferenceTypePropagationTest; DISALLOW_COPY_AND_ASSIGN(ReferenceTypePropagation); diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc index 0b49ce1a4c..028b6d3b79 100644 --- a/compiler/optimizing/reference_type_propagation_test.cc +++ b/compiler/optimizing/reference_type_propagation_test.cc @@ -14,12 +14,13 @@ * limitations under the License. */ +#include "reference_type_propagation.h" + #include "base/arena_allocator.h" #include "builder.h" #include "nodes.h" #include "object_lock.h" #include "optimizing_unit_test.h" -#include "reference_type_propagation.h" namespace art { @@ -27,28 +28,26 @@ namespace art { * Fixture class for unit testing the ReferenceTypePropagation phase. Used to verify the * functionality of methods and situations that are hard to set up with checker tests. 
*/ -class ReferenceTypePropagationTest : public CommonCompilerTest { +class ReferenceTypePropagationTest : public OptimizingUnitTest { public: - ReferenceTypePropagationTest() : pool_(), allocator_(&pool_), propagation_(nullptr) { - graph_ = CreateGraph(&allocator_); - } + ReferenceTypePropagationTest() : graph_(CreateGraph()), propagation_(nullptr) { } ~ReferenceTypePropagationTest() { } void SetupPropagation(VariableSizedHandleScope* handles) { graph_->InitializeInexactObjectRTI(handles); - propagation_ = new (&allocator_) ReferenceTypePropagation(graph_, - Handle<mirror::ClassLoader>(), - Handle<mirror::DexCache>(), - handles, - true, - "test_prop"); + propagation_ = new (GetAllocator()) ReferenceTypePropagation(graph_, + Handle<mirror::ClassLoader>(), + Handle<mirror::DexCache>(), + handles, + true, + "test_prop"); } // Relay method to merge type in reference type propagation. ReferenceTypeInfo MergeTypes(const ReferenceTypeInfo& a, const ReferenceTypeInfo& b) REQUIRES_SHARED(Locks::mutator_lock_) { - return propagation_->MergeTypes(a, b); + return propagation_->MergeTypes(a, b, &propagation_->handle_cache_); } // Helper method to construct an invalid type. @@ -67,8 +66,6 @@ class ReferenceTypePropagationTest : public CommonCompilerTest { } // General building fields. - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; ReferenceTypePropagation* propagation_; @@ -162,4 +159,3 @@ TEST_F(ReferenceTypePropagationTest, MergeValidTypes) { } } // namespace art - diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc index ce3a4966aa..27f9ac3990 100644 --- a/compiler/optimizing/register_allocation_resolver.cc +++ b/compiler/optimizing/register_allocation_resolver.cc @@ -16,16 +16,16 @@ #include "register_allocation_resolver.h" +#include "base/bit_vector-inl.h" #include "code_generator.h" #include "linear_order.h" #include "ssa_liveness_analysis.h" namespace art { -RegisterAllocationResolver::RegisterAllocationResolver(ArenaAllocator* allocator, - CodeGenerator* codegen, +RegisterAllocationResolver::RegisterAllocationResolver(CodeGenerator* codegen, const SsaLivenessAnalysis& liveness) - : allocator_(allocator), + : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen), liveness_(liveness) {} @@ -36,7 +36,7 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint size_t float_spill_slots, size_t double_spill_slots, size_t catch_phi_spill_slots, - const ArenaVector<LiveInterval*>& temp_intervals) { + ArrayRef<LiveInterval* const> temp_intervals) { size_t spill_slots = int_spill_slots + long_spill_slots + float_spill_slots @@ -100,24 +100,27 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint // [art method ]. 
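The switch in the next hunk turns a per-type slot index into a final stack slot by stacking the per-type areas on top of the reserved outgoing-argument and ArtMethod slots; the FALLTHROUGH cases accumulate the offsets. A standalone version of the same arithmetic, with slot counts as made-up parameters rather than the resolver's fields:

#include <cstddef>

// Slot areas are stacked in index order:
//   [reserved out + art method][int/ref][float][long][double]
// so a double slot skips long, float and int/ref slots plus the reserved
// area; a long slot skips float and int/ref slots; and so on.
enum class SlotKind { kIntOrReference, kFloat, kLong, kDouble };

size_t FinalSlot(SlotKind kind,
                 size_t slot_within_area,
                 size_t reserved_out_slots,  // Includes the ArtMethod slot.
                 size_t int_slots,
                 size_t float_slots,
                 size_t long_slots) {
  size_t slot = slot_within_area;
  switch (kind) {
    case SlotKind::kDouble:
      slot += long_slots;
      [[fallthrough]];
    case SlotKind::kLong:
      slot += float_slots;
      [[fallthrough]];
    case SlotKind::kFloat:
      slot += int_slots;
      [[fallthrough]];
    case SlotKind::kIntOrReference:
      slot += reserved_out_slots;
      break;
  }
  return slot;  // Multiply by the virtual register size to get a byte offset.
}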
size_t slot = current->GetSpillSlot(); switch (current->GetType()) { - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: slot += long_spill_slots; FALLTHROUGH_INTENDED; - case Primitive::kPrimLong: + case DataType::Type::kUint64: + case DataType::Type::kInt64: slot += float_spill_slots; FALLTHROUGH_INTENDED; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: slot += int_spill_slots; FALLTHROUGH_INTENDED; - case Primitive::kPrimNot: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimShort: + case DataType::Type::kReference: + case DataType::Type::kUint32: + case DataType::Type::kInt32: + case DataType::Type::kUint16: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kBool: + case DataType::Type::kInt16: slot += reserved_out_slots; break; - case Primitive::kPrimVoid: + case DataType::Type::kVoid: LOG(FATAL) << "Unexpected type for interval " << current->GetType(); } current->SetSpillSlot(slot * kVRegSize); @@ -205,12 +208,12 @@ void RegisterAllocationResolver::Resolve(ArrayRef<HInstruction* const> safepoint size_t temp_index = liveness_.GetTempIndex(temp); LocationSummary* locations = at->GetLocations(); switch (temp->GetType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: locations->SetTempAt(temp_index, Location::RegisterLocation(temp->GetRegister())); break; - case Primitive::kPrimDouble: - if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + case DataType::Type::kFloat64: + if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { Location location = Location::FpuRegisterPairLocation( temp->GetRegister(), temp->GetHighInterval()->GetRegister()); locations->SetTempAt(temp_index, location); @@ -383,7 +386,7 @@ void RegisterAllocationResolver::ConnectSiblings(LiveInterval* interval) { safepoint_position = safepoint_position->GetNext()) { DCHECK(current->CoversSlow(safepoint_position->GetPosition())); - if (current->GetType() == Primitive::kPrimNot) { + if (current->GetType() == DataType::Type::kReference) { DCHECK(interval->GetDefinedBy()->IsActualObject()) << interval->GetDefinedBy()->DebugName() << '(' << interval->GetDefinedBy()->GetId() << ')' @@ -507,13 +510,13 @@ void RegisterAllocationResolver::AddMove(HParallelMove* move, Location source, Location destination, HInstruction* instruction, - Primitive::Type type) const { - if (type == Primitive::kPrimLong + DataType::Type type) const { + if (type == DataType::Type::kInt64 && codegen_->ShouldSplitLongMoves() // The parallel move resolver knows how to deal with long constants. 
&& !source.IsConstant()) { - move->AddMove(source.ToLow(), destination.ToLow(), Primitive::kPrimInt, instruction); - move->AddMove(source.ToHigh(), destination.ToHigh(), Primitive::kPrimInt, nullptr); + move->AddMove(source.ToLow(), destination.ToLow(), DataType::Type::kInt32, instruction); + move->AddMove(source.ToHigh(), destination.ToHigh(), DataType::Type::kInt32, nullptr); } else { move->AddMove(source, destination, type, instruction); } diff --git a/compiler/optimizing/register_allocation_resolver.h b/compiler/optimizing/register_allocation_resolver.h index d48b1a0bb9..278371777d 100644 --- a/compiler/optimizing/register_allocation_resolver.h +++ b/compiler/optimizing/register_allocation_resolver.h @@ -17,10 +17,9 @@ #ifndef ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATION_RESOLVER_H_ -#include "base/arena_containers.h" #include "base/array_ref.h" #include "base/value_object.h" -#include "primitive.h" +#include "data_type.h" namespace art { @@ -40,9 +39,7 @@ class SsaLivenessAnalysis; */ class RegisterAllocationResolver : ValueObject { public: - RegisterAllocationResolver(ArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& liveness); + RegisterAllocationResolver(CodeGenerator* codegen, const SsaLivenessAnalysis& liveness); void Resolve(ArrayRef<HInstruction* const> safepoints, size_t reserved_out_slots, // Includes slot(s) for the art method. @@ -51,7 +48,7 @@ class RegisterAllocationResolver : ValueObject { size_t float_spill_slots, size_t double_spill_slots, size_t catch_phi_spill_slots, - const ArenaVector<LiveInterval*>& temp_intervals); + ArrayRef<LiveInterval* const> temp_intervals); private: // Update live registers of safepoint location summary. @@ -88,7 +85,7 @@ class RegisterAllocationResolver : ValueObject { Location source, Location destination, HInstruction* instruction, - Primitive::Type type) const; + DataType::Type type) const; ArenaAllocator* const allocator_; CodeGenerator* const codegen_; diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc index 5b768d5d67..bad73e1b61 100644 --- a/compiler/optimizing/register_allocator.cc +++ b/compiler/optimizing/register_allocator.cc @@ -19,46 +19,64 @@ #include <iostream> #include <sstream> +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "base/bit_vector-inl.h" #include "code_generator.h" #include "register_allocator_graph_color.h" #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" - namespace art { -RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator, +RegisterAllocator::RegisterAllocator(ScopedArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& liveness) : allocator_(allocator), codegen_(codegen), liveness_(liveness) {} -RegisterAllocator* RegisterAllocator::Create(ArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& analysis, - Strategy strategy) { +std::unique_ptr<RegisterAllocator> RegisterAllocator::Create(ScopedArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& analysis, + Strategy strategy) { switch (strategy) { case kRegisterAllocatorLinearScan: - return new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis); + return std::unique_ptr<RegisterAllocator>( + new (allocator) RegisterAllocatorLinearScan(allocator, codegen, analysis)); case kRegisterAllocatorGraphColor: - return new (allocator) 
RegisterAllocatorGraphColor(allocator, codegen, analysis); + return std::unique_ptr<RegisterAllocator>( + new (allocator) RegisterAllocatorGraphColor(allocator, codegen, analysis)); default: LOG(FATAL) << "Invalid register allocation strategy: " << strategy; UNREACHABLE(); } } +RegisterAllocator::~RegisterAllocator() { + if (kIsDebugBuild) { + // Poison live interval pointers with "Error: BAD 71ve1nt3rval." + LiveInterval* bad_live_interval = reinterpret_cast<LiveInterval*>(0xebad7113u); + for (HBasicBlock* block : codegen_->GetGraph()->GetLinearOrder()) { + for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { + it.Current()->SetLiveInterval(bad_live_interval); + } + for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { + it.Current()->SetLiveInterval(bad_live_interval); + } + } + } +} + bool RegisterAllocator::CanAllocateRegistersFor(const HGraph& graph ATTRIBUTE_UNUSED, InstructionSet instruction_set) { - return instruction_set == kArm - || instruction_set == kArm64 - || instruction_set == kMips - || instruction_set == kMips64 - || instruction_set == kThumb2 - || instruction_set == kX86 - || instruction_set == kX86_64; + return instruction_set == InstructionSet::kArm + || instruction_set == InstructionSet::kArm64 + || instruction_set == InstructionSet::kMips + || instruction_set == InstructionSet::kMips64 + || instruction_set == InstructionSet::kThumb2 + || instruction_set == InstructionSet::kX86 + || instruction_set == InstructionSet::kX86_64; } class AllRangesIterator : public ValueObject { @@ -88,18 +106,18 @@ class AllRangesIterator : public ValueObject { DISALLOW_COPY_AND_ASSIGN(AllRangesIterator); }; -bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& intervals, +bool RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const> intervals, size_t number_of_spill_slots, size_t number_of_out_slots, const CodeGenerator& codegen, - ArenaAllocator* allocator, bool processing_core_registers, bool log_fatal_on_failure) { size_t number_of_registers = processing_core_registers ? codegen.GetNumberOfCoreRegisters() : codegen.GetNumberOfFloatingPointRegisters(); - ArenaVector<ArenaBitVector*> liveness_of_values( - allocator->Adapter(kArenaAllocRegisterAllocatorValidate)); + ScopedArenaAllocator allocator(codegen.GetGraph()->GetArenaStack()); + ScopedArenaVector<ArenaBitVector*> liveness_of_values( + allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); liveness_of_values.reserve(number_of_registers + number_of_spill_slots); size_t max_end = 0u; @@ -113,7 +131,8 @@ bool RegisterAllocator::ValidateIntervals(const ArenaVector<LiveInterval*>& inte // allocated will populate the associated bit vector based on its live ranges. 
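ValidateIntervals above gives every register and spill slot a bit vector indexed by lifetime position; each interval's ranges set bits in the vector for its assigned location, and a bit that is already set flags a conflict. A standalone sketch of the same check using std::vector<bool> instead of ArenaBitVector, with simplified interval/range structs:

#include <cstddef>
#include <vector>

struct Range {
  size_t start;  // Inclusive lifetime position.
  size_t end;    // Exclusive lifetime position.
};

struct Interval {
  size_t assigned;            // Register number or spill slot index.
  std::vector<Range> ranges;  // Live ranges of this interval.
};

// Returns false if two intervals sharing the same register/slot are live at
// the same lifetime position.
bool ValidateIntervals(const std::vector<Interval>& intervals,
                       size_t number_of_locations,
                       size_t max_position) {
  std::vector<std::vector<bool>> liveness(
      number_of_locations, std::vector<bool>(max_position, false));
  for (const Interval& interval : intervals) {
    std::vector<bool>& bits = liveness[interval.assigned];
    for (const Range& range : interval.ranges) {
      for (size_t position = range.start; position < range.end; ++position) {
        if (bits[position]) {
          return false;  // Two intervals overlap in the same location.
        }
        bits[position] = true;
      }
    }
  }
  return true;
}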
for (size_t i = 0; i < number_of_registers + number_of_spill_slots; ++i) { liveness_of_values.push_back( - ArenaBitVector::Create(allocator, max_end, false, kArenaAllocRegisterAllocatorValidate)); + ArenaBitVector::Create(&allocator, max_end, false, kArenaAllocRegisterAllocatorValidate)); + liveness_of_values.back()->ClearAllBits(); } for (LiveInterval* start_interval : intervals) { diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h index 7e1fff8e2b..18ef69fcab 100644 --- a/compiler/optimizing/register_allocator.h +++ b/compiler/optimizing/register_allocator.h @@ -18,10 +18,9 @@ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_H_ #include "arch/instruction_set.h" -#include "base/arena_containers.h" +#include "base/array_ref.h" #include "base/arena_object.h" #include "base/macros.h" -#include "primitive.h" namespace art { @@ -37,7 +36,7 @@ class SsaLivenessAnalysis; /** * Base class for any register allocator. */ -class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> { +class RegisterAllocator : public DeletableArenaObject<kArenaAllocRegisterAllocator> { public: enum Strategy { kRegisterAllocatorLinearScan, @@ -46,12 +45,12 @@ class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> { static constexpr Strategy kRegisterAllocatorDefault = kRegisterAllocatorLinearScan; - static RegisterAllocator* Create(ArenaAllocator* allocator, - CodeGenerator* codegen, - const SsaLivenessAnalysis& analysis, - Strategy strategy = kRegisterAllocatorDefault); + static std::unique_ptr<RegisterAllocator> Create(ScopedArenaAllocator* allocator, + CodeGenerator* codegen, + const SsaLivenessAnalysis& analysis, + Strategy strategy = kRegisterAllocatorDefault); - virtual ~RegisterAllocator() = default; + virtual ~RegisterAllocator(); // Main entry point for the register allocator. Given the liveness analysis, // allocates registers to live intervals. @@ -65,18 +64,17 @@ class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> { InstructionSet instruction_set); // Verifies that live intervals do not conflict. Used by unit testing. - static bool ValidateIntervals(const ArenaVector<LiveInterval*>& intervals, + static bool ValidateIntervals(ArrayRef<LiveInterval* const> intervals, size_t number_of_spill_slots, size_t number_of_out_slots, const CodeGenerator& codegen, - ArenaAllocator* allocator, bool processing_core_registers, bool log_fatal_on_failure); static constexpr const char* kRegisterAllocatorPassName = "register"; protected: - RegisterAllocator(ArenaAllocator* allocator, + RegisterAllocator(ScopedArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& analysis); @@ -89,7 +87,7 @@ class RegisterAllocator : public ArenaObject<kArenaAllocRegisterAllocator> { // to find an optimal split position. LiveInterval* SplitBetween(LiveInterval* interval, size_t from, size_t to); - ArenaAllocator* const allocator_; + ScopedArenaAllocator* const allocator_; CodeGenerator* const codegen_; const SsaLivenessAnalysis& liveness_; }; diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc index 5e22772844..fa7ad82316 100644 --- a/compiler/optimizing/register_allocator_graph_color.cc +++ b/compiler/optimizing/register_allocator_graph_color.cc @@ -217,13 +217,12 @@ static float ComputeSpillWeight(LiveInterval* interval, const SsaLivenessAnalysi // and thus whether it is safe to prune it from the interference graph early on. 
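The InterferenceNode comment above is about tracking a node's degree so that low-degree nodes can be pruned early; in graph-coloring allocation that is the classic simplify rule: a node with fewer than k live neighbours can always be coloured after its neighbours. A standalone sketch of that pruning loop over a toy graph (not ART's node class):

#include <algorithm>
#include <cstddef>
#include <vector>

struct Node {
  std::vector<Node*> neighbours;
  bool pruned = false;
};

// Degree counting only the neighbours still in the graph.
size_t Degree(const Node* node) {
  return static_cast<size_t>(
      std::count_if(node->neighbours.begin(), node->neighbours.end(),
                    [](const Node* n) { return !n->pruned; }));
}

// Simplify: repeatedly remove nodes whose current degree is below the number
// of colours. Such a node is trivially colourable once its neighbours are
// coloured, so it is safe to prune it early and colour it last.
std::vector<Node*> Simplify(std::vector<Node*>& nodes, size_t num_colors) {
  std::vector<Node*> pruned_stack;
  bool changed = true;
  while (changed) {
    changed = false;
    for (Node* node : nodes) {
      if (!node->pruned && Degree(node) < num_colors) {
        node->pruned = true;
        pruned_stack.push_back(node);
        changed = true;
      }
    }
  }
  return pruned_stack;  // Colour in reverse order of pruning.
}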
class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { public: - InterferenceNode(ArenaAllocator* allocator, - LiveInterval* interval, + InterferenceNode(LiveInterval* interval, const SsaLivenessAnalysis& liveness) : stage(NodeStage::kInitial), interval_(interval), - adjacent_nodes_(allocator->Adapter(kArenaAllocRegisterAllocator)), - coalesce_opportunities_(allocator->Adapter(kArenaAllocRegisterAllocator)), + adjacent_nodes_(nullptr), + coalesce_opportunities_(nullptr), out_degree_(interval->HasRegister() ? std::numeric_limits<size_t>::max() : 0), alias_(this), spill_weight_(ComputeSpillWeight(interval, liveness)), @@ -232,21 +231,26 @@ class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { DCHECK(!interval->IsHighInterval()) << "Pair nodes should be represented by the low interval"; } - void AddInterference(InterferenceNode* other, bool guaranteed_not_interfering_yet) { + void AddInterference(InterferenceNode* other, + bool guaranteed_not_interfering_yet, + ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>>* storage) { DCHECK(!IsPrecolored()) << "To save memory, fixed nodes should not have outgoing interferences"; DCHECK_NE(this, other) << "Should not create self loops in the interference graph"; DCHECK_EQ(this, alias_) << "Should not add interferences to a node that aliases another"; DCHECK_NE(stage, NodeStage::kPruned); DCHECK_NE(other->stage, NodeStage::kPruned); + if (adjacent_nodes_ == nullptr) { + ScopedArenaVector<InterferenceNode*>::allocator_type adapter(storage->get_allocator()); + storage->emplace_back(adapter); + adjacent_nodes_ = &storage->back(); + } if (guaranteed_not_interfering_yet) { - DCHECK(std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other) - == adjacent_nodes_.end()); - adjacent_nodes_.push_back(other); + DCHECK(!ContainsElement(GetAdjacentNodes(), other)); + adjacent_nodes_->push_back(other); out_degree_ += EdgeWeightWith(other); } else { - auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); - if (it == adjacent_nodes_.end()) { - adjacent_nodes_.push_back(other); + if (!ContainsElement(GetAdjacentNodes(), other)) { + adjacent_nodes_->push_back(other); out_degree_ += EdgeWeightWith(other); } } @@ -255,26 +259,29 @@ class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { void RemoveInterference(InterferenceNode* other) { DCHECK_EQ(this, alias_) << "Should not remove interferences from a coalesced node"; DCHECK_EQ(other->stage, NodeStage::kPruned) << "Should only remove interferences when pruning"; - auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); - if (it != adjacent_nodes_.end()) { - adjacent_nodes_.erase(it); - out_degree_ -= EdgeWeightWith(other); + if (adjacent_nodes_ != nullptr) { + auto it = std::find(adjacent_nodes_->begin(), adjacent_nodes_->end(), other); + if (it != adjacent_nodes_->end()) { + adjacent_nodes_->erase(it); + out_degree_ -= EdgeWeightWith(other); + } } } bool ContainsInterference(InterferenceNode* other) const { DCHECK(!IsPrecolored()) << "Should not query fixed nodes for interferences"; DCHECK_EQ(this, alias_) << "Should not query a coalesced node for interferences"; - auto it = std::find(adjacent_nodes_.begin(), adjacent_nodes_.end(), other); - return it != adjacent_nodes_.end(); + return ContainsElement(GetAdjacentNodes(), other); } LiveInterval* GetInterval() const { return interval_; } - const ArenaVector<InterferenceNode*>& GetAdjacentNodes() const { - return adjacent_nodes_; + ArrayRef<InterferenceNode*> 
GetAdjacentNodes() const { + return adjacent_nodes_ != nullptr + ? ArrayRef<InterferenceNode*>(*adjacent_nodes_) + : ArrayRef<InterferenceNode*>(); } size_t GetOutDegree() const { @@ -283,16 +290,22 @@ class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { return out_degree_; } - void AddCoalesceOpportunity(CoalesceOpportunity* opportunity) { - coalesce_opportunities_.push_back(opportunity); + void AddCoalesceOpportunity(CoalesceOpportunity* opportunity, + ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>>* storage) { + if (coalesce_opportunities_ == nullptr) { + ScopedArenaVector<CoalesceOpportunity*>::allocator_type adapter(storage->get_allocator()); + storage->emplace_back(adapter); + coalesce_opportunities_ = &storage->back(); + } + coalesce_opportunities_->push_back(opportunity); } void ClearCoalesceOpportunities() { - coalesce_opportunities_.clear(); + coalesce_opportunities_ = nullptr; } bool IsMoveRelated() const { - for (CoalesceOpportunity* opportunity : coalesce_opportunities_) { + for (CoalesceOpportunity* opportunity : GetCoalesceOpportunities()) { if (opportunity->stage == CoalesceStage::kWorklist || opportunity->stage == CoalesceStage::kActive) { return true; @@ -325,8 +338,10 @@ class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { return alias_; } - const ArenaVector<CoalesceOpportunity*>& GetCoalesceOpportunities() const { - return coalesce_opportunities_; + ArrayRef<CoalesceOpportunity*> GetCoalesceOpportunities() const { + return coalesce_opportunities_ != nullptr + ? ArrayRef<CoalesceOpportunity*>(*coalesce_opportunities_) + : ArrayRef<CoalesceOpportunity*>(); } float GetSpillWeight() const { @@ -361,10 +376,10 @@ class InterferenceNode : public ArenaObject<kArenaAllocRegisterAllocator> { // All nodes interfering with this one. // We use an unsorted vector as a set, since a tree or hash set is too heavy for the // set sizes that we encounter. Using a vector leads to much better performance. - ArenaVector<InterferenceNode*> adjacent_nodes_; + ScopedArenaVector<InterferenceNode*>* adjacent_nodes_; // Owned by ColoringIteration. // Interference nodes that this node should be coalesced with to reduce moves. - ArenaVector<CoalesceOpportunity*> coalesce_opportunities_; + ScopedArenaVector<CoalesceOpportunity*>* coalesce_opportunities_; // Owned by ColoringIteration. // The maximum number of colors with which this node could interfere. This could be more than // the number of adjacent nodes if this is a pair node, or if some adjacent nodes are pair nodes. @@ -416,7 +431,7 @@ static bool HasGreaterNodePriority(const InterferenceNode* lhs, class ColoringIteration { public: ColoringIteration(RegisterAllocatorGraphColor* register_allocator, - ArenaAllocator* allocator, + ScopedArenaAllocator* allocator, bool processing_core_regs, size_t num_regs) : register_allocator_(register_allocator), @@ -430,15 +445,17 @@ class ColoringIteration { freeze_worklist_(allocator->Adapter(kArenaAllocRegisterAllocator)), spill_worklist_(HasGreaterNodePriority, allocator->Adapter(kArenaAllocRegisterAllocator)), coalesce_worklist_(CoalesceOpportunity::CmpPriority, - allocator->Adapter(kArenaAllocRegisterAllocator)) {} + allocator->Adapter(kArenaAllocRegisterAllocator)), + adjacent_nodes_links_(allocator->Adapter(kArenaAllocRegisterAllocator)), + coalesce_opportunities_links_(allocator->Adapter(kArenaAllocRegisterAllocator)) {} // Use the intervals collected from instructions to construct an // interference graph mapping intervals to adjacency lists. 
// Also, collect synthesized safepoint nodes, used to keep // track of live intervals across safepoints. // TODO: Should build safepoints elsewhere. - void BuildInterferenceGraph(const ArenaVector<LiveInterval*>& intervals, - const ArenaVector<InterferenceNode*>& physical_nodes); + void BuildInterferenceGraph(const ScopedArenaVector<LiveInterval*>& intervals, + const ScopedArenaVector<InterferenceNode*>& physical_nodes); // Add coalesce opportunities to interference nodes. void FindCoalesceOpportunities(); @@ -456,8 +473,8 @@ class ColoringIteration { // Return prunable nodes. // The register allocator will need to access prunable nodes after coloring // in order to tell the code generator which registers have been assigned. - const ArenaVector<InterferenceNode*>& GetPrunableNodes() const { - return prunable_nodes_; + ArrayRef<InterferenceNode* const> GetPrunableNodes() const { + return ArrayRef<InterferenceNode* const>(prunable_nodes_); } private: @@ -503,51 +520,59 @@ class ColoringIteration { // needed to split intervals and assign spill slots. RegisterAllocatorGraphColor* register_allocator_; - // An arena allocator used for a single graph coloring attempt. - ArenaAllocator* allocator_; + // A scoped arena allocator used for a single graph coloring attempt. + ScopedArenaAllocator* allocator_; const bool processing_core_regs_; const size_t num_regs_; // A map from live intervals to interference nodes. - ArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_; + ScopedArenaHashMap<LiveInterval*, InterferenceNode*> interval_node_map_; // Uncolored nodes that should be pruned from the interference graph. - ArenaVector<InterferenceNode*> prunable_nodes_; + ScopedArenaVector<InterferenceNode*> prunable_nodes_; // A stack of nodes pruned from the interference graph, waiting to be pruned. - ArenaStdStack<InterferenceNode*> pruned_nodes_; + ScopedArenaStdStack<InterferenceNode*> pruned_nodes_; // A queue containing low degree, non-move-related nodes that can pruned immediately. - ArenaDeque<InterferenceNode*> simplify_worklist_; + ScopedArenaDeque<InterferenceNode*> simplify_worklist_; // A queue containing low degree, move-related nodes. - ArenaDeque<InterferenceNode*> freeze_worklist_; + ScopedArenaDeque<InterferenceNode*> freeze_worklist_; // A queue containing high degree nodes. // If we have to prune from the spill worklist, we cannot guarantee // the pruned node a color, so we order the worklist by priority. - ArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_; + ScopedArenaPriorityQueue<InterferenceNode*, decltype(&HasGreaterNodePriority)> spill_worklist_; // A queue containing coalesce opportunities. // We order the coalesce worklist by priority, since some coalesce opportunities (e.g., those // inside of loops) are more important than others. - ArenaPriorityQueue<CoalesceOpportunity*, - decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_; + ScopedArenaPriorityQueue<CoalesceOpportunity*, + decltype(&CoalesceOpportunity::CmpPriority)> coalesce_worklist_; + + // Storage for links to adjacent nodes for interference nodes. + // Using std::deque so that elements do not move when adding new ones. + ScopedArenaDeque<ScopedArenaVector<InterferenceNode*>> adjacent_nodes_links_; + + // Storage for links to coalesce opportunities for interference nodes. + // Using std::deque so that elements do not move when adding new ones. 
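The deque members introduced above exist because interference nodes keep raw pointers into this storage; unlike std::vector, std::deque never relocates existing elements when new ones are appended at the back. A minimal sketch of that pointer-stability guarantee:

#include <cassert>
#include <deque>
#include <vector>

int main() {
  // Pointers to elements of a deque stay valid across emplace_back.
  std::deque<std::vector<int>> storage;
  storage.emplace_back();
  std::vector<int>* first = &storage.front();
  for (int i = 0; i < 1000; ++i) {
    storage.emplace_back();  // Never moves the element `first` points to.
  }
  assert(first == &storage.front());

  // A vector of vectors could reallocate and leave `first` dangling, which
  // is why per-node adjacency and coalesce links are stored in a deque.
  return 0;
}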
+ ScopedArenaDeque<ScopedArenaVector<CoalesceOpportunity*>> coalesce_opportunities_links_; DISALLOW_COPY_AND_ASSIGN(ColoringIteration); }; static bool IsCoreInterval(LiveInterval* interval) { - return !Primitive::IsFloatingPointType(interval->GetType()); + return !DataType::IsFloatingPointType(interval->GetType()); } static size_t ComputeReservedArtMethodSlots(const CodeGenerator& codegen) { return static_cast<size_t>(InstructionSetPointerSize(codegen.GetInstructionSet())) / kVRegSize; } -RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocator, +RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& liveness, bool iterative_move_coalescing) @@ -573,9 +598,8 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat // This includes globally blocked registers, such as the stack pointer. physical_core_nodes_.resize(codegen_->GetNumberOfCoreRegisters(), nullptr); for (size_t i = 0; i < codegen_->GetNumberOfCoreRegisters(); ++i) { - LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimInt); - physical_core_nodes_[i] = - new (allocator_) InterferenceNode(allocator_, interval, liveness); + LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kInt32); + physical_core_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); physical_core_nodes_[i]->stage = NodeStage::kPrecolored; core_intervals_.push_back(interval); if (codegen_->IsBlockedCoreRegister(i)) { @@ -585,9 +609,9 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat // Initialize physical floating point register live intervals and blocked registers. physical_fp_nodes_.resize(codegen_->GetNumberOfFloatingPointRegisters(), nullptr); for (size_t i = 0; i < codegen_->GetNumberOfFloatingPointRegisters(); ++i) { - LiveInterval* interval = LiveInterval::MakeFixedInterval(allocator_, i, Primitive::kPrimFloat); - physical_fp_nodes_[i] = - new (allocator_) InterferenceNode(allocator_, interval, liveness); + LiveInterval* interval = + LiveInterval::MakeFixedInterval(allocator_, i, DataType::Type::kFloat32); + physical_fp_nodes_[i] = new (allocator_) InterferenceNode(interval, liveness); physical_fp_nodes_[i]->stage = NodeStage::kPrecolored; fp_intervals_.push_back(interval); if (codegen_->IsBlockedFloatingPointRegister(i)) { @@ -596,12 +620,14 @@ RegisterAllocatorGraphColor::RegisterAllocatorGraphColor(ArenaAllocator* allocat } } +RegisterAllocatorGraphColor::~RegisterAllocatorGraphColor() {} + void RegisterAllocatorGraphColor::AllocateRegisters() { // (1) Collect and prepare live intervals. ProcessInstructions(); for (bool processing_core_regs : {true, false}) { - ArenaVector<LiveInterval*>& intervals = processing_core_regs + ScopedArenaVector<LiveInterval*>& intervals = processing_core_regs ? core_intervals_ : fp_intervals_; size_t num_registers = processing_core_regs @@ -618,17 +644,15 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { << "should be prioritized over long ones, because they cannot be split further.)"; // Many data structures are cleared between graph coloring attempts, so we reduce - // total memory usage by using a new arena allocator for each attempt. - ArenaAllocator coloring_attempt_allocator(allocator_->GetArenaPool()); + // total memory usage by using a new scoped arena allocator for each attempt. 
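The pattern above, a fresh scoped arena allocator per colouring attempt, releases everything allocated during the attempt when the scope ends. A minimal RAII sketch of a bump allocator with that rollback behaviour; `Pool` and `ScopedArena` are simplified stand-ins, not ART's ArenaStack/ScopedArenaAllocator:

#include <cstddef>
#include <vector>

// A toy bump allocator: a scope remembers the high-water mark at construction
// and rolls the pool back to it on destruction, so every allocation made
// during one attempt is released in O(1) when the scope ends.
class Pool {
 public:
  explicit Pool(size_t capacity) : storage_(capacity), used_(0u) {}

  void* Allocate(size_t bytes) {
    if (used_ + bytes > storage_.size()) return nullptr;  // Out of memory.
    void* result = storage_.data() + used_;
    used_ += bytes;
    return result;
  }

  size_t Used() const { return used_; }
  void RollBackTo(size_t mark) { used_ = mark; }

 private:
  std::vector<unsigned char> storage_;
  size_t used_;
};

class ScopedArena {
 public:
  explicit ScopedArena(Pool* pool) : pool_(pool), mark_(pool->Used()) {}
  ~ScopedArena() { pool_->RollBackTo(mark_); }

  void* Alloc(size_t bytes) { return pool_->Allocate(bytes); }

 private:
  Pool* const pool_;
  const size_t mark_;
};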
+ ScopedArenaAllocator coloring_attempt_allocator(allocator_->GetArenaStack()); ColoringIteration iteration(this, &coloring_attempt_allocator, processing_core_regs, num_registers); - // (2) Build the interference graph. Also gather safepoints. - ArenaVector<InterferenceNode*> safepoints( - coloring_attempt_allocator.Adapter(kArenaAllocRegisterAllocator)); - ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs + // (2) Build the interference graph. + ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs ? physical_core_nodes_ : physical_fp_nodes_; iteration.BuildInterferenceGraph(intervals, physical_nodes); @@ -690,7 +714,7 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { } // for processing_core_instructions // (6) Resolve locations and deconstruct SSA form. - RegisterAllocationResolver(allocator_, codegen_, liveness_) + RegisterAllocationResolver(codegen_, liveness_) .Resolve(ArrayRef<HInstruction* const>(safepoints_), reserved_art_method_slots_ + reserved_out_slots_, num_int_spill_slots_, @@ -698,7 +722,7 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { num_float_spill_slots_, num_double_spill_slots_, catch_phi_spill_slot_counter_, - temp_intervals_); + ArrayRef<LiveInterval* const>(temp_intervals_)); if (kIsDebugBuild) { Validate(/*log_fatal_on_failure*/ true); @@ -707,8 +731,9 @@ void RegisterAllocatorGraphColor::AllocateRegisters() { bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { for (bool processing_core_regs : {true, false}) { - ArenaVector<LiveInterval*> intervals( - allocator_->Adapter(kArenaAllocRegisterAllocatorValidate)); + ScopedArenaAllocator allocator(allocator_->GetArenaStack()); + ScopedArenaVector<LiveInterval*> intervals( + allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); LiveInterval* interval = instruction->GetLiveInterval(); @@ -717,7 +742,7 @@ bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { } } - ArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs + ScopedArenaVector<InterferenceNode*>& physical_nodes = processing_core_regs ? physical_core_nodes_ : physical_fp_nodes_; for (InterferenceNode* fixed : physical_nodes) { @@ -741,11 +766,10 @@ bool RegisterAllocatorGraphColor::Validate(bool log_fatal_on_failure) { + num_float_spill_slots_ + num_double_spill_slots_ + catch_phi_spill_slot_counter_; - bool ok = ValidateIntervals(intervals, + bool ok = ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), spill_slots, reserved_art_method_slots_ + reserved_out_slots_, *codegen_, - allocator_, processing_core_regs, log_fatal_on_failure); if (!ok) { @@ -824,7 +848,7 @@ void RegisterAllocatorGraphColor::ProcessInstruction(HInstruction* instruction) CheckForFixedOutput(instruction); AllocateSpillSlotForCatchPhi(instruction); - ArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval) + ScopedArenaVector<LiveInterval*>& intervals = IsCoreInterval(interval) ? 
core_intervals_ : fp_intervals_; if (interval->HasSpillSlot() || instruction->IsConstant()) { @@ -936,7 +960,7 @@ void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instru switch (temp.GetPolicy()) { case Location::kRequiresRegister: { LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); + LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32); interval->AddTempUse(instruction, i); core_intervals_.push_back(interval); temp_intervals_.push_back(interval); @@ -945,11 +969,11 @@ void RegisterAllocatorGraphColor::CheckForTempLiveIntervals(HInstruction* instru case Location::kRequiresFpuRegister: { LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); + LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64); interval->AddTempUse(instruction, i); fp_intervals_.push_back(interval); temp_intervals_.push_back(interval); - if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { interval->AddHighInterval(/*is_temp*/ true); temp_intervals_.push_back(interval->GetHighInterval()); } @@ -1074,11 +1098,12 @@ void ColoringIteration::AddPotentialInterference(InterferenceNode* from, } else if (to->IsPrecolored()) { // It is important that only a single node represents a given fixed register in the // interference graph. We retrieve that node here. - const ArenaVector<InterferenceNode*>& physical_nodes = to->GetInterval()->IsFloatingPoint() - ? register_allocator_->physical_fp_nodes_ - : register_allocator_->physical_core_nodes_; + const ScopedArenaVector<InterferenceNode*>& physical_nodes = + to->GetInterval()->IsFloatingPoint() ? register_allocator_->physical_fp_nodes_ + : register_allocator_->physical_core_nodes_; InterferenceNode* physical_node = physical_nodes[to->GetInterval()->GetRegister()]; - from->AddInterference(physical_node, /*guaranteed_not_interfering_yet*/ false); + from->AddInterference( + physical_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); DCHECK_EQ(to->GetInterval()->GetRegister(), physical_node->GetInterval()->GetRegister()); DCHECK_EQ(to->GetAlias(), physical_node) << "Fixed nodes should alias the canonical fixed node"; @@ -1096,11 +1121,12 @@ void ColoringIteration::AddPotentialInterference(InterferenceNode* from, physical_nodes[to->GetInterval()->GetHighInterval()->GetRegister()]; DCHECK_EQ(to->GetInterval()->GetHighInterval()->GetRegister(), high_node->GetInterval()->GetRegister()); - from->AddInterference(high_node, /*guaranteed_not_interfering_yet*/ false); + from->AddInterference( + high_node, /*guaranteed_not_interfering_yet*/ false, &adjacent_nodes_links_); } } else { // Standard interference between two uncolored nodes. - from->AddInterference(to, guaranteed_not_interfering_yet); + from->AddInterference(to, guaranteed_not_interfering_yet, &adjacent_nodes_links_); } if (both_directions) { @@ -1155,8 +1181,8 @@ static bool CheckInputOutputCanOverlap(InterferenceNode* in_node, InterferenceNo } void ColoringIteration::BuildInterferenceGraph( - const ArenaVector<LiveInterval*>& intervals, - const ArenaVector<InterferenceNode*>& physical_nodes) { + const ScopedArenaVector<LiveInterval*>& intervals, + const ScopedArenaVector<InterferenceNode*>& physical_nodes) { DCHECK(interval_node_map_.Empty() && prunable_nodes_.empty()); // Build the interference graph efficiently by ordering range endpoints // by position and doing a linear sweep to find interferences. 
(That is, we @@ -1170,7 +1196,7 @@ void ColoringIteration::BuildInterferenceGraph( // // For simplicity, we create a tuple for each endpoint, and then sort the tuples. // Tuple contents: (position, is_range_beginning, node). - ArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( + ScopedArenaVector<std::tuple<size_t, bool, InterferenceNode*>> range_endpoints( allocator_->Adapter(kArenaAllocRegisterAllocator)); // We reserve plenty of space to avoid excessive copying. @@ -1180,8 +1206,8 @@ void ColoringIteration::BuildInterferenceGraph( for (LiveInterval* sibling = parent; sibling != nullptr; sibling = sibling->GetNextSibling()) { LiveRange* range = sibling->GetFirstRange(); if (range != nullptr) { - InterferenceNode* node = new (allocator_) InterferenceNode( - allocator_, sibling, register_allocator_->liveness_); + InterferenceNode* node = + new (allocator_) InterferenceNode(sibling, register_allocator_->liveness_); interval_node_map_.Insert(std::make_pair(sibling, node)); if (sibling->HasRegister()) { @@ -1216,8 +1242,7 @@ void ColoringIteration::BuildInterferenceGraph( }); // Nodes live at the current position in the linear sweep. - ArenaVector<InterferenceNode*> live( - allocator_->Adapter(kArenaAllocRegisterAllocator)); + ScopedArenaVector<InterferenceNode*> live(allocator_->Adapter(kArenaAllocRegisterAllocator)); // Linear sweep. When we encounter the beginning of a range, we add the corresponding node to the // live set. When we encounter the end of a range, we remove the corresponding node @@ -1260,8 +1285,8 @@ void ColoringIteration::CreateCoalesceOpportunity(InterferenceNode* a, << "Nodes of different memory widths should never be coalesced"; CoalesceOpportunity* opportunity = new (allocator_) CoalesceOpportunity(a, b, kind, position, register_allocator_->liveness_); - a->AddCoalesceOpportunity(opportunity); - b->AddCoalesceOpportunity(opportunity); + a->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); + b->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); coalesce_worklist_.push(opportunity); } @@ -1331,7 +1356,7 @@ void ColoringIteration::FindCoalesceOpportunities() { // Coalesce phi inputs with the corresponding output. HInstruction* defined_by = interval->GetDefinedBy(); if (defined_by != nullptr && defined_by->IsPhi()) { - const ArenaVector<HBasicBlock*>& predecessors = defined_by->GetBlock()->GetPredecessors(); + ArrayRef<HBasicBlock* const> predecessors(defined_by->GetBlock()->GetPredecessors()); HInputsRef inputs = defined_by->GetInputs(); for (size_t i = 0, e = inputs.size(); i < e; ++i) { @@ -1674,7 +1699,7 @@ void ColoringIteration::Combine(InterferenceNode* from, // Add coalesce opportunities. for (CoalesceOpportunity* opportunity : from->GetCoalesceOpportunities()) { if (opportunity->stage != CoalesceStage::kDefunct) { - into->AddCoalesceOpportunity(opportunity); + into->AddCoalesceOpportunity(opportunity, &coalesce_opportunities_links_); } } EnableCoalesceOpportunities(from); @@ -1728,7 +1753,7 @@ void ColoringIteration::Coalesce(CoalesceOpportunity* opportunity) { // Build a mask with a bit set for each register assigned to some // interval in `intervals`. 
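The BuildConflictMask / FindFirstZeroInConflictMask pair in the hunk below picks a color by marking every register held by an interfering neighbour in a bitset and then taking the lowest clear bit. A simplified, self-contained sketch of that selection step; the 32-register bound and the "-1 means uncolored" convention are assumptions of this sketch, not ART's representation:

    #include <bitset>
    #include <cstddef>
    #include <vector>

    constexpr std::size_t kMaxNumRegs = 32;  // illustrative bound, assumed >= num_regs

    // Returns the lowest register not already held by an interfering neighbour,
    // or num_regs when every register conflicts (i.e. the node must be spilled).
    std::size_t PickFirstFreeRegister(const std::vector<int>& neighbor_registers,
                                      std::size_t num_regs) {
      std::bitset<kMaxNumRegs> conflict_mask;
      for (int reg : neighbor_registers) {
        if (reg >= 0) {  // in this sketch, -1 marks a neighbour with no color yet
          conflict_mask.set(static_cast<std::size_t>(reg));
        }
      }
      for (std::size_t reg = 0; reg < num_regs; ++reg) {
        if (!conflict_mask.test(reg)) {
          return reg;  // first register outside the conflict mask
        }
      }
      return num_regs;  // no free register
    }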
template <typename Container> -static std::bitset<kMaxNumRegs> BuildConflictMask(Container& intervals) { +static std::bitset<kMaxNumRegs> BuildConflictMask(const Container& intervals) { std::bitset<kMaxNumRegs> conflict_mask; for (InterferenceNode* adjacent : intervals) { LiveInterval* conflicting = adjacent->GetInterval(); @@ -1764,7 +1789,7 @@ static size_t FindFirstZeroInConflictMask(std::bitset<kMaxNumRegs> conflict_mask bool ColoringIteration::ColorInterferenceGraph() { DCHECK_LE(num_regs_, kMaxNumRegs) << "kMaxNumRegs is too small"; - ArenaVector<LiveInterval*> colored_intervals( + ScopedArenaVector<LiveInterval*> colored_intervals( allocator_->Adapter(kArenaAllocRegisterAllocator)); bool successful = true; @@ -1887,16 +1912,18 @@ bool ColoringIteration::ColorInterferenceGraph() { return successful; } -void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes) { +void RegisterAllocatorGraphColor::AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes) { // The register allocation resolver will organize the stack based on value type, // so we assign stack slots for each value type separately. - ArenaVector<LiveInterval*> double_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); - ArenaVector<LiveInterval*> long_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); - ArenaVector<LiveInterval*> float_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); - ArenaVector<LiveInterval*> int_intervals(allocator_->Adapter(kArenaAllocRegisterAllocator)); + ScopedArenaAllocator allocator(allocator_->GetArenaStack()); + ScopedArenaAllocatorAdapter<void> adapter = allocator.Adapter(kArenaAllocRegisterAllocator); + ScopedArenaVector<LiveInterval*> double_intervals(adapter); + ScopedArenaVector<LiveInterval*> long_intervals(adapter); + ScopedArenaVector<LiveInterval*> float_intervals(adapter); + ScopedArenaVector<LiveInterval*> int_intervals(adapter); // The set of parent intervals already handled. - ArenaSet<LiveInterval*> seen(allocator_->Adapter(kArenaAllocRegisterAllocator)); + ScopedArenaSet<LiveInterval*> seen(adapter); // Find nodes that need spill slots. for (InterferenceNode* node : nodes) { @@ -1927,24 +1954,27 @@ void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<Interfere // We need to find a spill slot for this interval. Place it in the correct // worklist to be processed later. 
switch (node->GetInterval()->GetType()) { - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: double_intervals.push_back(parent); break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: long_intervals.push_back(parent); break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: float_intervals.push_back(parent); break; - case Primitive::kPrimNot: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimShort: + case DataType::Type::kReference: + case DataType::Type::kInt32: + case DataType::Type::kUint16: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kBool: + case DataType::Type::kInt16: int_intervals.push_back(parent); break; - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unexpected type for interval " << node->GetInterval()->GetType(); UNREACHABLE(); } @@ -1952,23 +1982,24 @@ void RegisterAllocatorGraphColor::AllocateSpillSlots(const ArenaVector<Interfere } // Color spill slots for each value type. - ColorSpillSlots(&double_intervals, &num_double_spill_slots_); - ColorSpillSlots(&long_intervals, &num_long_spill_slots_); - ColorSpillSlots(&float_intervals, &num_float_spill_slots_); - ColorSpillSlots(&int_intervals, &num_int_spill_slots_); + ColorSpillSlots(ArrayRef<LiveInterval* const>(double_intervals), &num_double_spill_slots_); + ColorSpillSlots(ArrayRef<LiveInterval* const>(long_intervals), &num_long_spill_slots_); + ColorSpillSlots(ArrayRef<LiveInterval* const>(float_intervals), &num_float_spill_slots_); + ColorSpillSlots(ArrayRef<LiveInterval* const>(int_intervals), &num_int_spill_slots_); } -void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* intervals, - size_t* num_stack_slots_used) { +void RegisterAllocatorGraphColor::ColorSpillSlots(ArrayRef<LiveInterval* const> intervals, + /* out */ size_t* num_stack_slots_used) { // We cannot use the original interference graph here because spill slots are assigned to // all of the siblings of an interval, whereas an interference node represents only a single // sibling. So, we assign spill slots linear-scan-style by sorting all the interval endpoints // by position, and assigning the lowest spill slot available when we encounter an interval // beginning. We ignore lifetime holes for simplicity. - ArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( - allocator_->Adapter(kArenaAllocRegisterAllocator)); + ScopedArenaAllocator allocator(allocator_->GetArenaStack()); + ScopedArenaVector<std::tuple<size_t, bool, LiveInterval*>> interval_endpoints( + allocator.Adapter(kArenaAllocRegisterAllocator)); - for (LiveInterval* parent_interval : *intervals) { + for (LiveInterval* parent_interval : intervals) { DCHECK(parent_interval->IsParent()); DCHECK(!parent_interval->HasSpillSlot()); size_t start = parent_interval->GetStart(); @@ -1988,7 +2019,7 @@ void RegisterAllocatorGraphColor::ColorSpillSlots(ArenaVector<LiveInterval*>* in < std::tie(std::get<0>(rhs), std::get<1>(rhs)); }); - ArenaBitVector taken(allocator_, 0, true); + ArenaBitVector taken(&allocator, 0, true, kArenaAllocRegisterAllocator); for (auto it = interval_endpoints.begin(), end = interval_endpoints.end(); it != end; ++it) { // Extract information from the current tuple. 
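ColorSpillSlots (above) assigns slots linear-scan style: endpoint tuples are sorted by (position, is-beginning), a single sweep hands each interval the lowest slot that is free when it begins, and the slot is released again at the interval's end; lifetime holes are ignored. The interference graph build earlier relies on the same sorted-endpoint sweep. A self-contained sketch of that assignment strategy, with an invented Interval struct standing in for LiveInterval:

    #include <algorithm>
    #include <cstddef>
    #include <tuple>
    #include <vector>

    // Invented for the sketch; stands in for a parent LiveInterval.
    struct Interval {
      std::size_t start = 0;
      std::size_t end = 0;   // exclusive
      std::size_t slot = 0;  // assigned below
    };

    // Returns the number of spill slots used. Each interval receives the lowest
    // slot that is free for its whole lifetime; ends sort before begins at equal
    // positions, so back-to-back intervals may share a slot.
    std::size_t AssignSpillSlots(std::vector<Interval>& intervals) {
      // Endpoint tuples: (position, is_begin, interval).
      std::vector<std::tuple<std::size_t, bool, Interval*>> endpoints;
      for (Interval& interval : intervals) {
        endpoints.emplace_back(interval.start, true, &interval);
        endpoints.emplace_back(interval.end, false, &interval);
      }
      std::sort(endpoints.begin(), endpoints.end(),
                [](const auto& lhs, const auto& rhs) {
                  return std::tie(std::get<0>(lhs), std::get<1>(lhs)) <
                         std::tie(std::get<0>(rhs), std::get<1>(rhs));
                });

      std::vector<bool> taken;  // taken[i] is true while slot i is in use
      for (const auto& endpoint : endpoints) {
        Interval* interval = std::get<2>(endpoint);
        if (std::get<1>(endpoint)) {  // interval begins: claim the lowest free slot
          std::size_t slot = 0;
          while (slot < taken.size() && taken[slot]) {
            ++slot;
          }
          if (slot == taken.size()) {
            taken.push_back(false);
          }
          taken[slot] = true;
          interval->slot = slot;
        } else {  // interval ends: its slot becomes available again
          taken[interval->slot] = false;
        }
      }
      return taken.size();
    }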
LiveInterval* parent_interval; diff --git a/compiler/optimizing/register_allocator_graph_color.h b/compiler/optimizing/register_allocator_graph_color.h index 548687f784..3072c92e0f 100644 --- a/compiler/optimizing/register_allocator_graph_color.h +++ b/compiler/optimizing/register_allocator_graph_color.h @@ -18,10 +18,10 @@ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_GRAPH_COLOR_H_ #include "arch/instruction_set.h" -#include "base/arena_containers.h" #include "base/arena_object.h" +#include "base/array_ref.h" #include "base/macros.h" -#include "primitive.h" +#include "base/scoped_arena_containers.h" #include "register_allocator.h" namespace art { @@ -86,11 +86,11 @@ enum class CoalesceKind; */ class RegisterAllocatorGraphColor : public RegisterAllocator { public: - RegisterAllocatorGraphColor(ArenaAllocator* allocator, + RegisterAllocatorGraphColor(ScopedArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& analysis, bool iterative_move_coalescing = true); - ~RegisterAllocatorGraphColor() OVERRIDE {} + ~RegisterAllocatorGraphColor() OVERRIDE; void AllocateRegisters() OVERRIDE; @@ -142,11 +142,10 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { // Assigns stack slots to a list of intervals, ensuring that interfering intervals are not // assigned the same stack slot. - void ColorSpillSlots(ArenaVector<LiveInterval*>* nodes, - size_t* num_stack_slots_used); + void ColorSpillSlots(ArrayRef<LiveInterval* const> nodes, /* out */ size_t* num_stack_slots_used); // Provide stack slots to nodes that need them. - void AllocateSpillSlots(const ArenaVector<InterferenceNode*>& nodes); + void AllocateSpillSlots(ArrayRef<InterferenceNode* const> nodes); // Whether iterative move coalescing should be performed. Iterative move coalescing // improves code quality, but increases compile time. @@ -155,19 +154,19 @@ class RegisterAllocatorGraphColor : public RegisterAllocator { // Live intervals, split by kind (core and floating point). // These should not contain high intervals, as those are represented by // the corresponding low interval throughout register allocation. - ArenaVector<LiveInterval*> core_intervals_; - ArenaVector<LiveInterval*> fp_intervals_; + ScopedArenaVector<LiveInterval*> core_intervals_; + ScopedArenaVector<LiveInterval*> fp_intervals_; // Intervals for temporaries, saved for special handling in the resolution phase. - ArenaVector<LiveInterval*> temp_intervals_; + ScopedArenaVector<LiveInterval*> temp_intervals_; // Safepoints, saved for special handling while processing instructions. - ArenaVector<HInstruction*> safepoints_; + ScopedArenaVector<HInstruction*> safepoints_; // Interference nodes representing specific registers. These are "pre-colored" nodes // in the interference graph. - ArenaVector<InterferenceNode*> physical_core_nodes_; - ArenaVector<InterferenceNode*> physical_fp_nodes_; + ScopedArenaVector<InterferenceNode*> physical_core_nodes_; + ScopedArenaVector<InterferenceNode*> physical_fp_nodes_; // Allocated stack slot counters. 
size_t num_int_spill_slots_; diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc index ab8d540359..216fb57a96 100644 --- a/compiler/optimizing/register_allocator_linear_scan.cc +++ b/compiler/optimizing/register_allocator_linear_scan.cc @@ -40,7 +40,7 @@ static bool IsLowOfUnalignedPairInterval(LiveInterval* low) { return GetHighForLowRegister(low->GetRegister()) != low->GetHighInterval()->GetRegister(); } -RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocator, +RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ScopedArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& liveness) : RegisterAllocator(allocator, codegen, liveness), @@ -81,16 +81,18 @@ RegisterAllocatorLinearScan::RegisterAllocatorLinearScan(ArenaAllocator* allocat reserved_out_slots_ = ptr_size / kVRegSize + codegen->GetGraph()->GetMaximumNumberOfOutVRegs(); } +RegisterAllocatorLinearScan::~RegisterAllocatorLinearScan() {} + static bool ShouldProcess(bool processing_core_registers, LiveInterval* interval) { if (interval == nullptr) return false; - bool is_core_register = (interval->GetType() != Primitive::kPrimDouble) - && (interval->GetType() != Primitive::kPrimFloat); + bool is_core_register = (interval->GetType() != DataType::Type::kFloat64) + && (interval->GetType() != DataType::Type::kFloat32); return processing_core_registers == is_core_register; } void RegisterAllocatorLinearScan::AllocateRegisters() { AllocateRegistersInternal(); - RegisterAllocationResolver(allocator_, codegen_, liveness_) + RegisterAllocationResolver(codegen_, liveness_) .Resolve(ArrayRef<HInstruction* const>(safepoints_), reserved_out_slots_, int_spill_slots_.size(), @@ -98,7 +100,7 @@ void RegisterAllocatorLinearScan::AllocateRegisters() { float_spill_slots_.size(), double_spill_slots_.size(), catch_phi_spill_slots_, - temp_intervals_); + ArrayRef<LiveInterval* const>(temp_intervals_)); if (kIsDebugBuild) { processing_core_registers_ = true; @@ -132,9 +134,9 @@ void RegisterAllocatorLinearScan::BlockRegister(Location location, size_t start, LiveInterval* interval = location.IsRegister() ? physical_core_register_intervals_[reg] : physical_fp_register_intervals_[reg]; - Primitive::Type type = location.IsRegister() - ? Primitive::kPrimInt - : Primitive::kPrimFloat; + DataType::Type type = location.IsRegister() + ? 
DataType::Type::kInt32 + : DataType::Type::kFloat32; if (interval == nullptr) { interval = LiveInterval::MakeFixedInterval(allocator_, reg, type); if (location.IsRegister()) { @@ -237,7 +239,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) switch (temp.GetPolicy()) { case Location::kRequiresRegister: { LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimInt); + LiveInterval::MakeTempInterval(allocator_, DataType::Type::kInt32); temp_intervals_.push_back(interval); interval->AddTempUse(instruction, i); unhandled_core_intervals_.push_back(interval); @@ -246,10 +248,10 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) case Location::kRequiresFpuRegister: { LiveInterval* interval = - LiveInterval::MakeTempInterval(allocator_, Primitive::kPrimDouble); + LiveInterval::MakeTempInterval(allocator_, DataType::Type::kFloat64); temp_intervals_.push_back(interval); interval->AddTempUse(instruction, i); - if (codegen_->NeedsTwoRegisters(Primitive::kPrimDouble)) { + if (codegen_->NeedsTwoRegisters(DataType::Type::kFloat64)) { interval->AddHighInterval(/* is_temp */ true); LiveInterval* high = interval->GetHighInterval(); temp_intervals_.push_back(high); @@ -266,8 +268,8 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) } } - bool core_register = (instruction->GetType() != Primitive::kPrimDouble) - && (instruction->GetType() != Primitive::kPrimFloat); + bool core_register = (instruction->GetType() != DataType::Type::kFloat64) + && (instruction->GetType() != DataType::Type::kFloat32); if (locations->NeedsSafepoint()) { if (codegen_->IsLeafMethod()) { @@ -298,7 +300,7 @@ void RegisterAllocatorLinearScan::ProcessInstruction(HInstruction* instruction) LiveInterval* current = instruction->GetLiveInterval(); if (current == nullptr) return; - ArenaVector<LiveInterval*>& unhandled = core_register + ScopedArenaVector<LiveInterval*>& unhandled = core_register ? unhandled_core_intervals_ : unhandled_fp_intervals_; @@ -425,7 +427,9 @@ class AllRangesIterator : public ValueObject { bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) const { // To simplify unit testing, we eagerly create the array of intervals, and // call the helper method. - ArenaVector<LiveInterval*> intervals(allocator_->Adapter(kArenaAllocRegisterAllocatorValidate)); + ScopedArenaAllocator allocator(allocator_->GetArenaStack()); + ScopedArenaVector<LiveInterval*> intervals( + allocator.Adapter(kArenaAllocRegisterAllocatorValidate)); for (size_t i = 0; i < liveness_.GetNumberOfSsaValues(); ++i) { HInstruction* instruction = liveness_.GetInstructionFromSsaIndex(i); if (ShouldProcess(processing_core_registers_, instruction->GetLiveInterval())) { @@ -433,7 +437,7 @@ bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) co } } - const ArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_ + const ScopedArenaVector<LiveInterval*>* physical_register_intervals = processing_core_registers_ ? 
&physical_core_register_intervals_ : &physical_fp_register_intervals_; for (LiveInterval* fixed : *physical_register_intervals) { @@ -448,8 +452,12 @@ bool RegisterAllocatorLinearScan::ValidateInternal(bool log_fatal_on_failure) co } } - return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_, - allocator_, processing_core_registers_, log_fatal_on_failure); + return ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), + GetNumberOfSpillSlots(), + reserved_out_slots_, + *codegen_, + processing_core_registers_, + log_fatal_on_failure); } void RegisterAllocatorLinearScan::DumpInterval(std::ostream& stream, LiveInterval* interval) const { @@ -813,7 +821,7 @@ int RegisterAllocatorLinearScan::FindAvailableRegister(size_t* next_use, LiveInt // Remove interval and its other half if any. Return iterator to the following element. static ArenaVector<LiveInterval*>::iterator RemoveIntervalAndPotentialOtherHalf( - ArenaVector<LiveInterval*>* intervals, ArenaVector<LiveInterval*>::iterator pos) { + ScopedArenaVector<LiveInterval*>* intervals, ScopedArenaVector<LiveInterval*>::iterator pos) { DCHECK(intervals->begin() <= pos && pos < intervals->end()); LiveInterval* interval = *pos; if (interval->IsLowInterval()) { @@ -1044,7 +1052,8 @@ bool RegisterAllocatorLinearScan::AllocateBlockedReg(LiveInterval* current) { } } -void RegisterAllocatorLinearScan::AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval) { +void RegisterAllocatorLinearScan::AddSorted(ScopedArenaVector<LiveInterval*>* array, + LiveInterval* interval) { DCHECK(!interval->IsFixed() && !interval->HasSpillSlot()); size_t insert_at = 0; for (size_t i = array->size(); i > 0; --i) { @@ -1102,26 +1111,29 @@ void RegisterAllocatorLinearScan::AllocateSpillSlotFor(LiveInterval* interval) { return; } - ArenaVector<size_t>* spill_slots = nullptr; + ScopedArenaVector<size_t>* spill_slots = nullptr; switch (interval->GetType()) { - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: spill_slots = &double_spill_slots_; break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: spill_slots = &long_spill_slots_; break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: spill_slots = &float_spill_slots_; break; - case Primitive::kPrimNot: - case Primitive::kPrimInt: - case Primitive::kPrimChar: - case Primitive::kPrimByte: - case Primitive::kPrimBoolean: - case Primitive::kPrimShort: + case DataType::Type::kReference: + case DataType::Type::kInt32: + case DataType::Type::kUint16: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kBool: + case DataType::Type::kInt16: spill_slots = &int_spill_slots_; break; - case Primitive::kPrimVoid: + case DataType::Type::kUint32: + case DataType::Type::kUint64: + case DataType::Type::kVoid: LOG(FATAL) << "Unexpected type for interval " << interval->GetType(); } diff --git a/compiler/optimizing/register_allocator_linear_scan.h b/compiler/optimizing/register_allocator_linear_scan.h index b3834f45e4..36788b7c3c 100644 --- a/compiler/optimizing/register_allocator_linear_scan.h +++ b/compiler/optimizing/register_allocator_linear_scan.h @@ -18,9 +18,8 @@ #define ART_COMPILER_OPTIMIZING_REGISTER_ALLOCATOR_LINEAR_SCAN_H_ #include "arch/instruction_set.h" -#include "base/arena_containers.h" +#include "base/scoped_arena_containers.h" #include "base/macros.h" -#include "primitive.h" #include "register_allocator.h" namespace art { @@ -40,10 +39,10 @@ class SsaLivenessAnalysis; */ class RegisterAllocatorLinearScan 
: public RegisterAllocator { public: - RegisterAllocatorLinearScan(ArenaAllocator* allocator, + RegisterAllocatorLinearScan(ScopedArenaAllocator* allocator, CodeGenerator* codegen, const SsaLivenessAnalysis& analysis); - ~RegisterAllocatorLinearScan() OVERRIDE {} + ~RegisterAllocatorLinearScan() OVERRIDE; void AllocateRegisters() OVERRIDE; @@ -71,7 +70,7 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { bool AllocateBlockedReg(LiveInterval* interval); // Add `interval` in the given sorted list. - static void AddSorted(ArenaVector<LiveInterval*>* array, LiveInterval* interval); + static void AddSorted(ScopedArenaVector<LiveInterval*>* array, LiveInterval* interval); // Returns whether `reg` is blocked by the code generator. bool IsBlocked(int reg) const; @@ -108,43 +107,43 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { // List of intervals for core registers that must be processed, ordered by start // position. Last entry is the interval that has the lowest start position. // This list is initially populated before doing the linear scan. - ArenaVector<LiveInterval*> unhandled_core_intervals_; + ScopedArenaVector<LiveInterval*> unhandled_core_intervals_; // List of intervals for floating-point registers. Same comments as above. - ArenaVector<LiveInterval*> unhandled_fp_intervals_; + ScopedArenaVector<LiveInterval*> unhandled_fp_intervals_; // Currently processed list of unhandled intervals. Either `unhandled_core_intervals_` // or `unhandled_fp_intervals_`. - ArenaVector<LiveInterval*>* unhandled_; + ScopedArenaVector<LiveInterval*>* unhandled_; // List of intervals that have been processed. - ArenaVector<LiveInterval*> handled_; + ScopedArenaVector<LiveInterval*> handled_; // List of intervals that are currently active when processing a new live interval. // That is, they have a live range that spans the start of the new interval. - ArenaVector<LiveInterval*> active_; + ScopedArenaVector<LiveInterval*> active_; // List of intervals that are currently inactive when processing a new live interval. // That is, they have a lifetime hole that spans the start of the new interval. - ArenaVector<LiveInterval*> inactive_; + ScopedArenaVector<LiveInterval*> inactive_; // Fixed intervals for physical registers. Such intervals cover the positions // where an instruction requires a specific register. - ArenaVector<LiveInterval*> physical_core_register_intervals_; - ArenaVector<LiveInterval*> physical_fp_register_intervals_; + ScopedArenaVector<LiveInterval*> physical_core_register_intervals_; + ScopedArenaVector<LiveInterval*> physical_fp_register_intervals_; // Intervals for temporaries. Such intervals cover the positions // where an instruction requires a temporary. - ArenaVector<LiveInterval*> temp_intervals_; + ScopedArenaVector<LiveInterval*> temp_intervals_; // The spill slots allocated for live intervals. We ensure spill slots // are typed to avoid (1) doing moves and swaps between two different kinds // of registers, and (2) swapping between a single stack slot and a double // stack slot. This simplifies the parallel move resolver. - ArenaVector<size_t> int_spill_slots_; - ArenaVector<size_t> long_spill_slots_; - ArenaVector<size_t> float_spill_slots_; - ArenaVector<size_t> double_spill_slots_; + ScopedArenaVector<size_t> int_spill_slots_; + ScopedArenaVector<size_t> long_spill_slots_; + ScopedArenaVector<size_t> float_spill_slots_; + ScopedArenaVector<size_t> double_spill_slots_; // Spill slots allocated to catch phis. 
This category is special-cased because // (1) slots are allocated prior to linear scan and in reverse linear order, @@ -152,7 +151,7 @@ class RegisterAllocatorLinearScan : public RegisterAllocator { size_t catch_phi_spill_slots_; // Instructions that need a safepoint. - ArenaVector<HInstruction*> safepoints_; + ScopedArenaVector<HInstruction*> safepoints_; // True if processing core registers. False if processing floating // point registers. diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc index 24a2ab24d8..a70b0664dc 100644 --- a/compiler/optimizing/register_allocator_test.cc +++ b/compiler/optimizing/register_allocator_test.cc @@ -14,18 +14,19 @@ * limitations under the License. */ +#include "register_allocator.h" + #include "arch/x86/instruction_set_features_x86.h" #include "base/arena_allocator.h" #include "builder.h" #include "code_generator.h" #include "code_generator_x86.h" -#include "dex_file.h" -#include "dex_file_types.h" -#include "dex_instruction.h" +#include "dex/dex_file.h" +#include "dex/dex_file_types.h" +#include "dex/dex_instruction.h" #include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" -#include "register_allocator.h" #include "register_allocator_linear_scan.h" #include "ssa_liveness_analysis.h" #include "ssa_phi_elimination.h" @@ -37,12 +38,36 @@ using Strategy = RegisterAllocator::Strategy; // Note: the register allocator tests rely on the fact that constants have live // intervals and registers get allocated to them. -class RegisterAllocatorTest : public CommonCompilerTest { +class RegisterAllocatorTest : public OptimizingUnitTest { protected: // These functions need to access private variables of LocationSummary, so we declare it // as a member of RegisterAllocatorTest, which we make a friend class. - static void SameAsFirstInputHint(Strategy strategy); - static void ExpectedInRegisterHint(Strategy strategy); + void SameAsFirstInputHint(Strategy strategy); + void ExpectedInRegisterHint(Strategy strategy); + + // Helper functions that make use of the OptimizingUnitTest's members. + bool Check(const std::vector<uint16_t>& data, Strategy strategy); + void CFG1(Strategy strategy); + void Loop1(Strategy strategy); + void Loop2(Strategy strategy); + void Loop3(Strategy strategy); + void DeadPhi(Strategy strategy); + HGraph* BuildIfElseWithPhi(HPhi** phi, HInstruction** input1, HInstruction** input2); + void PhiHint(Strategy strategy); + HGraph* BuildFieldReturn(HInstruction** field, HInstruction** ret); + HGraph* BuildTwoSubs(HInstruction** first_sub, HInstruction** second_sub); + HGraph* BuildDiv(HInstruction** div); + void ExpectedExactInRegisterAndSameOutputHint(Strategy strategy); + + bool ValidateIntervals(const ScopedArenaVector<LiveInterval*>& intervals, + const CodeGenerator& codegen) { + return RegisterAllocator::ValidateIntervals(ArrayRef<LiveInterval* const>(intervals), + /* number_of_spill_slots */ 0u, + /* number_of_out_slots */ 0u, + codegen, + /* processing_core_registers */ true, + /* log_fatal_on_failure */ false); + } }; // This macro should include all register allocation strategies that should be tested. 
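The fixture above turns the old static helpers into members of RegisterAllocatorTest so they can use the OptimizingUnitTest allocators, and the TEST_ALL_STRATEGIES macro (its graph-coloring half is visible in the next hunk) stamps out one gtest per allocation strategy from a single helper. The full macro definition is not shown in this hunk; a hypothetical sketch of how such a macro can be written:

    // Hypothetical reconstruction, shown for illustration only; it relies on the
    // RegisterAllocatorTest fixture and the `Strategy` alias declared above.
    // Generates one gtest per register allocation strategy from a single helper.
    #define TEST_ALL_STRATEGIES(test_name)                     \
      TEST_F(RegisterAllocatorTest, test_name##_LinearScan) {  \
        test_name(Strategy::kRegisterAllocatorLinearScan);     \
      }                                                        \
      TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {  \
        test_name(Strategy::kRegisterAllocatorGraphColor);     \
      }

    // Usage (as in the hunks below): TEST_ALL_STRATEGIES(CFG1); expands into
    // RegisterAllocatorTest.CFG1_LinearScan and RegisterAllocatorTest.CFG1_GraphColor,
    // each of which simply calls CFG1(strategy).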
@@ -54,17 +79,15 @@ TEST_F(RegisterAllocatorTest, test_name##_GraphColor) {\ test_name(Strategy::kRegisterAllocatorGraphColor);\ } -static bool Check(const uint16_t* data, Strategy strategy) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); +bool RegisterAllocatorTest::Check(const std::vector<uint16_t>& data, Strategy strategy) { + HGraph* graph = CreateCFG(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); return register_allocator->Validate(false); } @@ -74,95 +97,82 @@ static bool Check(const uint16_t* data, Strategy strategy) { * tests are based on this validation method. */ TEST_F(RegisterAllocatorTest, ValidateIntervals) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); + HGraph* graph = CreateGraph(); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - ArenaVector<LiveInterval*> intervals(allocator.Adapter()); + ScopedArenaVector<LiveInterval*> intervals(GetScopedAllocator()->Adapter()); // Test with two intervals of the same range. { static constexpr size_t ranges[][2] = {{0, 42}}; - intervals.push_back(BuildInterval(ranges, arraysize(ranges), &allocator, 0)); - intervals.push_back(BuildInterval(ranges, arraysize(ranges), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + intervals.push_back(BuildInterval(ranges, arraysize(ranges), GetScopedAllocator(), 0)); + intervals.push_back(BuildInterval(ranges, arraysize(ranges), GetScopedAllocator(), 1)); + ASSERT_TRUE(ValidateIntervals(intervals, codegen)); intervals[1]->SetRegister(0); - ASSERT_FALSE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + ASSERT_FALSE(ValidateIntervals(intervals, codegen)); intervals.clear(); } // Test with two non-intersecting intervals. { static constexpr size_t ranges1[][2] = {{0, 42}}; - intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), GetScopedAllocator(), 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; - intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), GetScopedAllocator(), 1)); + ASSERT_TRUE(ValidateIntervals(intervals, codegen)); intervals[1]->SetRegister(0); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + ASSERT_TRUE(ValidateIntervals(intervals, codegen)); intervals.clear(); } // Test with two non-intersecting intervals, with one with a lifetime hole. 
{ static constexpr size_t ranges1[][2] = {{0, 42}, {45, 48}}; - intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), GetScopedAllocator(), 0)); static constexpr size_t ranges2[][2] = {{42, 43}}; - intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), GetScopedAllocator(), 1)); + ASSERT_TRUE(ValidateIntervals(intervals, codegen)); intervals[1]->SetRegister(0); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + ASSERT_TRUE(ValidateIntervals(intervals, codegen)); intervals.clear(); } // Test with intersecting intervals. { static constexpr size_t ranges1[][2] = {{0, 42}, {44, 48}}; - intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), GetScopedAllocator(), 0)); static constexpr size_t ranges2[][2] = {{42, 47}}; - intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), GetScopedAllocator(), 1)); + ASSERT_TRUE(ValidateIntervals(intervals, codegen)); intervals[1]->SetRegister(0); - ASSERT_FALSE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + ASSERT_FALSE(ValidateIntervals(intervals, codegen)); intervals.clear(); } // Test with siblings. { static constexpr size_t ranges1[][2] = {{0, 42}, {44, 48}}; - intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), &allocator, 0)); + intervals.push_back(BuildInterval(ranges1, arraysize(ranges1), GetScopedAllocator(), 0)); intervals[0]->SplitAt(43); static constexpr size_t ranges2[][2] = {{42, 47}}; - intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), &allocator, 1)); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + intervals.push_back(BuildInterval(ranges2, arraysize(ranges2), GetScopedAllocator(), 1)); + ASSERT_TRUE(ValidateIntervals(intervals, codegen)); intervals[1]->SetRegister(0); // Sibling of the first interval has no register allocated to it. 
- ASSERT_TRUE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + ASSERT_TRUE(ValidateIntervals(intervals, codegen)); intervals[0]->GetNextSibling()->SetRegister(0); - ASSERT_FALSE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + ASSERT_FALSE(ValidateIntervals(intervals, codegen)); } } -static void CFG1(Strategy strategy) { +void RegisterAllocatorTest::CFG1(Strategy strategy) { /* * Test the following snippet: * return 0; @@ -175,7 +185,7 @@ static void CFG1(Strategy strategy) { * | * exit */ - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); @@ -184,7 +194,7 @@ static void CFG1(Strategy strategy) { TEST_ALL_STRATEGIES(CFG1); -static void Loop1(Strategy strategy) { +void RegisterAllocatorTest::Loop1(Strategy strategy) { /* * Test the following snippet: * int a = 0; @@ -212,7 +222,7 @@ static void Loop1(Strategy strategy) { * exit */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -225,7 +235,7 @@ static void Loop1(Strategy strategy) { TEST_ALL_STRATEGIES(Loop1); -static void Loop2(Strategy strategy) { +void RegisterAllocatorTest::Loop2(Strategy strategy) { /* * Test the following snippet: * int a = 0; @@ -258,7 +268,7 @@ static void Loop2(Strategy strategy) { * exit */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 8 << 12 | 1 << 8, Instruction::IF_EQ | 1 << 8, 7, @@ -276,7 +286,7 @@ static void Loop2(Strategy strategy) { TEST_ALL_STRATEGIES(Loop2); -static void Loop3(Strategy strategy) { +void RegisterAllocatorTest::Loop3(Strategy strategy) { /* * Test the following snippet: * int a = 0 @@ -304,7 +314,7 @@ static void Loop3(Strategy strategy) { * exit */ - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::ADD_INT_LIT8 | 1 << 8, 1 << 8, Instruction::CONST_4 | 5 << 12 | 2 << 8, @@ -313,16 +323,14 @@ static void Loop3(Strategy strategy) { Instruction::MOVE | 1 << 12 | 0 << 8, Instruction::GOTO | 0xF900); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_TRUE(register_allocator->Validate(false)); @@ -343,20 +351,18 @@ static void Loop3(Strategy strategy) { TEST_ALL_STRATEGIES(Loop3); TEST_F(RegisterAllocatorTest, FirstRegisterUse) { - const uint16_t data[] = THREE_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = THREE_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8, 
Instruction::XOR_INT_LIT8 | 0 << 8, 1 << 8, Instruction::XOR_INT_LIT8 | 1 << 8, 1 << 8 | 1, Instruction::RETURN_VOID); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); HXor* first_xor = graph->GetBlocks()[1]->GetFirstInstruction()->AsXor(); @@ -382,7 +388,7 @@ TEST_F(RegisterAllocatorTest, FirstRegisterUse) { ASSERT_EQ(new_interval->FirstRegisterUse(), last_xor->GetLifetimePosition()); } -static void DeadPhi(Strategy strategy) { +void RegisterAllocatorTest::DeadPhi(Strategy strategy) { /* Test for a dead loop phi taking as back-edge input a phi that also has * this loop phi as input. Walking backwards in SsaDeadPhiElimination * does not solve the problem because the loop phi will be visited last. @@ -396,7 +402,7 @@ static void DeadPhi(Strategy strategy) { * } while (true); */ - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::CONST_4 | 1 << 8 | 0, Instruction::IF_NE | 1 << 8 | 1 << 12, 3, @@ -404,17 +410,15 @@ static void DeadPhi(Strategy strategy) { Instruction::GOTO | 0xFD00, Instruction::RETURN_VOID); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); SsaDeadPhiElimination(graph).Run(); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_TRUE(register_allocator->Validate(false)); } @@ -428,20 +432,18 @@ TEST_ALL_STRATEGIES(DeadPhi); * This test only applies to the linear scan allocator. */ TEST_F(RegisterAllocatorTest, FreeUntil) { - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN); - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); + HGraph* graph = CreateCFG(data); SsaDeadPhiElimination(graph).Run(); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); - RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness); + RegisterAllocatorLinearScan register_allocator(GetScopedAllocator(), &codegen, liveness); // Add an artifical range to cover the temps that will be put in the unhandled list. 
LiveInterval* unhandled = graph->GetEntryBlock()->GetFirstInstruction()->GetLiveInterval(); @@ -460,20 +462,21 @@ TEST_F(RegisterAllocatorTest, FreeUntil) { // Add three temps holding the same register, and starting at different positions. // Put the one that should be picked in the middle of the inactive list to ensure // we do not depend on an order. - LiveInterval* interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt); + LiveInterval* interval = + LiveInterval::MakeFixedInterval(GetScopedAllocator(), 0, DataType::Type::kInt32); interval->AddRange(40, 50); register_allocator.inactive_.push_back(interval); - interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt); + interval = LiveInterval::MakeFixedInterval(GetScopedAllocator(), 0, DataType::Type::kInt32); interval->AddRange(20, 30); register_allocator.inactive_.push_back(interval); - interval = LiveInterval::MakeFixedInterval(&allocator, 0, Primitive::kPrimInt); + interval = LiveInterval::MakeFixedInterval(GetScopedAllocator(), 0, DataType::Type::kInt32); interval->AddRange(60, 70); register_allocator.inactive_.push_back(interval); register_allocator.number_of_registers_ = 1; - register_allocator.registers_array_ = allocator.AllocArray<size_t>(1); + register_allocator.registers_array_ = GetAllocator()->AllocArray<size_t>(1); register_allocator.processing_core_registers_ = true; register_allocator.unhandled_ = &register_allocator.unhandled_core_intervals_; @@ -486,36 +489,35 @@ TEST_F(RegisterAllocatorTest, FreeUntil) { ASSERT_EQ(20u, register_allocator.unhandled_->front()->GetStart()); } -static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, - HPhi** phi, - HInstruction** input1, - HInstruction** input2) { - HGraph* graph = CreateGraph(allocator); - HBasicBlock* entry = new (allocator) HBasicBlock(graph); +HGraph* RegisterAllocatorTest::BuildIfElseWithPhi(HPhi** phi, + HInstruction** input1, + HInstruction** input2) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter = new (allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* parameter = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter); - HBasicBlock* block = new (allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - HInstruction* test = new (allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimBoolean, - MemberOffset(22), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0); + HInstruction* test = new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kBool, + MemberOffset(22), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0); block->AddInstruction(test); - block->AddInstruction(new (allocator) HIf(test)); - HBasicBlock* then = new (allocator) HBasicBlock(graph); - HBasicBlock* else_ = new (allocator) HBasicBlock(graph); - HBasicBlock* join = new (allocator) HBasicBlock(graph); + block->AddInstruction(new (GetAllocator()) HIf(test)); + HBasicBlock* then = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* else_ = new (GetAllocator()) HBasicBlock(graph); + HBasicBlock* join = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(then);
graph->AddBlock(else_); graph->AddBlock(join); @@ -524,32 +526,32 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, block->AddSuccessor(else_); then->AddSuccessor(join); else_->AddSuccessor(join); - then->AddInstruction(new (allocator) HGoto()); - else_->AddInstruction(new (allocator) HGoto()); + then->AddInstruction(new (GetAllocator()) HGoto()); + else_->AddInstruction(new (GetAllocator()) HGoto()); - *phi = new (allocator) HPhi(allocator, 0, 0, Primitive::kPrimInt); + *phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); join->AddPhi(*phi); - *input1 = new (allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimInt, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0); - *input2 = new (allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimInt, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0); + *input1 = new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kInt32, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0); + *input2 = new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kInt32, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0); then->AddInstruction(*input1); else_->AddInstruction(*input2); - join->AddInstruction(new (allocator) HExit()); + join->AddInstruction(new (GetAllocator()) HExit()); (*phi)->AddInput(*input1); (*phi)->AddInput(*input2); @@ -558,23 +560,21 @@ static HGraph* BuildIfElseWithPhi(ArenaAllocator* allocator, return graph; } -static void PhiHint(Strategy strategy) { - ArenaPool pool; - ArenaAllocator allocator(&pool); +void RegisterAllocatorTest::PhiHint(Strategy strategy) { HPhi *phi; HInstruction *input1, *input2; { - HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); + HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // Check that the register allocator is deterministic. - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 0); @@ -583,18 +583,18 @@ static void PhiHint(Strategy strategy) { } { - HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); + HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // Set the phi to a specific register, and check that the inputs get allocated // the same register. 
phi->GetLocations()->UpdateOut(Location::RegisterLocation(2)); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2); @@ -603,18 +603,18 @@ static void PhiHint(Strategy strategy) { } { - HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); + HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // Set input1 to a specific register, and check that the phi and other input get allocated // the same register. input1->GetLocations()->UpdateOut(Location::RegisterLocation(2)); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2); @@ -623,18 +623,18 @@ static void PhiHint(Strategy strategy) { } { - HGraph* graph = BuildIfElseWithPhi(&allocator, &phi, &input1, &input2); + HGraph* graph = BuildIfElseWithPhi(&phi, &input1, &input2); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // Set input2 to a specific register, and check that the phi and other input get allocated // the same register. 
input2->GetLocations()->UpdateOut(Location::RegisterLocation(2)); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(input1->GetLiveInterval()->GetRegister(), 2); @@ -649,58 +649,54 @@ TEST_F(RegisterAllocatorTest, PhiHint_LinearScan) { PhiHint(Strategy::kRegisterAllocatorLinearScan); } -static HGraph* BuildFieldReturn(ArenaAllocator* allocator, - HInstruction** field, - HInstruction** ret) { - HGraph* graph = CreateGraph(allocator); - HBasicBlock* entry = new (allocator) HBasicBlock(graph); +HGraph* RegisterAllocatorTest::BuildFieldReturn(HInstruction** field, HInstruction** ret) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter = new (allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); + HInstruction* parameter = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); entry->AddInstruction(parameter); - HBasicBlock* block = new (allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - *field = new (allocator) HInstanceFieldGet(parameter, - nullptr, - Primitive::kPrimInt, - MemberOffset(42), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph->GetDexFile(), - 0); + *field = new (GetAllocator()) HInstanceFieldGet(parameter, + nullptr, + DataType::Type::kInt32, + MemberOffset(42), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph->GetDexFile(), + 0); block->AddInstruction(*field); - *ret = new (allocator) HReturn(*field); + *ret = new (GetAllocator()) HReturn(*field); block->AddInstruction(*ret); - HBasicBlock* exit = new (allocator) HBasicBlock(graph); + HBasicBlock* exit = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(exit); block->AddSuccessor(exit); - exit->AddInstruction(new (allocator) HExit()); + exit->AddInstruction(new (GetAllocator()) HExit()); graph->BuildDominatorTree(); return graph; } void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) { - ArenaPool pool; - ArenaAllocator allocator(&pool); HInstruction *field, *ret; { - HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); + HGraph* graph = BuildFieldReturn(&field, &ret); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); // Sanity check that in normal conditions, the register should be hinted to 0 (EAX). 
@@ -708,19 +704,19 @@ void RegisterAllocatorTest::ExpectedInRegisterHint(Strategy strategy) { } { - HGraph* graph = BuildFieldReturn(&allocator, &field, &ret); + HGraph* graph = BuildFieldReturn(&field, &ret); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // Check that the field gets put in the register expected by its use. // Don't use SetInAt because we are overriding an already allocated location. ret->GetLocations()->inputs_[0] = Location::RegisterLocation(2); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(field->GetLiveInterval()->GetRegister(), 2); @@ -733,50 +729,46 @@ TEST_F(RegisterAllocatorTest, ExpectedInRegisterHint_LinearScan) { ExpectedInRegisterHint(Strategy::kRegisterAllocatorLinearScan); } -static HGraph* BuildTwoSubs(ArenaAllocator* allocator, - HInstruction** first_sub, - HInstruction** second_sub) { - HGraph* graph = CreateGraph(allocator); - HBasicBlock* entry = new (allocator) HBasicBlock(graph); +HGraph* RegisterAllocatorTest::BuildTwoSubs(HInstruction** first_sub, HInstruction** second_sub) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* parameter = new (allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); + HInstruction* parameter = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); entry->AddInstruction(parameter); HInstruction* constant1 = graph->GetIntConstant(1); HInstruction* constant2 = graph->GetIntConstant(2); - HBasicBlock* block = new (allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - *first_sub = new (allocator) HSub(Primitive::kPrimInt, parameter, constant1); + *first_sub = new (GetAllocator()) HSub(DataType::Type::kInt32, parameter, constant1); block->AddInstruction(*first_sub); - *second_sub = new (allocator) HSub(Primitive::kPrimInt, *first_sub, constant2); + *second_sub = new (GetAllocator()) HSub(DataType::Type::kInt32, *first_sub, constant2); block->AddInstruction(*second_sub); - block->AddInstruction(new (allocator) HExit()); + block->AddInstruction(new (GetAllocator()) HExit()); graph->BuildDominatorTree(); return graph; } void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { - ArenaPool pool; - ArenaAllocator allocator(&pool); HInstruction *first_sub, *second_sub; { - HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); + HGraph* graph = BuildTwoSubs(&first_sub, &second_sub); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, 
&codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); // Sanity check that in normal conditions, the registers are the same. @@ -785,11 +777,11 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { } { - HGraph* graph = BuildTwoSubs(&allocator, &first_sub, &second_sub); + HGraph* graph = BuildTwoSubs(&first_sub, &second_sub); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); liveness.Analyze(); // check that both adds get the same register. @@ -798,8 +790,8 @@ void RegisterAllocatorTest::SameAsFirstInputHint(Strategy strategy) { ASSERT_EQ(first_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); ASSERT_EQ(second_sub->GetLocations()->Out().GetPolicy(), Location::kSameAsFirstInput); - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); register_allocator->AllocateRegisters(); ASSERT_EQ(first_sub->GetLiveInterval()->GetRegister(), 2); @@ -813,52 +805,47 @@ TEST_F(RegisterAllocatorTest, SameAsFirstInputHint_LinearScan) { SameAsFirstInputHint(Strategy::kRegisterAllocatorLinearScan); } -static HGraph* BuildDiv(ArenaAllocator* allocator, - HInstruction** div) { - HGraph* graph = CreateGraph(allocator); - HBasicBlock* entry = new (allocator) HBasicBlock(graph); +HGraph* RegisterAllocatorTest::BuildDiv(HInstruction** div) { + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* first = new (allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); - HInstruction* second = new (allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); + HInstruction* first = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); + HInstruction* second = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); entry->AddInstruction(first); entry->AddInstruction(second); - HBasicBlock* block = new (allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - *div = new (allocator) HDiv(Primitive::kPrimInt, first, second, 0); // don't care about dex_pc. + *div = new (GetAllocator()) HDiv( + DataType::Type::kInt32, first, second, 0); // don't care about dex_pc. 
block->AddInstruction(*div); - block->AddInstruction(new (allocator) HExit()); + block->AddInstruction(new (GetAllocator()) HExit()); graph->BuildDominatorTree(); return graph; } -static void ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) { - ArenaPool pool; - ArenaAllocator allocator(&pool); +void RegisterAllocatorTest::ExpectedExactInRegisterAndSameOutputHint(Strategy strategy) { HInstruction *div; + HGraph* graph = BuildDiv(&div); + std::unique_ptr<const X86InstructionSetFeatures> features_x86( + X86InstructionSetFeatures::FromCppDefines()); + x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); + liveness.Analyze(); - { - HGraph* graph = BuildDiv(&allocator, &div); - std::unique_ptr<const X86InstructionSetFeatures> features_x86( - X86InstructionSetFeatures::FromCppDefines()); - x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); - liveness.Analyze(); - - RegisterAllocator* register_allocator = - RegisterAllocator::Create(&allocator, &codegen, liveness, strategy); - register_allocator->AllocateRegisters(); + std::unique_ptr<RegisterAllocator> register_allocator = + RegisterAllocator::Create(GetScopedAllocator(), &codegen, liveness, strategy); + register_allocator->AllocateRegisters(); - // div on x86 requires its first input in eax and the output be the same as the first input. - ASSERT_EQ(div->GetLiveInterval()->GetRegister(), 0); - } + // div on x86 requires its first input in eax and the output be the same as the first input. + ASSERT_EQ(div->GetLiveInterval()->GetRegister(), 0); } // TODO: Enable this test for graph coloring register allocation when iterative move @@ -872,59 +859,57 @@ TEST_F(RegisterAllocatorTest, ExpectedExactInRegisterAndSameOutputHint_LinearSca // position. // This test only applies to the linear scan allocator. TEST_F(RegisterAllocatorTest, SpillInactive) { - ArenaPool pool; - // Create a synthesized graph to please the register_allocator and // ssa_liveness_analysis code. 
- ArenaAllocator allocator(&pool); - HGraph* graph = CreateGraph(&allocator); - HBasicBlock* entry = new (&allocator) HBasicBlock(graph); + HGraph* graph = CreateGraph(); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(entry); graph->SetEntryBlock(entry); - HInstruction* one = new (&allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); - HInstruction* two = new (&allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); - HInstruction* three = new (&allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); - HInstruction* four = new (&allocator) HParameterValue( - graph->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); + HInstruction* one = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); + HInstruction* two = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); + HInstruction* three = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); + HInstruction* four = new (GetAllocator()) HParameterValue( + graph->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); entry->AddInstruction(one); entry->AddInstruction(two); entry->AddInstruction(three); entry->AddInstruction(four); - HBasicBlock* block = new (&allocator) HBasicBlock(graph); + HBasicBlock* block = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(block); entry->AddSuccessor(block); - block->AddInstruction(new (&allocator) HExit()); + block->AddInstruction(new (GetAllocator()) HExit()); // We create a synthesized user requesting a register, to avoid just spilling the // intervals. - HPhi* user = new (&allocator) HPhi(&allocator, 0, 1, Primitive::kPrimInt); + HPhi* user = new (GetAllocator()) HPhi(GetAllocator(), 0, 1, DataType::Type::kInt32); user->AddInput(one); user->SetBlock(block); - LocationSummary* locations = new (&allocator) LocationSummary(user, LocationSummary::kNoCall); + LocationSummary* locations = new (GetAllocator()) LocationSummary(user, LocationSummary::kNoCall); locations->SetInAt(0, Location::RequiresRegister()); static constexpr size_t phi_ranges[][2] = {{20, 30}}; - BuildInterval(phi_ranges, arraysize(phi_ranges), &allocator, -1, user); + BuildInterval(phi_ranges, arraysize(phi_ranges), GetScopedAllocator(), -1, user); // Create an interval with lifetime holes. 
static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}}; - LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one); - first->uses_.push_front(*new(&allocator) UsePosition(user, false, 8)); - first->uses_.push_front(*new(&allocator) UsePosition(user, false, 7)); - first->uses_.push_front(*new(&allocator) UsePosition(user, false, 6)); + LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), GetScopedAllocator(), -1, one); + first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 8)); + first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 7)); + first->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 6)); - locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall); + locations = new (GetAllocator()) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); first = first->SplitAt(1); // Create an interval that conflicts with the next interval, to force the next // interval to call `AllocateBlockedReg`. static constexpr size_t ranges2[][2] = {{2, 4}}; - LiveInterval* second = BuildInterval(ranges2, arraysize(ranges2), &allocator, -1, two); - locations = new (&allocator) LocationSummary(second->GetDefinedBy(), LocationSummary::kNoCall); + LiveInterval* second = BuildInterval(ranges2, arraysize(ranges2), GetScopedAllocator(), -1, two); + locations = + new (GetAllocator()) LocationSummary(second->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); // Create an interval that will lead to splitting the first interval. The bug occured @@ -933,31 +918,32 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // "[0, 2(, [4, 6(" in the list of handled intervals, even though we haven't processed intervals // before lifetime position 6 yet. static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}}; - LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three); - third->uses_.push_front(*new(&allocator) UsePosition(user, false, 8)); - third->uses_.push_front(*new(&allocator) UsePosition(user, false, 4)); - third->uses_.push_front(*new(&allocator) UsePosition(user, false, 3)); - locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall); + LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), GetScopedAllocator(), -1, three); + third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 8)); + third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 4)); + third->uses_.push_front(*new (GetScopedAllocator()) UsePosition(user, false, 3)); + locations = new (GetAllocator()) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); third = third->SplitAt(3); // Because the first part of the split interval was considered handled, this interval // was free to allocate the same register, even though it conflicts with it. 
static constexpr size_t ranges4[][2] = {{4, 6}}; - LiveInterval* fourth = BuildInterval(ranges4, arraysize(ranges4), &allocator, -1, four); - locations = new (&allocator) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); + LiveInterval* fourth = BuildInterval(ranges4, arraysize(ranges4), GetScopedAllocator(), -1, four); + locations = + new (GetAllocator()) LocationSummary(fourth->GetDefinedBy(), LocationSummary::kNoCall); locations->SetOut(Location::RequiresRegister()); std::unique_ptr<const X86InstructionSetFeatures> features_x86( X86InstructionSetFeatures::FromCppDefines()); x86::CodeGeneratorX86 codegen(graph, *features_x86.get(), CompilerOptions()); - SsaLivenessAnalysis liveness(graph, &codegen); + SsaLivenessAnalysis liveness(graph, &codegen, GetScopedAllocator()); // Populate the instructions in the liveness object, to please the register allocator. for (size_t i = 0; i < 32; ++i) { liveness.instructions_from_lifetime_position_.push_back(user); } - RegisterAllocatorLinearScan register_allocator(&allocator, &codegen, liveness); + RegisterAllocatorLinearScan register_allocator(GetScopedAllocator(), &codegen, liveness); register_allocator.unhandled_core_intervals_.push_back(fourth); register_allocator.unhandled_core_intervals_.push_back(third); register_allocator.unhandled_core_intervals_.push_back(second); @@ -965,19 +951,18 @@ TEST_F(RegisterAllocatorTest, SpillInactive) { // Set just one register available to make all intervals compete for the same. register_allocator.number_of_registers_ = 1; - register_allocator.registers_array_ = allocator.AllocArray<size_t>(1); + register_allocator.registers_array_ = GetAllocator()->AllocArray<size_t>(1); register_allocator.processing_core_registers_ = true; register_allocator.unhandled_ = ®ister_allocator.unhandled_core_intervals_; register_allocator.LinearScan(); // Test that there is no conflicts between intervals. - ArenaVector<LiveInterval*> intervals(allocator.Adapter()); + ScopedArenaVector<LiveInterval*> intervals(GetScopedAllocator()->Adapter()); intervals.push_back(first); intervals.push_back(second); intervals.push_back(third); intervals.push_back(fourth); - ASSERT_TRUE(RegisterAllocator::ValidateIntervals( - intervals, 0, 0, codegen, &allocator, true, false)); + ASSERT_TRUE(ValidateIntervals(intervals, codegen)); } } // namespace art diff --git a/compiler/optimizing/scheduler.cc b/compiler/optimizing/scheduler.cc index 3e373d16fb..bb28d50b56 100644 --- a/compiler/optimizing/scheduler.cc +++ b/compiler/optimizing/scheduler.cc @@ -16,9 +16,13 @@ #include <string> -#include "prepare_for_register_allocation.h" #include "scheduler.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" +#include "data_type-inl.h" +#include "prepare_for_register_allocation.h" + #ifdef ART_ENABLE_CODEGEN_arm64 #include "scheduler_arm64.h" #endif @@ -68,7 +72,7 @@ static bool MayHaveReorderingDependency(SideEffects node, SideEffects other) { size_t SchedulingGraph::ArrayAccessHeapLocation(HInstruction* array, HInstruction* index) const { DCHECK(heap_location_collector_ != nullptr); - size_t heap_loc = heap_location_collector_->GetArrayAccessHeapLocation(array, index); + size_t heap_loc = heap_location_collector_->GetArrayHeapLocation(array, index); // This array access should be analyzed and added to HeapLocationCollector before. 
DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound); return heap_loc; @@ -149,12 +153,7 @@ size_t SchedulingGraph::FieldAccessHeapLocation(HInstruction* obj, const FieldIn DCHECK(field != nullptr); DCHECK(heap_location_collector_ != nullptr); - size_t heap_loc = heap_location_collector_->FindHeapLocationIndex( - heap_location_collector_->FindReferenceInfoOf( - heap_location_collector_->HuntForOriginalReference(obj)), - field->GetFieldOffset().SizeValue(), - nullptr, - field->GetDeclaringClassDefIndex()); + size_t heap_loc = heap_location_collector_->GetFieldHeapLocation(obj, field); // This field access should be analyzed and added to HeapLocationCollector before. DCHECK(heap_loc != HeapLocationCollector::kHeapLocationNotFound); @@ -399,17 +398,7 @@ bool SchedulingGraph::HasImmediateOtherDependency(const HInstruction* instructio } static const std::string InstructionTypeId(const HInstruction* instruction) { - std::string id; - Primitive::Type type = instruction->GetType(); - if (type == Primitive::kPrimNot) { - id.append("l"); - } else { - id.append(Primitive::Descriptor(instruction->GetType())); - } - // Use lower-case to be closer to the `HGraphVisualizer` output. - id[0] = std::tolower(id[0]); - id.append(std::to_string(instruction->GetId())); - return id; + return DataType::TypeId(instruction->GetType()) + std::to_string(instruction->GetId()); } // Ideally we would reuse the graph visualizer code, but it is not available @@ -450,7 +439,7 @@ static void DumpAsDotNode(std::ostream& output, const SchedulingNode* node) { } void SchedulingGraph::DumpAsDotGraph(const std::string& description, - const ArenaVector<SchedulingNode*>& initial_candidates) { + const ScopedArenaVector<SchedulingNode*>& initial_candidates) { // TODO(xueliang): ideally we should move scheduling information into HInstruction, after that // we should move this dotty graph dump feature to visualizer, and have a compiler option for it. std::ofstream output("scheduling_graphs.dot", std::ofstream::out | std::ofstream::app); @@ -459,7 +448,7 @@ void SchedulingGraph::DumpAsDotGraph(const std::string& description, // Start the dot graph. Use an increasing index for easier differentiation. output << "digraph G {\n"; for (const auto& entry : nodes_map_) { - SchedulingNode* node = entry.second; + SchedulingNode* node = entry.second.get(); DumpAsDotNode(output, node); } // Create a fake 'end_of_scheduling' node to help visualization of critical_paths. @@ -474,7 +463,7 @@ void SchedulingGraph::DumpAsDotGraph(const std::string& description, } SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition( - ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const { + ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) const { // Schedule condition inputs that can be materialized immediately before their use. 
// In following example, after we've scheduled HSelect, we want LessThan to be scheduled // immediately, because it is a materialized condition, and will be emitted right before HSelect @@ -514,7 +503,7 @@ SchedulingNode* CriticalPathSchedulingNodeSelector::SelectMaterializedCondition( } SchedulingNode* CriticalPathSchedulingNodeSelector::PopHighestPriorityNode( - ArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) { + ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) { DCHECK(!nodes->empty()); SchedulingNode* select_node = nullptr; @@ -570,7 +559,7 @@ void HScheduler::Schedule(HGraph* graph) { } void HScheduler::Schedule(HBasicBlock* block) { - ArenaVector<SchedulingNode*> scheduling_nodes(arena_->Adapter(kArenaAllocScheduler)); + ScopedArenaVector<SchedulingNode*> scheduling_nodes(allocator_->Adapter(kArenaAllocScheduler)); // Build the scheduling graph. scheduling_graph_.Clear(); @@ -601,7 +590,7 @@ void HScheduler::Schedule(HBasicBlock* block) { } } - ArenaVector<SchedulingNode*> initial_candidates(arena_->Adapter(kArenaAllocScheduler)); + ScopedArenaVector<SchedulingNode*> initial_candidates(allocator_->Adapter(kArenaAllocScheduler)); if (kDumpDotSchedulingGraphs) { // Remember the list of initial candidates for debug output purposes. initial_candidates.assign(candidates_.begin(), candidates_.end()); @@ -724,8 +713,8 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsClassTableGet() || instruction->IsCurrentMethod() || instruction->IsDivZeroCheck() || - instruction->IsInstanceFieldGet() || - instruction->IsInstanceFieldSet() || + (instruction->IsInstanceFieldGet() && !instruction->AsInstanceFieldGet()->IsVolatile()) || + (instruction->IsInstanceFieldSet() && !instruction->AsInstanceFieldSet()->IsVolatile()) || instruction->IsInstanceOf() || instruction->IsInvokeInterface() || instruction->IsInvokeStaticOrDirect() || @@ -741,14 +730,10 @@ bool HScheduler::IsSchedulable(const HInstruction* instruction) const { instruction->IsReturn() || instruction->IsReturnVoid() || instruction->IsSelect() || - instruction->IsStaticFieldGet() || - instruction->IsStaticFieldSet() || + (instruction->IsStaticFieldGet() && !instruction->AsStaticFieldGet()->IsVolatile()) || + (instruction->IsStaticFieldSet() && !instruction->AsStaticFieldSet()->IsVolatile()) || instruction->IsSuspendCheck() || - instruction->IsTypeConversion() || - instruction->IsUnresolvedInstanceFieldGet() || - instruction->IsUnresolvedInstanceFieldSet() || - instruction->IsUnresolvedStaticFieldGet() || - instruction->IsUnresolvedStaticFieldSet(); + instruction->IsTypeConversion(); } bool HScheduler::IsSchedulable(const HBasicBlock* block) const { @@ -791,7 +776,7 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks, #if defined(ART_ENABLE_CODEGEN_arm64) || defined(ART_ENABLE_CODEGEN_arm) // Phase-local allocator that allocates scheduler internal data structures like // scheduling nodes, internel nodes map, dependencies, etc. 
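A minimal sketch of the phase-local allocation pattern that the replacement below adopts. It is illustrative only: it reuses the ScopedArenaAllocator constructor and Adapter() calls that appear elsewhere in this diff (declared in the newly included base/scoped_arena_allocator.h and base/scoped_arena_containers.h), and the variable name "scratch" is a placeholder, not part of the change.

    // Hypothetical pass-local scratch storage, mirroring the scheduler's usage.
    ScopedArenaAllocator allocator(graph_->GetArenaStack());
    ScopedArenaVector<SchedulingNode*> scratch(allocator.Adapter(kArenaAllocScheduler));
    // Everything allocated through `allocator` is released in one go when it
    // goes out of scope at the end of the pass, instead of living as long as
    // the graph's own ArenaAllocator.
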
- ArenaAllocator arena_allocator(graph_->GetArena()->GetArenaPool()); + ScopedArenaAllocator allocator(graph_->GetArenaStack()); CriticalPathSchedulingNodeSelector critical_path_selector; RandomSchedulingNodeSelector random_selector; SchedulingNodeSelector* selector = schedule_randomly @@ -806,18 +791,18 @@ void HInstructionScheduling::Run(bool only_optimize_loop_blocks, switch (instruction_set_) { #ifdef ART_ENABLE_CODEGEN_arm64 - case kArm64: { - arm64::HSchedulerARM64 scheduler(&arena_allocator, selector); + case InstructionSet::kArm64: { + arm64::HSchedulerARM64 scheduler(&allocator, selector); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; } #endif #if defined(ART_ENABLE_CODEGEN_arm) - case kThumb2: - case kArm: { + case InstructionSet::kThumb2: + case InstructionSet::kArm: { arm::SchedulingLatencyVisitorARM arm_latency_visitor(codegen_); - arm::HSchedulerARM scheduler(&arena_allocator, selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(&allocator, selector, &arm_latency_visitor); scheduler.SetOnlyOptimizeLoopBlocks(only_optimize_loop_blocks); scheduler.Schedule(graph_); break; diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h index 930a2c82cf..dfa077f7de 100644 --- a/compiler/optimizing/scheduler.h +++ b/compiler/optimizing/scheduler.h @@ -19,12 +19,14 @@ #include <fstream> +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "base/time_utils.h" +#include "code_generator.h" #include "driver/compiler_driver.h" #include "load_store_analysis.h" #include "nodes.h" #include "optimization.h" -#include "code_generator.h" namespace art { @@ -152,16 +154,16 @@ class HScheduler; /** * A node representing an `HInstruction` in the `SchedulingGraph`. 
*/ -class SchedulingNode : public ArenaObject<kArenaAllocScheduler> { +class SchedulingNode : public DeletableArenaObject<kArenaAllocScheduler> { public: - SchedulingNode(HInstruction* instr, ArenaAllocator* arena, bool is_scheduling_barrier) + SchedulingNode(HInstruction* instr, ScopedArenaAllocator* allocator, bool is_scheduling_barrier) : latency_(0), internal_latency_(0), critical_path_(0), instruction_(instr), is_scheduling_barrier_(is_scheduling_barrier), - data_predecessors_(arena->Adapter(kArenaAllocScheduler)), - other_predecessors_(arena->Adapter(kArenaAllocScheduler)), + data_predecessors_(allocator->Adapter(kArenaAllocScheduler)), + other_predecessors_(allocator->Adapter(kArenaAllocScheduler)), num_unscheduled_successors_(0) { data_predecessors_.reserve(kPreallocatedPredecessors); } @@ -171,11 +173,19 @@ class SchedulingNode : public ArenaObject<kArenaAllocScheduler> { predecessor->num_unscheduled_successors_++; } + const ScopedArenaVector<SchedulingNode*>& GetDataPredecessors() const { + return data_predecessors_; + } + void AddOtherPredecessor(SchedulingNode* predecessor) { other_predecessors_.push_back(predecessor); predecessor->num_unscheduled_successors_++; } + const ScopedArenaVector<SchedulingNode*>& GetOtherPredecessors() const { + return other_predecessors_; + } + void DecrementNumberOfUnscheduledSuccessors() { num_unscheduled_successors_--; } @@ -195,8 +205,6 @@ class SchedulingNode : public ArenaObject<kArenaAllocScheduler> { void SetInternalLatency(uint32_t internal_latency) { internal_latency_ = internal_latency; } uint32_t GetCriticalPath() const { return critical_path_; } bool IsSchedulingBarrier() const { return is_scheduling_barrier_; } - const ArenaVector<SchedulingNode*>& GetDataPredecessors() const { return data_predecessors_; } - const ArenaVector<SchedulingNode*>& GetOtherPredecessors() const { return other_predecessors_; } private: // The latency of this node. It represents the latency between the moment the @@ -227,8 +235,8 @@ class SchedulingNode : public ArenaObject<kArenaAllocScheduler> { // Predecessors in `data_predecessors_` are data dependencies. Those in // `other_predecessors_` contain side-effect dependencies, environment // dependencies, and scheduling barrier dependencies. - ArenaVector<SchedulingNode*> data_predecessors_; - ArenaVector<SchedulingNode*> other_predecessors_; + ScopedArenaVector<SchedulingNode*> data_predecessors_; + ScopedArenaVector<SchedulingNode*> other_predecessors_; // The number of unscheduled successors for this node. This number is // decremented as successors are scheduled. 
When it reaches zero this node @@ -243,19 +251,21 @@ class SchedulingNode : public ArenaObject<kArenaAllocScheduler> { */ class SchedulingGraph : public ValueObject { public: - SchedulingGraph(const HScheduler* scheduler, ArenaAllocator* arena) + SchedulingGraph(const HScheduler* scheduler, ScopedArenaAllocator* allocator) : scheduler_(scheduler), - arena_(arena), + allocator_(allocator), contains_scheduling_barrier_(false), - nodes_map_(arena_->Adapter(kArenaAllocScheduler)), + nodes_map_(allocator_->Adapter(kArenaAllocScheduler)), heap_location_collector_(nullptr) {} SchedulingNode* AddNode(HInstruction* instr, bool is_scheduling_barrier = false) { - SchedulingNode* node = new (arena_) SchedulingNode(instr, arena_, is_scheduling_barrier); - nodes_map_.Insert(std::make_pair(instr, node)); + std::unique_ptr<SchedulingNode> node( + new (allocator_) SchedulingNode(instr, allocator_, is_scheduling_barrier)); + SchedulingNode* result = node.get(); + nodes_map_.Insert(std::make_pair(instr, std::move(node))); contains_scheduling_barrier_ |= is_scheduling_barrier; AddDependencies(instr, is_scheduling_barrier); - return node; + return result; } void Clear() { @@ -272,7 +282,7 @@ class SchedulingGraph : public ValueObject { if (it == nodes_map_.end()) { return nullptr; } else { - return it->second; + return it->second.get(); } } @@ -290,7 +300,7 @@ class SchedulingGraph : public ValueObject { // Dump the scheduling graph, in dot file format, appending it to the file // `scheduling_graphs.dot`. void DumpAsDotGraph(const std::string& description, - const ArenaVector<SchedulingNode*>& initial_candidates); + const ScopedArenaVector<SchedulingNode*>& initial_candidates); protected: void AddDependency(SchedulingNode* node, SchedulingNode* dependency, bool is_data_dependency); @@ -313,11 +323,11 @@ class SchedulingGraph : public ValueObject { const HScheduler* const scheduler_; - ArenaAllocator* const arena_; + ScopedArenaAllocator* const allocator_; bool contains_scheduling_barrier_; - ArenaHashMap<const HInstruction*, SchedulingNode*> nodes_map_; + ScopedArenaHashMap<const HInstruction*, std::unique_ptr<SchedulingNode>> nodes_map_; const HeapLocationCollector* heap_location_collector_; }; @@ -367,11 +377,11 @@ class SchedulingLatencyVisitor : public HGraphDelegateVisitor { class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> { public: - virtual SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes, + virtual SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) = 0; virtual ~SchedulingNodeSelector() {} protected: - static void DeleteNodeAtIndex(ArenaVector<SchedulingNode*>* nodes, size_t index) { + static void DeleteNodeAtIndex(ScopedArenaVector<SchedulingNode*>* nodes, size_t index) { (*nodes)[index] = nodes->back(); nodes->pop_back(); } @@ -382,12 +392,12 @@ class SchedulingNodeSelector : public ArenaObject<kArenaAllocScheduler> { */ class RandomSchedulingNodeSelector : public SchedulingNodeSelector { public: - explicit RandomSchedulingNodeSelector() : seed_(0) { + RandomSchedulingNodeSelector() : seed_(0) { seed_ = static_cast<uint32_t>(NanoTime()); srand(seed_); } - SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes, + SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) OVERRIDE { UNUSED(graph); DCHECK(!nodes->empty()); @@ -408,15 +418,15 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector { 
public: CriticalPathSchedulingNodeSelector() : prev_select_(nullptr) {} - SchedulingNode* PopHighestPriorityNode(ArenaVector<SchedulingNode*>* nodes, + SchedulingNode* PopHighestPriorityNode(ScopedArenaVector<SchedulingNode*>* nodes, const SchedulingGraph& graph) OVERRIDE; protected: SchedulingNode* GetHigherPrioritySchedulingNode(SchedulingNode* candidate, SchedulingNode* check) const; - SchedulingNode* SelectMaterializedCondition(ArenaVector<SchedulingNode*>* nodes, - const SchedulingGraph& graph) const; + SchedulingNode* SelectMaterializedCondition(ScopedArenaVector<SchedulingNode*>* nodes, + const SchedulingGraph& graph) const; private: const SchedulingNode* prev_select_; @@ -424,16 +434,16 @@ class CriticalPathSchedulingNodeSelector : public SchedulingNodeSelector { class HScheduler { public: - HScheduler(ArenaAllocator* arena, + HScheduler(ScopedArenaAllocator* allocator, SchedulingLatencyVisitor* latency_visitor, SchedulingNodeSelector* selector) - : arena_(arena), + : allocator_(allocator), latency_visitor_(latency_visitor), selector_(selector), only_optimize_loop_blocks_(true), - scheduling_graph_(this, arena), + scheduling_graph_(this, allocator), cursor_(nullptr), - candidates_(arena_->Adapter(kArenaAllocScheduler)) {} + candidates_(allocator_->Adapter(kArenaAllocScheduler)) {} virtual ~HScheduler() {} void Schedule(HGraph* graph); @@ -452,6 +462,11 @@ class HScheduler { // containing basic block from being scheduled. // This method is used to restrict scheduling to instructions that we know are // safe to handle. + // + // For newly introduced instructions by default HScheduler::IsSchedulable returns false. + // HScheduler${ARCH}::IsSchedulable can be overridden to return true for an instruction (see + // scheduler_arm64.h for example) if it is safe to schedule it; in this case one *must* also + // look at/update HScheduler${ARCH}::IsSchedulingBarrier for this instruction. virtual bool IsSchedulable(const HInstruction* instruction) const; bool IsSchedulable(const HBasicBlock* block) const; @@ -461,7 +476,7 @@ class HScheduler { node->SetInternalLatency(latency_visitor_->GetLastVisitedInternalLatency()); } - ArenaAllocator* const arena_; + ScopedArenaAllocator* const allocator_; SchedulingLatencyVisitor* const latency_visitor_; SchedulingNodeSelector* const selector_; bool only_optimize_loop_blocks_; @@ -473,7 +488,7 @@ class HScheduler { HInstruction* cursor_; // The list of candidates for scheduling. A node becomes a candidate when all // its predecessors have been scheduled. 
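To make the override pattern described in the IsSchedulable comment above concrete, a hypothetical arch-specific specialization could look like the following. HIntermediateAddress is used purely as an example of an instruction a backend might opt in; the actual per-architecture lists live in scheduler_arm.h and scheduler_arm64.h and are not reproduced here.

    // Hypothetical override: accept one arch-specific instruction, then defer
    // to the generic safe list in HScheduler::IsSchedulable.
    bool HSchedulerARM64::IsSchedulable(const HInstruction* instruction) const {
      return instruction->IsIntermediateAddress() || HScheduler::IsSchedulable(instruction);
    }

As the comment notes, any such opt-in also requires checking HScheduler${ARCH}::IsSchedulingBarrier for the same instruction.
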
- ArenaVector<SchedulingNode*> candidates_; + ScopedArenaVector<SchedulingNode*> candidates_; private: DISALLOW_COPY_AND_ASSIGN(HScheduler); @@ -485,8 +500,11 @@ inline bool SchedulingGraph::IsSchedulingBarrier(const HInstruction* instruction class HInstructionScheduling : public HOptimization { public: - HInstructionScheduling(HGraph* graph, InstructionSet instruction_set, CodeGenerator* cg = nullptr) - : HOptimization(graph, kInstructionScheduling), + HInstructionScheduling(HGraph* graph, + InstructionSet instruction_set, + CodeGenerator* cg = nullptr, + const char* name = kInstructionSchedulingPassName) + : HOptimization(graph, name), codegen_(cg), instruction_set_(instruction_set) {} @@ -495,7 +513,7 @@ class HInstructionScheduling : public HOptimization { } void Run(bool only_optimize_loop_blocks, bool schedule_randomly); - static constexpr const char* kInstructionScheduling = "scheduler"; + static constexpr const char* kInstructionSchedulingPassName = "scheduler"; private: CodeGenerator* const codegen_; diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc index e78cd78aa2..8dcadaad2e 100644 --- a/compiler/optimizing/scheduler_arm.cc +++ b/compiler/optimizing/scheduler_arm.cc @@ -14,11 +14,14 @@ * limitations under the License. */ +#include "scheduler_arm.h" + #include "arch/arm/instruction_set_features_arm.h" #include "code_generator_utils.h" #include "common_arm.h" +#include "heap_poisoning.h" #include "mirror/array-inl.h" -#include "scheduler_arm.h" +#include "mirror/string.h" namespace art { namespace arm { @@ -28,15 +31,15 @@ using helpers::Uint64ConstantFrom; void SchedulingLatencyVisitorARM::HandleBinaryOperationLantencies(HBinaryOperation* instr) { switch (instr->GetResultType()) { - case Primitive::kPrimLong: + case DataType::Type::kInt64: // HAdd and HSub long operations translate to ADDS+ADC or SUBS+SBC pairs, // so a bubble (kArmNopLatency) is added to represent the internal carry flag // dependency inside these pairs. 
last_visited_internal_latency_ = kArmIntegerOpLatency + kArmNopLatency; last_visited_latency_ = kArmIntegerOpLatency; break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: last_visited_latency_ = kArmFloatingPointOpLatency; break; default: @@ -55,12 +58,12 @@ void SchedulingLatencyVisitorARM::VisitSub(HSub* instr) { void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) { switch (instr->GetResultType()) { - case Primitive::kPrimLong: + case DataType::Type::kInt64: last_visited_internal_latency_ = 3 * kArmMulIntegerLatency; last_visited_latency_ = kArmIntegerOpLatency; break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: last_visited_latency_ = kArmMulFloatingPointLatency; break; default: @@ -71,12 +74,12 @@ void SchedulingLatencyVisitorARM::VisitMul(HMul* instr) { void SchedulingLatencyVisitorARM::HandleBitwiseOperationLantencies(HBinaryOperation* instr) { switch (instr->GetResultType()) { - case Primitive::kPrimLong: + case DataType::Type::kInt64: last_visited_internal_latency_ = kArmIntegerOpLatency; last_visited_latency_ = kArmIntegerOpLatency; break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: last_visited_latency_ = kArmFloatingPointOpLatency; break; default: @@ -99,10 +102,10 @@ void SchedulingLatencyVisitorARM::VisitXor(HXor* instr) { void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) { switch (instr->GetResultType()) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: last_visited_latency_ = kArmIntegerOpLatency; break; - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { // HandleLongRotate HInstruction* rhs = instr->GetRight(); if (rhs->IsConstant()) { @@ -127,16 +130,16 @@ void SchedulingLatencyVisitorARM::VisitRor(HRor* instr) { } void SchedulingLatencyVisitorARM::HandleShiftLatencies(HBinaryOperation* instr) { - Primitive::Type type = instr->GetResultType(); + DataType::Type type = instr->GetResultType(); HInstruction* rhs = instr->GetRight(); switch (type) { - case Primitive::kPrimInt: + case DataType::Type::kInt32: if (!rhs->IsConstant()) { last_visited_internal_latency_ = kArmIntegerOpLatency; } last_visited_latency_ = kArmIntegerOpLatency; break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (!rhs->IsConstant()) { last_visited_internal_latency_ = 8 * kArmIntegerOpLatency; } else { @@ -167,37 +170,364 @@ void SchedulingLatencyVisitorARM::VisitUShr(HUShr* instr) { HandleShiftLatencies(instr); } -void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) { - switch (instr->GetLeft()->GetType()) { - case Primitive::kPrimLong: - last_visited_internal_latency_ = 4 * kArmIntegerOpLatency; +void SchedulingLatencyVisitorARM::HandleGenerateConditionWithZero(IfCondition condition) { + switch (condition) { + case kCondEQ: + case kCondBE: + case kCondNE: + case kCondA: + last_visited_internal_latency_ += kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + break; + case kCondGE: + // Mvn + last_visited_internal_latency_ += kArmIntegerOpLatency; + FALLTHROUGH_INTENDED; + case kCondLT: + // Lsr + last_visited_latency_ = kArmIntegerOpLatency; + break; + case kCondAE: + // Trivially true. 
+ // Mov + last_visited_latency_ = kArmIntegerOpLatency; break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: - last_visited_internal_latency_ = 2 * kArmFloatingPointOpLatency; + case kCondB: + // Trivially false. + // Mov + last_visited_latency_ = kArmIntegerOpLatency; break; default: - last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; + LOG(FATAL) << "Unexpected condition " << condition; + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::HandleGenerateLongTestConstant(HCondition* condition) { + DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64); + + IfCondition cond = condition->GetCondition(); + + HInstruction* right = condition->InputAt(1); + + int64_t value = Uint64ConstantFrom(right); + + // Comparisons against 0 are common enough, so codegen has special handling for them. + if (value == 0) { + switch (cond) { + case kCondNE: + case kCondA: + case kCondEQ: + case kCondBE: + // Orrs + last_visited_internal_latency_ += kArmIntegerOpLatency; + return; + case kCondLT: + case kCondGE: + // Cmp + last_visited_internal_latency_ += kArmIntegerOpLatency; + return; + case kCondB: + case kCondAE: + // Cmp + last_visited_internal_latency_ += kArmIntegerOpLatency; + return; + default: + break; + } + } + + switch (cond) { + case kCondEQ: + case kCondNE: + case kCondB: + case kCondBE: + case kCondA: + case kCondAE: { + // Cmp, IT, Cmp + last_visited_internal_latency_ += 3 * kArmIntegerOpLatency; + break; + } + case kCondLE: + case kCondGT: + // Trivially true or false. + if (value == std::numeric_limits<int64_t>::max()) { + // Cmp + last_visited_internal_latency_ += kArmIntegerOpLatency; + break; + } + FALLTHROUGH_INTENDED; + case kCondGE: + case kCondLT: { + // Cmp, Sbcs + last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; + break; + } + default: + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); + } +} + +void SchedulingLatencyVisitorARM::HandleGenerateLongTest(HCondition* condition) { + DCHECK_EQ(condition->GetLeft()->GetType(), DataType::Type::kInt64); + + IfCondition cond = condition->GetCondition(); + + switch (cond) { + case kCondEQ: + case kCondNE: + case kCondB: + case kCondBE: + case kCondA: + case kCondAE: { + // Cmp, IT, Cmp + last_visited_internal_latency_ += 3 * kArmIntegerOpLatency; break; + } + case kCondLE: + case kCondGT: + case kCondGE: + case kCondLT: { + // Cmp, Sbcs + last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; + break; + } + default: + LOG(FATAL) << "Unreachable"; + UNREACHABLE(); } +} + +// The GenerateTest series of function all counted as internal latency. +void SchedulingLatencyVisitorARM::HandleGenerateTest(HCondition* condition) { + const DataType::Type type = condition->GetLeft()->GetType(); + + if (type == DataType::Type::kInt64) { + condition->InputAt(1)->IsConstant() + ? 
HandleGenerateLongTestConstant(condition) + : HandleGenerateLongTest(condition); + } else if (DataType::IsFloatingPointType(type)) { + // GenerateVcmp + Vmrs + last_visited_internal_latency_ += 2 * kArmFloatingPointOpLatency; + } else { + // Cmp + last_visited_internal_latency_ += kArmIntegerOpLatency; + } +} + +bool SchedulingLatencyVisitorARM::CanGenerateTest(HCondition* condition) { + if (condition->GetLeft()->GetType() == DataType::Type::kInt64) { + HInstruction* right = condition->InputAt(1); + + if (right->IsConstant()) { + IfCondition c = condition->GetCondition(); + const uint64_t value = Uint64ConstantFrom(right); + + if (c < kCondLT || c > kCondGE) { + if (value != 0) { + return false; + } + } else if (c == kCondLE || c == kCondGT) { + if (value < std::numeric_limits<int64_t>::max() && + !codegen_->GetAssembler()->ShifterOperandCanHold( + SBC, High32Bits(value + 1), vixl32::FlagsUpdate::SetFlags)) { + return false; + } + } else if (!codegen_->GetAssembler()->ShifterOperandCanHold( + SBC, High32Bits(value), vixl32::FlagsUpdate::SetFlags)) { + return false; + } + } + } + + return true; +} + +void SchedulingLatencyVisitorARM::HandleGenerateConditionGeneric(HCondition* cond) { + HandleGenerateTest(cond); + + // Unlike codegen pass, we cannot check 'out' register IsLow() here, + // because scheduling is before liveness(location builder) and register allocator, + // so we can only choose to follow one path of codegen by assuming otu.IsLow() is true. + last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; last_visited_latency_ = kArmIntegerOpLatency; } +void SchedulingLatencyVisitorARM::HandleGenerateEqualLong(HCondition* cond) { + DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64); + + IfCondition condition = cond->GetCondition(); + + last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; + + if (condition == kCondNE) { + // Orrs, IT, Mov + last_visited_internal_latency_ += 3 * kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + HandleGenerateConditionWithZero(condition); + } +} + +void SchedulingLatencyVisitorARM::HandleGenerateLongComparesAndJumps() { + last_visited_internal_latency_ += 4 * kArmIntegerOpLatency; + last_visited_internal_latency_ += kArmBranchLatency; +} + +void SchedulingLatencyVisitorARM::HandleGenerateConditionLong(HCondition* cond) { + DCHECK_EQ(cond->GetLeft()->GetType(), DataType::Type::kInt64); + + IfCondition condition = cond->GetCondition(); + HInstruction* right = cond->InputAt(1); + + if (right->IsConstant()) { + // Comparisons against 0 are common enough, so codegen has special handling for them. 
+ if (Uint64ConstantFrom(right) == 0) { + switch (condition) { + case kCondNE: + case kCondA: + case kCondEQ: + case kCondBE: + // Orr + last_visited_internal_latency_ += kArmIntegerOpLatency; + HandleGenerateConditionWithZero(condition); + return; + case kCondLT: + case kCondGE: + FALLTHROUGH_INTENDED; + case kCondAE: + case kCondB: + HandleGenerateConditionWithZero(condition); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if ((condition == kCondEQ || condition == kCondNE) && + !CanGenerateTest(cond)) { + HandleGenerateEqualLong(cond); + return; + } + + if (CanGenerateTest(cond)) { + HandleGenerateConditionGeneric(cond); + return; + } + + HandleGenerateLongComparesAndJumps(); + + last_visited_internal_latency_ += kArmIntegerOpLatency; + last_visited_latency_ = kArmBranchLatency;; +} + +void SchedulingLatencyVisitorARM::HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond) { + const DataType::Type type = cond->GetLeft()->GetType(); + + DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; + + if (type == DataType::Type::kInt64) { + HandleGenerateConditionLong(cond); + return; + } + + IfCondition condition = cond->GetCondition(); + HInstruction* right = cond->InputAt(1); + int64_t value; + + if (right->IsConstant()) { + value = Uint64ConstantFrom(right); + + // Comparisons against 0 are common enough, so codegen has special handling for them. + if (value == 0) { + switch (condition) { + case kCondNE: + case kCondA: + case kCondEQ: + case kCondBE: + case kCondLT: + case kCondGE: + case kCondAE: + case kCondB: + HandleGenerateConditionWithZero(condition); + return; + case kCondLE: + case kCondGT: + default: + break; + } + } + } + + if (condition == kCondEQ || condition == kCondNE) { + if (condition == kCondNE) { + // CMP, IT, MOV.ne + last_visited_internal_latency_ += 2 * kArmIntegerOpLatency; + last_visited_latency_ = kArmIntegerOpLatency; + } else { + last_visited_internal_latency_ += kArmIntegerOpLatency; + HandleGenerateConditionWithZero(condition); + } + return; + } + + HandleGenerateConditionGeneric(cond); +} + +void SchedulingLatencyVisitorARM::HandleCondition(HCondition* cond) { + if (cond->IsEmittedAtUseSite()) { + last_visited_latency_ = 0; + return; + } + + const DataType::Type type = cond->GetLeft()->GetType(); + + if (DataType::IsFloatingPointType(type)) { + HandleGenerateConditionGeneric(cond); + return; + } + + DCHECK(DataType::IsIntegralType(type) || type == DataType::Type::kReference) << type; + + const IfCondition condition = cond->GetCondition(); + + if (type == DataType::Type::kBool && + cond->GetRight()->GetType() == DataType::Type::kBool && + (condition == kCondEQ || condition == kCondNE)) { + if (condition == kCondEQ) { + last_visited_internal_latency_ = kArmIntegerOpLatency; + } + last_visited_latency_ = kArmIntegerOpLatency; + return; + } + + HandleGenerateConditionIntegralOrNonPrimitive(cond); +} + +void SchedulingLatencyVisitorARM::VisitCondition(HCondition* instr) { + HandleCondition(instr); +} + void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) { - Primitive::Type type = instr->InputAt(0)->GetType(); + DataType::Type type = instr->InputAt(0)->GetType(); switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case 
DataType::Type::kInt32: last_visited_internal_latency_ = 2 * kArmIntegerOpLatency; break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: last_visited_internal_latency_ = 2 * kArmIntegerOpLatency + 3 * kArmBranchLatency; break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: last_visited_internal_latency_ = kArmIntegerOpLatency + 2 * kArmFloatingPointOpLatency; break; default: @@ -208,7 +538,7 @@ void SchedulingLatencyVisitorARM::VisitCompare(HCompare* instr) { } void SchedulingLatencyVisitorARM::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) { - if (instruction->GetResultType() == Primitive::kPrimInt) { + if (instruction->GetResultType() == DataType::Type::kInt32) { last_visited_latency_ = kArmIntegerOpLatency; } else { last_visited_internal_latency_ = kArmIntegerOpLatency; @@ -239,7 +569,7 @@ void SchedulingLatencyVisitorARM::HandleGenerateDataProc(HDataProcWithShifterOp* } void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction) { - DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64); DCHECK(HDataProcWithShifterOp::IsShiftOp(instruction->GetOpKind())); const uint32_t shift_value = instruction->GetShiftAmount(); @@ -268,11 +598,10 @@ void SchedulingLatencyVisitorARM::HandleGenerateLongDataProc(HDataProcWithShifte void SchedulingLatencyVisitorARM::VisitDataProcWithShifterOp(HDataProcWithShifterOp* instruction) { const HDataProcWithShifterOp::OpKind op_kind = instruction->GetOpKind(); - if (instruction->GetType() == Primitive::kPrimInt) { - DCHECK(!HDataProcWithShifterOp::IsExtensionOp(op_kind)); + if (instruction->GetType() == DataType::Type::kInt32) { HandleGenerateDataProcInstruction(); } else { - DCHECK_EQ(instruction->GetType(), Primitive::kPrimLong); + DCHECK_EQ(instruction->GetType(), DataType::Type::kInt64); if (HDataProcWithShifterOp::IsExtensionOp(op_kind)) { HandleGenerateDataProc(instruction); } else { @@ -298,7 +627,7 @@ void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* A } void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { - Primitive::Type type = instruction->GetType(); + DataType::Type type = instruction->GetType(); const bool maybe_compressed_char_at = mirror::kUseStringCompression && instruction->IsStringCharAt(); HInstruction* array_instr = instruction->GetArray(); @@ -306,11 +635,12 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { HInstruction* index = instruction->InputAt(1); switch (type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { if (maybe_compressed_char_at) { last_visited_internal_latency_ += kArmMemoryLoadLatency; } @@ -338,7 +668,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { last_visited_latency_ = kArmLoadWithBakerReadBarrierLatency; } else { @@ -355,7 +685,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if 
(index->IsConstant()) { last_visited_latency_ = kArmMemoryLoadLatency; } else { @@ -365,7 +695,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (index->IsConstant()) { last_visited_latency_ = kArmMemoryLoadLatency; } else { @@ -375,7 +705,7 @@ void SchedulingLatencyVisitorARM::VisitArrayGet(HArrayGet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (index->IsConstant()) { last_visited_latency_ = kArmMemoryLoadLatency; } else { @@ -401,16 +731,17 @@ void SchedulingLatencyVisitorARM::VisitArrayLength(HArrayLength* instruction) { void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { HInstruction* index = instruction->InputAt(1); - Primitive::Type value_type = instruction->GetComponentType(); + DataType::Type value_type = instruction->GetComponentType(); HInstruction* array_instr = instruction->GetArray(); bool has_intermediate_address = array_instr->IsIntermediateAddress(); switch (value_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: { + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: { if (index->IsConstant()) { last_visited_latency_ = kArmMemoryStoreLatency; } else { @@ -423,7 +754,7 @@ void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimNot: { + case DataType::Type::kReference: { if (instruction->InputAt(2)->IsNullConstant()) { if (index->IsConstant()) { last_visited_latency_ = kArmMemoryStoreLatency; @@ -439,7 +770,7 @@ void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimLong: { + case DataType::Type::kInt64: { if (index->IsConstant()) { last_visited_latency_ = kArmMemoryLoadLatency; } else { @@ -449,7 +780,7 @@ void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimFloat: { + case DataType::Type::kFloat32: { if (index->IsConstant()) { last_visited_latency_ = kArmMemoryLoadLatency; } else { @@ -459,7 +790,7 @@ void SchedulingLatencyVisitorARM::VisitArraySet(HArraySet* instruction) { break; } - case Primitive::kPrimDouble: { + case DataType::Type::kFloat64: { if (index->IsConstant()) { last_visited_latency_ = kArmMemoryLoadLatency; } else { @@ -497,9 +828,9 @@ void SchedulingLatencyVisitorARM::HandleDivRemConstantIntegralLatencies(int32_t } void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) { - Primitive::Type type = instruction->GetResultType(); + DataType::Type type = instruction->GetResultType(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { HInstruction* rhs = instruction->GetRight(); if (rhs->IsConstant()) { int32_t imm = Int32ConstantFrom(rhs->AsConstant()); @@ -509,10 +840,10 @@ void SchedulingLatencyVisitorARM::VisitDiv(HDiv* instruction) { } break; } - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: last_visited_latency_ = kArmDivFloatLatency; break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: last_visited_latency_ = kArmDivDoubleLatency; break; default: @@ -560,9 +891,9 @@ void SchedulingLatencyVisitorARM::VisitNewInstance(HNewInstance* instruction) { } void SchedulingLatencyVisitorARM::VisitRem(HRem* instruction) { - Primitive::Type 
type = instruction->GetResultType(); + DataType::Type type = instruction->GetResultType(); switch (type) { - case Primitive::kPrimInt: { + case DataType::Type::kInt32: { HInstruction* rhs = instruction->GetRight(); if (rhs->IsConstant()) { int32_t imm = Int32ConstantFrom(rhs->AsConstant()); @@ -585,19 +916,20 @@ void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruct DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet()); DCHECK(codegen_ != nullptr); bool is_volatile = field_info.IsVolatile(); - Primitive::Type field_type = field_info.GetFieldType(); + DataType::Type field_type = field_info.GetFieldType(); bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: last_visited_latency_ = kArmMemoryLoadLatency; break; - case Primitive::kPrimNot: + case DataType::Type::kReference: if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) { last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; last_visited_latency_ = kArmMemoryLoadLatency; @@ -606,7 +938,7 @@ void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruct } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (is_volatile && !atomic_ldrd_strd) { last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency; last_visited_latency_ = kArmMemoryLoadLatency; @@ -615,11 +947,11 @@ void SchedulingLatencyVisitorARM::HandleFieldGetLatencies(HInstruction* instruct } break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: last_visited_latency_ = kArmMemoryLoadLatency; break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: if (is_volatile && !atomic_ldrd_strd) { last_visited_internal_latency_ = kArmMemoryLoadLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency; @@ -644,16 +976,17 @@ void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruct DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet()); DCHECK(codegen_ != nullptr); bool is_volatile = field_info.IsVolatile(); - Primitive::Type field_type = field_info.GetFieldType(); + DataType::Type field_type = field_info.GetFieldType(); bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1)); bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd(); switch (field_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimShort: - case Primitive::kPrimChar: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: if (is_volatile) { last_visited_internal_latency_ = kArmMemoryBarrierLatency + kArmMemoryStoreLatency; last_visited_latency_ = kArmMemoryBarrierLatency; @@ -662,15 +995,15 @@ void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruct } break; - case Primitive::kPrimInt: - case Primitive::kPrimNot: + case DataType::Type::kInt32: + case DataType::Type::kReference: if (kPoisonHeapReferences && needs_write_barrier) { last_visited_internal_latency_ += kArmIntegerOpLatency * 2; } last_visited_latency_ = 
kArmMemoryStoreLatency; break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: if (is_volatile && !atomic_ldrd_strd) { last_visited_internal_latency_ = kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency; @@ -680,11 +1013,11 @@ void SchedulingLatencyVisitorARM::HandleFieldSetLatencies(HInstruction* instruct } break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: last_visited_latency_ = kArmMemoryStoreLatency; break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: if (is_volatile && !atomic_ldrd_strd) { last_visited_internal_latency_ = kArmIntegerOpLatency + kArmIntegerOpLatency + kArmMemoryLoadLatency + kArmMemoryStoreLatency; @@ -717,23 +1050,24 @@ void SchedulingLatencyVisitorARM::VisitSuspendCheck(HSuspendCheck* instruction) } void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) { - Primitive::Type result_type = instr->GetResultType(); - Primitive::Type input_type = instr->GetInputType(); + DataType::Type result_type = instr->GetResultType(); + DataType::Type input_type = instr->GetInputType(); switch (result_type) { - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: last_visited_latency_ = kArmIntegerOpLatency; // SBFX or UBFX break; - case Primitive::kPrimInt: + case DataType::Type::kInt32: switch (input_type) { - case Primitive::kPrimLong: + case DataType::Type::kInt64: last_visited_latency_ = kArmIntegerOpLatency; // MOV break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; last_visited_latency_ = kArmFloatingPointOpLatency; break; @@ -743,19 +1077,20 @@ void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) { } break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: switch (input_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: // MOV and extension last_visited_internal_latency_ = kArmIntegerOpLatency; last_visited_latency_ = kArmIntegerOpLatency; break; - case Primitive::kPrimFloat: - case Primitive::kPrimDouble: + case DataType::Type::kFloat32: + case DataType::Type::kFloat64: // invokes runtime last_visited_internal_latency_ = kArmCallInternalLatency; break; @@ -766,21 +1101,22 @@ void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) { } break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: switch (input_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; last_visited_latency_ = kArmFloatingPointOpLatency; break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: // invokes runtime last_visited_internal_latency_ = kArmCallInternalLatency; break; - 
case Primitive::kPrimDouble: + case DataType::Type::kFloat64: last_visited_latency_ = kArmFloatingPointOpLatency; break; default: @@ -789,21 +1125,22 @@ void SchedulingLatencyVisitorARM::VisitTypeConversion(HTypeConversion* instr) { } break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: switch (input_type) { - case Primitive::kPrimBoolean: - case Primitive::kPrimByte: - case Primitive::kPrimChar: - case Primitive::kPrimShort: - case Primitive::kPrimInt: + case DataType::Type::kBool: + case DataType::Type::kUint8: + case DataType::Type::kInt8: + case DataType::Type::kUint16: + case DataType::Type::kInt16: + case DataType::Type::kInt32: last_visited_internal_latency_ = kArmTypeConversionFloatingPointIntegerLatency; last_visited_latency_ = kArmFloatingPointOpLatency; break; - case Primitive::kPrimLong: + case DataType::Type::kInt64: last_visited_internal_latency_ = 5 * kArmFloatingPointOpLatency; last_visited_latency_ = kArmFloatingPointOpLatency; break; - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: last_visited_latency_ = kArmFloatingPointOpLatency; break; default: diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h index a9f2295c35..0cb8684376 100644 --- a/compiler/optimizing/scheduler_arm.h +++ b/compiler/optimizing/scheduler_arm.h @@ -109,6 +109,17 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { #undef DECLARE_VISIT_INSTRUCTION private: + bool CanGenerateTest(HCondition* cond); + void HandleGenerateConditionWithZero(IfCondition cond); + void HandleGenerateLongTestConstant(HCondition* cond); + void HandleGenerateLongTest(HCondition* cond); + void HandleGenerateLongComparesAndJumps(); + void HandleGenerateTest(HCondition* cond); + void HandleGenerateConditionGeneric(HCondition* cond); + void HandleGenerateEqualLong(HCondition* cond); + void HandleGenerateConditionLong(HCondition* cond); + void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond); + void HandleCondition(HCondition* instr); void HandleBinaryOperationLantencies(HBinaryOperation* instr); void HandleBitwiseOperationLantencies(HBinaryOperation* instr); void HandleShiftLatencies(HBinaryOperation* instr); @@ -126,10 +137,10 @@ class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { class HSchedulerARM : public HScheduler { public: - HSchedulerARM(ArenaAllocator* arena, + HSchedulerARM(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector, SchedulingLatencyVisitorARM* arm_latency_visitor) - : HScheduler(arena, arm_latency_visitor, selector) {} + : HScheduler(allocator, arm_latency_visitor, selector) {} ~HSchedulerARM() OVERRIDE {} bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc index 83b487fb5b..7bcf4e75a9 100644 --- a/compiler/optimizing/scheduler_arm64.cc +++ b/compiler/optimizing/scheduler_arm64.cc @@ -15,14 +15,16 @@ */ #include "scheduler_arm64.h" + #include "code_generator_utils.h" #include "mirror/array-inl.h" +#include "mirror/string.h" namespace art { namespace arm64 { void SchedulingLatencyVisitorARM64::VisitBinaryOperation(HBinaryOperation* instr) { - last_visited_latency_ = Primitive::IsFloatingPointType(instr->GetResultType()) + last_visited_latency_ = DataType::IsFloatingPointType(instr->GetResultType()) ? 
kArm64FloatingPointOpLatency : kArm64IntegerOpLatency; } @@ -78,12 +80,12 @@ void SchedulingLatencyVisitorARM64::VisitBoundsCheck(HBoundsCheck* ATTRIBUTE_UNU } void SchedulingLatencyVisitorARM64::VisitDiv(HDiv* instr) { - Primitive::Type type = instr->GetResultType(); + DataType::Type type = instr->GetResultType(); switch (type) { - case Primitive::kPrimFloat: + case DataType::Type::kFloat32: last_visited_latency_ = kArm64DivFloatLatency; break; - case Primitive::kPrimDouble: + case DataType::Type::kFloat64: last_visited_latency_ = kArm64DivDoubleLatency; break; default: @@ -131,7 +133,7 @@ void SchedulingLatencyVisitorARM64::VisitLoadString(HLoadString* ATTRIBUTE_UNUSE } void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) { - last_visited_latency_ = Primitive::IsFloatingPointType(instr->GetResultType()) + last_visited_latency_ = DataType::IsFloatingPointType(instr->GetResultType()) ? kArm64MulFloatingPointLatency : kArm64MulIntegerLatency; } @@ -151,7 +153,7 @@ void SchedulingLatencyVisitorARM64::VisitNewInstance(HNewInstance* instruction) } void SchedulingLatencyVisitorARM64::VisitRem(HRem* instruction) { - if (Primitive::IsFloatingPointType(instruction->GetResultType())) { + if (DataType::IsFloatingPointType(instruction->GetResultType())) { last_visited_internal_latency_ = kArm64CallInternalLatency; last_visited_latency_ = kArm64CallLatency; } else { @@ -192,8 +194,8 @@ void SchedulingLatencyVisitorARM64::VisitSuspendCheck(HSuspendCheck* instruction } void SchedulingLatencyVisitorARM64::VisitTypeConversion(HTypeConversion* instr) { - if (Primitive::IsFloatingPointType(instr->GetResultType()) || - Primitive::IsFloatingPointType(instr->GetInputType())) { + if (DataType::IsFloatingPointType(instr->GetResultType()) || + DataType::IsFloatingPointType(instr->GetInputType())) { last_visited_latency_ = kArm64TypeConversionFloatingPointIntegerLatency; } else { last_visited_latency_ = kArm64IntegerOpLatency; @@ -201,7 +203,7 @@ void SchedulingLatencyVisitorARM64::VisitTypeConversion(HTypeConversion* instr) } void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation *instr) { - if (Primitive::IsFloatingPointType(instr->GetPackedType())) { + if (DataType::IsFloatingPointType(instr->GetPackedType())) { last_visited_latency_ = kArm64SIMDFloatingPointOpLatency; } else { last_visited_latency_ = kArm64SIMDIntegerOpLatency; @@ -213,12 +215,12 @@ void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar( last_visited_latency_ = kArm64SIMDReplicateOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { - LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +void SchedulingLatencyVisitorARM64::VisitVecExtractScalar(HVecExtractScalar* instr) { + HandleSimpleArithmeticSIMD(instr); } -void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) { - LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +void SchedulingLatencyVisitorARM64::VisitVecReduce(HVecReduce* instr) { + HandleSimpleArithmeticSIMD(instr); } void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) { @@ -234,7 +236,7 @@ void SchedulingLatencyVisitorARM64::VisitVecAbs(HVecAbs* instr) { } void SchedulingLatencyVisitorARM64::VisitVecNot(HVecNot* instr) { - if (instr->GetPackedType() == Primitive::kPrimBoolean) { + if (instr->GetPackedType() == DataType::Type::kBool) { last_visited_internal_latency_ = kArm64SIMDIntegerOpLatency; } last_visited_latency_ = kArm64SIMDIntegerOpLatency; @@ -253,7 +255,7 @@ void 
SchedulingLatencyVisitorARM64::VisitVecSub(HVecSub* instr) { } void SchedulingLatencyVisitorARM64::VisitVecMul(HVecMul* instr) { - if (Primitive::IsFloatingPointType(instr->GetPackedType())) { + if (DataType::IsFloatingPointType(instr->GetPackedType())) { last_visited_latency_ = kArm64SIMDMulFloatingPointLatency; } else { last_visited_latency_ = kArm64SIMDMulIntegerLatency; @@ -261,10 +263,10 @@ void SchedulingLatencyVisitorARM64::VisitVecMul(HVecMul* instr) { } void SchedulingLatencyVisitorARM64::VisitVecDiv(HVecDiv* instr) { - if (instr->GetPackedType() == Primitive::kPrimFloat) { + if (instr->GetPackedType() == DataType::Type::kFloat32) { last_visited_latency_ = kArm64SIMDDivFloatLatency; } else { - DCHECK(instr->GetPackedType() == Primitive::kPrimDouble); + DCHECK(instr->GetPackedType() == DataType::Type::kFloat64); last_visited_latency_ = kArm64SIMDDivDoubleLatency; } } @@ -281,8 +283,8 @@ void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) last_visited_latency_ = kArm64SIMDIntegerOpLatency; } -void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) { - LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId(); +void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr ATTRIBUTE_UNUSED) { + last_visited_latency_ = kArm64SIMDIntegerOpLatency; } void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) { @@ -305,6 +307,10 @@ void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) { HandleSimpleArithmeticSIMD(instr); } +void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) { + HandleSimpleArithmeticSIMD(instr); +} + void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate( HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) { last_visited_latency_ = kArm64SIMDMulIntegerLatency; @@ -321,9 +327,9 @@ void SchedulingLatencyVisitorARM64::HandleVecAddress( void SchedulingLatencyVisitorARM64::VisitVecLoad(HVecLoad* instr) { last_visited_internal_latency_ = 0; - size_t size = Primitive::ComponentSize(instr->GetPackedType()); + size_t size = DataType::Size(instr->GetPackedType()); - if (instr->GetPackedType() == Primitive::kPrimChar + if (instr->GetPackedType() == DataType::Type::kUint16 && mirror::kUseStringCompression && instr->IsStringCharAt()) { // Set latencies for the uncompressed case. 
@@ -338,7 +344,7 @@ void SchedulingLatencyVisitorARM64::VisitVecLoad(HVecLoad* instr) { void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) { last_visited_internal_latency_ = 0; - size_t size = Primitive::ComponentSize(instr->GetPackedType()); + size_t size = DataType::Size(instr->GetPackedType()); HandleVecAddress(instr, size); last_visited_latency_ = kArm64SIMDMemoryStoreLatency; } diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h index 63d5b7d6b6..f71cb5b784 100644 --- a/compiler/optimizing/scheduler_arm64.h +++ b/compiler/optimizing/scheduler_arm64.h @@ -83,8 +83,8 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(SuspendCheck , unused) \ M(TypeConversion , unused) \ M(VecReplicateScalar , unused) \ - M(VecSetScalars , unused) \ - M(VecSumReduce , unused) \ + M(VecExtractScalar , unused) \ + M(VecReduce , unused) \ M(VecCnv , unused) \ M(VecNeg , unused) \ M(VecAbs , unused) \ @@ -103,6 +103,7 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { M(VecShl , unused) \ M(VecShr , unused) \ M(VecUShr , unused) \ + M(VecSetScalars , unused) \ M(VecMultiplyAccumulate, unused) \ M(VecLoad , unused) \ M(VecStore , unused) @@ -130,8 +131,8 @@ class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor { class HSchedulerARM64 : public HScheduler { public: - HSchedulerARM64(ArenaAllocator* arena, SchedulingNodeSelector* selector) - : HScheduler(arena, &arm64_latency_visitor_, selector) {} + HSchedulerARM64(ScopedArenaAllocator* allocator, SchedulingNodeSelector* selector) + : HScheduler(allocator, &arm64_latency_visitor_, selector) {} ~HSchedulerARM64() OVERRIDE {} bool IsSchedulable(const HInstruction* instruction) const OVERRIDE { @@ -150,6 +151,20 @@ class HSchedulerARM64 : public HScheduler { #undef CASE_INSTRUCTION_KIND } + // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized + // loop boundaries. This is a workaround for the lack of notion of SIMD register in the compiler; + // around a call we have to save/restore all live SIMD&FP registers (only lower 64 bits of + // SIMD&FP registers are callee saved) so don't reorder such vector instructions. + // + // TODO: remove this when a proper support of SIMD registers is introduced to the compiler. + bool IsSchedulingBarrier(const HInstruction* instr) const OVERRIDE { + return HScheduler::IsSchedulingBarrier(instr) || + instr->IsVecReduce() || + instr->IsVecExtractScalar() || + instr->IsVecSetScalars() || + instr->IsVecReplicateScalar(); + } + private: SchedulingLatencyVisitorARM64 arm64_latency_visitor_; DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64); diff --git a/compiler/optimizing/scheduler_test.cc b/compiler/optimizing/scheduler_test.cc index 10c3cd7535..fb15fc8975 100644 --- a/compiler/optimizing/scheduler_test.cc +++ b/compiler/optimizing/scheduler_test.cc @@ -14,6 +14,8 @@ * limitations under the License. */ +#include "scheduler.h" + #include "base/arena_allocator.h" #include "builder.h" #include "codegen_test_utils.h" @@ -23,7 +25,6 @@ #include "optimizing_unit_test.h" #include "pc_relative_fixups_x86.h" #include "register_allocator.h" -#include "scheduler.h" #ifdef ART_ENABLE_CODEGEN_arm64 #include "scheduler_arm64.h" @@ -42,22 +43,22 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { ::std::vector<CodegenTargetConfig> test_config_candidates = { #ifdef ART_ENABLE_CODEGEN_arm // TODO: Should't this be `kThumb2` instead of `kArm` here? 
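The IsSchedulingBarrier override above makes VecReduce, VecExtractScalar, VecSetScalars and VecReplicateScalar act as barriers so that no other instruction is moved across them while SIMD registers lack proper modelling. A rough standalone illustration of how a scheduler can honour such a predicate (not ART's actual implementation): reorder only within regions delimited by barriers.

    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical instruction record carrying only what the sketch needs.
    struct Instr {
      std::string name;
      bool is_barrier;  // true for the vector instructions listed in the override above
    };

    // Split a straight-line block into regions delimited by barriers. A scheduler
    // that only permutes instructions inside one region can never move anything
    // across a barrier, which is the guarantee the workaround relies on.
    std::vector<std::vector<Instr>> SplitAtBarriers(const std::vector<Instr>& block) {
      std::vector<std::vector<Instr>> regions(1);
      for (const Instr& insn : block) {
        regions.back().push_back(insn);
        if (insn.is_barrier) {
          regions.emplace_back();  // start a new region after every barrier
        }
      }
      return regions;
    }

    int main() {
      std::vector<Instr> block = {
          {"VecLoad", false}, {"VecAdd", false}, {"VecReduce", true}, {"Add", false}};
      for (const auto& region : SplitAtBarriers(block)) {
        for (const Instr& insn : region) std::cout << insn.name << ' ';
        std::cout << "| ";
      }
      std::cout << '\n';
      return 0;
    }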
- CodegenTargetConfig(kArm, create_codegen_arm_vixl32), + CodegenTargetConfig(InstructionSet::kArm, create_codegen_arm_vixl32), #endif #ifdef ART_ENABLE_CODEGEN_arm64 - CodegenTargetConfig(kArm64, create_codegen_arm64), + CodegenTargetConfig(InstructionSet::kArm64, create_codegen_arm64), #endif #ifdef ART_ENABLE_CODEGEN_x86 - CodegenTargetConfig(kX86, create_codegen_x86), + CodegenTargetConfig(InstructionSet::kX86, create_codegen_x86), #endif #ifdef ART_ENABLE_CODEGEN_x86_64 - CodegenTargetConfig(kX86_64, create_codegen_x86_64), + CodegenTargetConfig(InstructionSet::kX86_64, create_codegen_x86_64), #endif #ifdef ART_ENABLE_CODEGEN_mips - CodegenTargetConfig(kMips, create_codegen_mips), + CodegenTargetConfig(InstructionSet::kMips, create_codegen_mips), #endif #ifdef ART_ENABLE_CODEGEN_mips64 - CodegenTargetConfig(kMips64, create_codegen_mips64) + CodegenTargetConfig(InstructionSet::kMips64, create_codegen_mips64) #endif }; @@ -70,16 +71,14 @@ static ::std::vector<CodegenTargetConfig> GetTargetConfigs() { return v; } -class SchedulerTest : public CommonCompilerTest { +class SchedulerTest : public OptimizingUnitTest { public: - SchedulerTest() : pool_(), allocator_(&pool_) { - graph_ = CreateGraph(&allocator_); - } + SchedulerTest() : graph_(CreateGraph()) { } // Build scheduling graph, and run target specific scheduling on it. void TestBuildDependencyGraphAndSchedule(HScheduler* scheduler) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); - HBasicBlock* block1 = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* block1 = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->AddBlock(block1); graph_->SetEntryBlock(entry); @@ -99,21 +98,25 @@ class SchedulerTest : public CommonCompilerTest { // array_get2 ArrayGet [array, add1] // array_set2 ArraySet [array, add1, add2] - HInstruction* array = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimNot); + HInstruction* array = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kReference); HInstruction* c1 = graph_->GetIntConstant(1); HInstruction* c2 = graph_->GetIntConstant(10); - HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, c1, c2); - HInstruction* add2 = new (&allocator_) HAdd(Primitive::kPrimInt, add1, c2); - HInstruction* mul = new (&allocator_) HMul(Primitive::kPrimInt, add1, add2); - HInstruction* div_check = new (&allocator_) HDivZeroCheck(add2, 0); - HInstruction* div = new (&allocator_) HDiv(Primitive::kPrimInt, add1, div_check, 0); - HInstruction* array_get1 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0); - HInstruction* array_set1 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); - HInstruction* array_get2 = new (&allocator_) HArrayGet(array, add1, Primitive::kPrimInt, 0); - HInstruction* array_set2 = new (&allocator_) HArraySet(array, add1, add2, Primitive::kPrimInt, 0); + HInstruction* add1 = new (GetAllocator()) HAdd(DataType::Type::kInt32, c1, c2); + HInstruction* add2 = new (GetAllocator()) HAdd(DataType::Type::kInt32, add1, c2); + HInstruction* mul = new (GetAllocator()) HMul(DataType::Type::kInt32, add1, add2); + HInstruction* div_check = new (GetAllocator()) HDivZeroCheck(add2, 0); + HInstruction* div = new (GetAllocator()) HDiv(DataType::Type::kInt32, add1, div_check, 0); + HInstruction* array_get1 = + new (GetAllocator()) HArrayGet(array, add1, 
DataType::Type::kInt32, 0); + HInstruction* array_set1 = + new (GetAllocator()) HArraySet(array, add1, add2, DataType::Type::kInt32, 0); + HInstruction* array_get2 = + new (GetAllocator()) HArrayGet(array, add1, DataType::Type::kInt32, 0); + HInstruction* array_set2 = + new (GetAllocator()) HArraySet(array, add1, add2, DataType::Type::kInt32, 0); DCHECK(div_check->CanThrow()); @@ -132,18 +135,18 @@ class SchedulerTest : public CommonCompilerTest { block1->AddInstruction(instr); } - HEnvironment* environment = new (&allocator_) HEnvironment(&allocator_, - 2, - graph_->GetArtMethod(), - 0, - div_check); + HEnvironment* environment = new (GetAllocator()) HEnvironment(GetAllocator(), + 2, + graph_->GetArtMethod(), + 0, + div_check); div_check->SetRawEnvironment(environment); environment->SetRawEnvAt(0, add2); add2->AddEnvUseAt(div_check->GetEnvironment(), 0); environment->SetRawEnvAt(1, mul); mul->AddEnvUseAt(div_check->GetEnvironment(), 1); - SchedulingGraph scheduling_graph(scheduler, graph_->GetArena()); + SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator()); // Instructions must be inserted in reverse order into the scheduling graph. for (HInstruction* instr : ReverseRange(block_instructions)) { scheduling_graph.AddNode(instr); @@ -179,9 +182,11 @@ class SchedulerTest : public CommonCompilerTest { scheduler->Schedule(graph_); } - void CompileWithRandomSchedulerAndRun(const uint16_t* data, bool has_result, int expected) { + void CompileWithRandomSchedulerAndRun(const std::vector<uint16_t>& data, + bool has_result, + int expected) { for (CodegenTargetConfig target_config : GetTargetConfigs()) { - HGraph* graph = CreateCFG(&allocator_, data); + HGraph* graph = CreateCFG(data); // Schedule the graph randomly. HInstructionScheduling scheduling(graph, target_config.GetInstructionSet()); @@ -195,51 +200,57 @@ class SchedulerTest : public CommonCompilerTest { } void TestDependencyGraphOnAliasingArrayAccesses(HScheduler* scheduler) { - HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_); + HBasicBlock* entry = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry); graph_->SetEntryBlock(entry); graph_->BuildDominatorTree(); - HInstruction* arr = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(0), - 0, - Primitive::kPrimNot); - HInstruction* i = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(1), - 1, - Primitive::kPrimInt); - HInstruction* j = new (&allocator_) HParameterValue(graph_->GetDexFile(), - dex::TypeIndex(1), - 1, - Primitive::kPrimInt); - HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(), + HInstruction* arr = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), dex::TypeIndex(0), 0, - Primitive::kPrimNot); + DataType::Type::kReference); + HInstruction* i = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(1), + 1, + DataType::Type::kInt32); + HInstruction* j = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(1), + 1, + DataType::Type::kInt32); + HInstruction* object = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kReference); HInstruction* c0 = graph_->GetIntConstant(0); HInstruction* c1 = graph_->GetIntConstant(1); - HInstruction* add0 = new (&allocator_) HAdd(Primitive::kPrimInt, i, c0); - HInstruction* add1 = new (&allocator_) HAdd(Primitive::kPrimInt, i, c1); - HInstruction* sub0 = new (&allocator_) HSub(Primitive::kPrimInt, i, c0); - HInstruction* sub1 = new 
(&allocator_) HSub(Primitive::kPrimInt, i, c1); - HInstruction* arr_set_0 = new (&allocator_) HArraySet(arr, c0, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_1 = new (&allocator_) HArraySet(arr, c1, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_i = new (&allocator_) HArraySet(arr, i, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_add0 = new (&allocator_) HArraySet(arr, add0, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_add1 = new (&allocator_) HArraySet(arr, add1, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_sub0 = new (&allocator_) HArraySet(arr, sub0, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_sub1 = new (&allocator_) HArraySet(arr, sub1, c0, Primitive::kPrimInt, 0); - HInstruction* arr_set_j = new (&allocator_) HArraySet(arr, j, c0, Primitive::kPrimInt, 0); - HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object, - c1, - nullptr, - Primitive::kPrimInt, - MemberOffset(10), - false, - kUnknownFieldIndex, - kUnknownClassDefIndex, - graph_->GetDexFile(), - 0); + HInstruction* add0 = new (GetAllocator()) HAdd(DataType::Type::kInt32, i, c0); + HInstruction* add1 = new (GetAllocator()) HAdd(DataType::Type::kInt32, i, c1); + HInstruction* sub0 = new (GetAllocator()) HSub(DataType::Type::kInt32, i, c0); + HInstruction* sub1 = new (GetAllocator()) HSub(DataType::Type::kInt32, i, c1); + HInstruction* arr_set_0 = + new (GetAllocator()) HArraySet(arr, c0, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_1 = + new (GetAllocator()) HArraySet(arr, c1, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_i = new (GetAllocator()) HArraySet(arr, i, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_add0 = + new (GetAllocator()) HArraySet(arr, add0, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_add1 = + new (GetAllocator()) HArraySet(arr, add1, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_sub0 = + new (GetAllocator()) HArraySet(arr, sub0, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_sub1 = + new (GetAllocator()) HArraySet(arr, sub1, c0, DataType::Type::kInt32, 0); + HInstruction* arr_set_j = new (GetAllocator()) HArraySet(arr, j, c0, DataType::Type::kInt32, 0); + HInstanceFieldSet* set_field10 = new (GetAllocator()) HInstanceFieldSet(object, + c1, + nullptr, + DataType::Type::kInt32, + MemberOffset(10), + false, + kUnknownFieldIndex, + kUnknownClassDefIndex, + graph_->GetDexFile(), + 0); HInstruction* block_instructions[] = {arr, i, @@ -263,7 +274,7 @@ class SchedulerTest : public CommonCompilerTest { entry->AddInstruction(instr); } - SchedulingGraph scheduling_graph(scheduler, graph_->GetArena()); + SchedulingGraph scheduling_graph(scheduler, GetScopedAllocator()); HeapLocationCollector heap_location_collector(graph_); heap_location_collector.VisitBasicBlock(entry); heap_location_collector.BuildAliasingMatrix(); @@ -285,38 +296,38 @@ class SchedulerTest : public CommonCompilerTest { size_t loc2 = HeapLocationCollector::kHeapLocationNotFound; // Test side effect dependency: array[0] and array[1] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, c0); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, c1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr, c0); + loc2 = heap_location_collector.GetArrayHeapLocation(arr, c1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_1, arr_set_0)); // Test side effect dependency based on LSA analysis: array[i] and array[j] - loc1 = 
heap_location_collector.GetArrayAccessHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, j); + loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr, j); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_j, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i] and array[i+0] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, add0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr, add0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_add0, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i] and array[i-0] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, sub0); + loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr, sub0); ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_TRUE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub0, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i] and array[i+1] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, i); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, add1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr, i); + loc2 = heap_location_collector.GetArrayHeapLocation(arr, add1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_add1, arr_set_i)); // Test side effect dependency based on LSA analysis: array[i+1] and array[i-1] - loc1 = heap_location_collector.GetArrayAccessHeapLocation(arr, add1); - loc2 = heap_location_collector.GetArrayAccessHeapLocation(arr, sub1); + loc1 = heap_location_collector.GetArrayHeapLocation(arr, add1); + loc2 = heap_location_collector.GetArrayHeapLocation(arr, sub1); ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2)); ASSERT_FALSE(scheduling_graph.HasImmediateOtherDependency(arr_set_sub1, arr_set_add1)); @@ -335,21 +346,19 @@ class SchedulerTest : public CommonCompilerTest { scheduler->Schedule(graph_); } - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; }; #if defined(ART_ENABLE_CODEGEN_arm64) TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM64) { CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector); + arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector); TestBuildDependencyGraphAndSchedule(&scheduler); } TEST_F(SchedulerTest, ArrayAccessAliasingARM64) { CriticalPathSchedulingNodeSelector critical_path_selector; - arm64::HSchedulerARM64 scheduler(&allocator_, &critical_path_selector); + arm64::HSchedulerARM64 scheduler(GetScopedAllocator(), &critical_path_selector); TestDependencyGraphOnAliasingArrayAccesses(&scheduler); } #endif @@ -358,14 +367,14 @@ TEST_F(SchedulerTest, ArrayAccessAliasingARM64) { TEST_F(SchedulerTest, DependencyGraphAndSchedulerARM) { CriticalPathSchedulingNodeSelector critical_path_selector; arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); - arm::HSchedulerARM scheduler(&allocator_, 
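The aliasing assertions above depend on the heap location collector proving that array[i] and array[i+1] can never be the same element, while array[i] and array[j] might be. For two accesses to the same array whose indices are a shared variable plus a compile-time constant, the disambiguation reduces to comparing offsets; a minimal sketch of that rule, using a made-up representation rather than ART's HeapLocationCollector:

    #include <cstdint>
    #include <iostream>
    #include <optional>

    // Index of one access into a fixed array: the shared variable `i` plus a
    // constant, or std::nullopt for an unrelated index such as `j`.
    struct Index {
      std::optional<int64_t> offset;
    };

    // Conservative may-alias test: only indices with known, different constant
    // offsets are provably distinct elements.
    bool MayAlias(const Index& a, const Index& b) {
      if (!a.offset || !b.offset) {
        return true;  // array[i] vs array[j]: aliasing cannot be disproved
      }
      return *a.offset == *b.offset;  // array[i+1] vs array[i-1]: provably disjoint
    }

    int main() {
      Index i{0}, i_plus_1{1}, j{std::nullopt};
      std::cout << std::boolalpha
                << MayAlias(i, i_plus_1) << ' '  // false
                << MayAlias(i, j) << '\n';       // true
      return 0;
    }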
&critical_path_selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor); TestBuildDependencyGraphAndSchedule(&scheduler); } TEST_F(SchedulerTest, ArrayAccessAliasingARM) { CriticalPathSchedulingNodeSelector critical_path_selector; arm::SchedulingLatencyVisitorARM arm_latency_visitor(/*CodeGenerator*/ nullptr); - arm::HSchedulerARM scheduler(&allocator_, &critical_path_selector, &arm_latency_visitor); + arm::HSchedulerARM scheduler(GetScopedAllocator(), &critical_path_selector, &arm_latency_visitor); TestDependencyGraphOnAliasingArrayAccesses(&scheduler); } #endif @@ -386,7 +395,7 @@ TEST_F(SchedulerTest, RandomScheduling) { // } // return result; // - const uint16_t data[] = SIX_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = SIX_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 << 12 | 2 << 8, // const/4 v2, #int 0 Instruction::CONST_HIGH16 | 0 << 8, 0x4120, // const/high16 v0, #float 10.0 // #41200000 Instruction::CONST_4 | 1 << 12 | 1 << 8, // const/4 v1, #int 1 diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc index 46d0d0eb65..66e51421ca 100644 --- a/compiler/optimizing/select_generator.cc +++ b/compiler/optimizing/select_generator.cc @@ -16,13 +16,23 @@ #include "select_generator.h" +#include "reference_type_propagation.h" + namespace art { static constexpr size_t kMaxInstructionsInBranch = 1u; -// Returns true if `block` has only one predecessor, ends with a Goto and -// contains at most `kMaxInstructionsInBranch` other movable instruction with -// no side-effects. +HSelectGenerator::HSelectGenerator(HGraph* graph, + VariableSizedHandleScope* handles, + OptimizingCompilerStats* stats, + const char* name) + : HOptimization(graph, name, stats), + handle_scope_(handles) { +} + +// Returns true if `block` has only one predecessor, ends with a Goto +// or a Return and contains at most `kMaxInstructionsInBranch` other +// movable instruction with no side-effects. static bool IsSimpleBlock(HBasicBlock* block) { if (block->GetPredecessors().size() != 1u) { return false; @@ -33,7 +43,10 @@ static bool IsSimpleBlock(HBasicBlock* block) { for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) { HInstruction* instruction = it.Current(); if (instruction->IsControlFlow()) { - return instruction->IsGoto() && num_instructions <= kMaxInstructionsInBranch; + if (num_instructions > kMaxInstructionsInBranch) { + return false; + } + return instruction->IsGoto() || instruction->IsReturn(); } else if (instruction->CanBeMoved() && !instruction->HasSideEffects()) { num_instructions++; } else { @@ -45,8 +58,8 @@ static bool IsSimpleBlock(HBasicBlock* block) { UNREACHABLE(); } -// Returns true if 'block1' and 'block2' are empty, merge into the same single -// successor and the successor can only be reached from them. +// Returns true if 'block1' and 'block2' are empty and merge into the +// same single successor. static bool BlocksMergeTogether(HBasicBlock* block1, HBasicBlock* block2) { return block1->GetSingleSuccessor() == block2->GetSingleSuccessor(); } @@ -94,53 +107,73 @@ void HSelectGenerator::Run() { // If the branches are not empty, move instructions in front of the If. // TODO(dbrazdil): This puts an instruction between If and its condition. // Implement moving of conditions to first users if possible. 
- if (!true_block->IsSingleGoto()) { + if (!true_block->IsSingleGoto() && !true_block->IsSingleReturn()) { true_block->GetFirstInstruction()->MoveBefore(if_instruction); } - if (!false_block->IsSingleGoto()) { + if (!false_block->IsSingleGoto() && !false_block->IsSingleReturn()) { false_block->GetFirstInstruction()->MoveBefore(if_instruction); } - DCHECK(true_block->IsSingleGoto()); - DCHECK(false_block->IsSingleGoto()); + DCHECK(true_block->IsSingleGoto() || true_block->IsSingleReturn()); + DCHECK(false_block->IsSingleGoto() || false_block->IsSingleReturn()); // Find the resulting true/false values. size_t predecessor_index_true = merge_block->GetPredecessorIndexOf(true_block); size_t predecessor_index_false = merge_block->GetPredecessorIndexOf(false_block); DCHECK_NE(predecessor_index_true, predecessor_index_false); + bool both_successors_return = true_block->IsSingleReturn() && false_block->IsSingleReturn(); HPhi* phi = GetSingleChangedPhi(merge_block, predecessor_index_true, predecessor_index_false); - if (phi == nullptr) { + + HInstruction* true_value = nullptr; + HInstruction* false_value = nullptr; + if (both_successors_return) { + true_value = true_block->GetFirstInstruction()->InputAt(0); + false_value = false_block->GetFirstInstruction()->InputAt(0); + } else if (phi != nullptr) { + true_value = phi->InputAt(predecessor_index_true); + false_value = phi->InputAt(predecessor_index_false); + } else { continue; } - HInstruction* true_value = phi->InputAt(predecessor_index_true); - HInstruction* false_value = phi->InputAt(predecessor_index_false); + DCHECK(both_successors_return || phi != nullptr); // Create the Select instruction and insert it in front of the If. - HSelect* select = new (graph_->GetArena()) HSelect(if_instruction->InputAt(0), - true_value, - false_value, - if_instruction->GetDexPc()); - if (phi->GetType() == Primitive::kPrimNot) { + HSelect* select = new (graph_->GetAllocator()) HSelect(if_instruction->InputAt(0), + true_value, + false_value, + if_instruction->GetDexPc()); + if (both_successors_return) { + if (true_value->GetType() == DataType::Type::kReference) { + DCHECK(false_value->GetType() == DataType::Type::kReference); + ReferenceTypePropagation::FixUpInstructionType(select, handle_scope_); + } + } else if (phi->GetType() == DataType::Type::kReference) { select->SetReferenceTypeInfo(phi->GetReferenceTypeInfo()); } block->InsertInstructionBefore(select, if_instruction); - // Remove the true branch which removes the corresponding Phi input. - // If left only with the false branch, the Phi is automatically removed. - phi->ReplaceInput(select, predecessor_index_false); + // Remove the true branch which removes the corresponding Phi + // input if needed. If left only with the false branch, the Phi is + // automatically removed. + if (both_successors_return) { + false_block->GetFirstInstruction()->ReplaceInput(select, 0); + } else { + phi->ReplaceInput(select, predecessor_index_false); + } + bool only_two_predecessors = (merge_block->GetPredecessors().size() == 2u); true_block->DisconnectAndDelete(); - DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr); // Merge remaining blocks which are now connected with Goto. 
DCHECK_EQ(block->GetSingleSuccessor(), false_block); block->MergeWith(false_block); - if (only_two_predecessors) { + if (!both_successors_return && only_two_predecessors) { + DCHECK_EQ(only_two_predecessors, phi->GetBlock() == nullptr); DCHECK_EQ(block->GetSingleSuccessor(), merge_block); block->MergeWith(merge_block); } - MaybeRecordStat(MethodCompilationStat::kSelectGenerated); + MaybeRecordStat(stats_, MethodCompilationStat::kSelectGenerated); // No need to update dominance information, as we are simplifying // a simple diamond shape, where the join block is merged with the diff --git a/compiler/optimizing/select_generator.h b/compiler/optimizing/select_generator.h index c6dca581cc..bda57fd5c8 100644 --- a/compiler/optimizing/select_generator.h +++ b/compiler/optimizing/select_generator.h @@ -18,7 +18,7 @@ * This optimization recognizes the common diamond selection pattern and * replaces it with an instance of the HSelect instruction. * - * Recognized pattern: + * Recognized patterns: * * If [ Condition ] * / \ @@ -26,14 +26,30 @@ * \ / * Phi [FalseValue, TrueValue] * + * and + * + * If [ Condition ] + * / \ + * false branch true branch + * return FalseValue return TrueValue + * * The pattern will be simplified if `true_branch` and `false_branch` each * contain at most one instruction without any side effects. * - * Blocks are merged into one and Select replaces the If and the Phi: + * Blocks are merged into one and Select replaces the If and the Phi. + * + * For the first pattern it simplifies to: + * * true branch * false branch * Select [FalseValue, TrueValue, Condition] * + * For the second pattern it simplifies to: + * + * true branch + * false branch + * return Select [FalseValue, TrueValue, Condition] + * * Note: In order to recognize no side-effect blocks, this optimization must be * run after the instruction simplifier has removed redundant suspend checks. */ @@ -47,14 +63,17 @@ namespace art { class HSelectGenerator : public HOptimization { public: - HSelectGenerator(HGraph* graph, OptimizingCompilerStats* stats) - : HOptimization(graph, kSelectGeneratorPassName, stats) {} + HSelectGenerator(HGraph* graph, + VariableSizedHandleScope* handles, + OptimizingCompilerStats* stats, + const char* name = kSelectGeneratorPassName); void Run() OVERRIDE; static constexpr const char* kSelectGeneratorPassName = "select_generator"; private: + VariableSizedHandleScope* handle_scope_; DISALLOW_COPY_AND_ASSIGN(HSelectGenerator); }; diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc index 8bd568befd..1e49411c72 100644 --- a/compiler/optimizing/sharpening.cc +++ b/compiler/optimizing/sharpening.cc @@ -21,10 +21,9 @@ #include "base/enums.h" #include "class_linker.h" #include "code_generator.h" +#include "driver/compiler_driver.h" #include "driver/compiler_options.h" #include "driver/dex_compilation_unit.h" -#include "utils/dex_cache_arrays_layout-inl.h" -#include "driver/compiler_driver.h" #include "gc/heap.h" #include "gc/space/image_space.h" #include "handle_scope-inl.h" @@ -33,6 +32,7 @@ #include "nodes.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" +#include "utils/dex_cache_arrays_layout-inl.h" namespace art { @@ -45,8 +45,6 @@ void HSharpening::Run() { SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_, compiler_driver_); - } else if (instruction->IsLoadString()) { - ProcessLoadString(instruction->AsLoadString()); } // TODO: Move the sharpening of invoke-virtual/-interface/-super from HGraphBuilder // here. 
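At the source level, the two shapes documented in select_generator.h correspond to a conditional whose value is either merged by a Phi in a join block or returned directly from both arms; after the pass, both reduce to a single Select. An illustration in plain code (the pass itself works on the HGraph built from dex bytecode, not on C++):

    // Shape the pass already recognized: both arms fall through to a join block
    // where a Phi merges the value.
    int PhiShape(bool cond, int true_value, int false_value) {
      int result;
      if (cond) {
        result = true_value;   // true branch
      } else {
        result = false_value;  // false branch
      }
      return result;           // Phi [false_value, true_value]
    }

    // Shape added by this change: each arm returns directly, so no Phi exists
    // and the true/false values are taken from the Return instructions instead.
    int ReturnShape(bool cond, int true_value, int false_value) {
      if (cond) {
        return true_value;
      }
      return false_value;
    }

    // Either way the diamond collapses into a branch-free selection:
    int AfterSelect(bool cond, int true_value, int false_value) {
      return cond ? true_value : false_value;  // return Select [false_value, true_value, cond]
    }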
Rewrite it to avoid the CompilerDriver's reliance on verifier data @@ -147,10 +145,11 @@ void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, invoke->SetDispatchInfo(dispatch_info); } -HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, - CodeGenerator* codegen, - CompilerDriver* compiler_driver, - const DexCompilationUnit& dex_compilation_unit) { +HLoadClass::LoadKind HSharpening::ComputeLoadClassKind( + HLoadClass* load_class, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + const DexCompilationUnit& dex_compilation_unit) { Handle<mirror::Class> klass = load_class->GetClass(); DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall || load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass) @@ -205,11 +204,15 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, // TODO(ngeoffray): Generate HDeoptimize instead. desired_load_kind = HLoadClass::LoadKind::kRuntimeCall; } - } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) { - // AOT app compilation. Check if the class is in the boot image. - desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; + } else if (is_in_boot_image) { + // AOT app compilation, boot image class. + if (codegen->GetCompilerOptions().GetCompilePic()) { + desired_load_kind = HLoadClass::LoadKind::kBootImageClassTable; + } else { + desired_load_kind = HLoadClass::LoadKind::kBootImageAddress; + } } else { - // Not JIT and either the klass is not in boot image or we are compiling in PIC mode. + // Not JIT and the klass is not in boot image. desired_load_kind = HLoadClass::LoadKind::kBssEntry; } } @@ -233,7 +236,12 @@ HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class, return load_kind; } -void HSharpening::ProcessLoadString(HLoadString* load_string) { +void HSharpening::ProcessLoadString( + HLoadString* load_string, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles) { DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall); const DexFile& dex_file = load_string->GetDexFile(); @@ -245,27 +253,27 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { ClassLinker* class_linker = runtime->GetClassLinker(); ScopedObjectAccess soa(Thread::Current()); StackHandleScope<1> hs(soa.Self()); - Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *compilation_unit_.GetDexFile()) - ? compilation_unit_.GetDexCache() + Handle<mirror::DexCache> dex_cache = IsSameDexFile(dex_file, *dex_compilation_unit.GetDexFile()) + ? dex_compilation_unit.GetDexCache() : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file)); - mirror::String* string = nullptr; + ObjPtr<mirror::String> string = nullptr; - if (codegen_->GetCompilerOptions().IsBootImage()) { + if (codegen->GetCompilerOptions().IsBootImage()) { // Compiling boot image. Resolve the string and allocate it if needed, to ensure // the string will be added to the boot image. 
DCHECK(!runtime->UseJitCompilation()); - string = class_linker->ResolveString(dex_file, string_index, dex_cache); + string = class_linker->ResolveString(string_index, dex_cache); CHECK(string != nullptr); - if (compiler_driver_->GetSupportBootImageFixup()) { - DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file)); + if (compiler_driver->GetSupportBootImageFixup()) { + DCHECK(ContainsElement(compiler_driver->GetDexFilesForOatFile(), &dex_file)); desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative; } else { // compiler_driver_test. Do not sharpen. desired_load_kind = HLoadString::LoadKind::kRuntimeCall; } } else if (runtime->UseJitCompilation()) { - DCHECK(!codegen_->GetCompilerOptions().GetCompilePic()); - string = class_linker->LookupString(dex_file, string_index, dex_cache.Get()); + DCHECK(!codegen->GetCompilerOptions().GetCompilePic()); + string = class_linker->LookupString(string_index, dex_cache.Get()); if (string != nullptr) { if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { desired_load_kind = HLoadString::LoadKind::kBootImageAddress; @@ -277,22 +285,24 @@ void HSharpening::ProcessLoadString(HLoadString* load_string) { } } else { // AOT app compilation. Try to lookup the string without allocating if not found. - string = class_linker->LookupString(dex_file, string_index, dex_cache.Get()); - if (string != nullptr && - runtime->GetHeap()->ObjectIsInBootImageSpace(string) && - !codegen_->GetCompilerOptions().GetCompilePic()) { - desired_load_kind = HLoadString::LoadKind::kBootImageAddress; + string = class_linker->LookupString(string_index, dex_cache.Get()); + if (string != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(string)) { + if (codegen->GetCompilerOptions().GetCompilePic()) { + desired_load_kind = HLoadString::LoadKind::kBootImageInternTable; + } else { + desired_load_kind = HLoadString::LoadKind::kBootImageAddress; + } } else { desired_load_kind = HLoadString::LoadKind::kBssEntry; } } if (string != nullptr) { - load_string->SetString(handles_->NewHandle(string)); + load_string->SetString(handles->NewHandle(string)); } } DCHECK_NE(desired_load_kind, static_cast<HLoadString::LoadKind>(-1)); - HLoadString::LoadKind load_kind = codegen_->GetSupportedLoadStringKind(desired_load_kind); + HLoadString::LoadKind load_kind = codegen->GetSupportedLoadStringKind(desired_load_kind); load_string->SetLoadKind(load_kind); } diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h index f74b0afdbf..6df7d6d91e 100644 --- a/compiler/optimizing/sharpening.h +++ b/compiler/optimizing/sharpening.h @@ -34,25 +34,29 @@ class HSharpening : public HOptimization { public: HSharpening(HGraph* graph, CodeGenerator* codegen, - const DexCompilationUnit& compilation_unit, CompilerDriver* compiler_driver, - VariableSizedHandleScope* handles) - : HOptimization(graph, kSharpeningPassName), + const char* name = kSharpeningPassName) + : HOptimization(graph, name), codegen_(codegen), - compilation_unit_(compilation_unit), - compiler_driver_(compiler_driver), - handles_(handles) { } + compiler_driver_(compiler_driver) { } void Run() OVERRIDE; static constexpr const char* kSharpeningPassName = "sharpening"; + // Used by the builder. + static void ProcessLoadString(HLoadString* load_string, + CodeGenerator* codegen, + CompilerDriver* compiler_driver, + const DexCompilationUnit& dex_compilation_unit, + VariableSizedHandleScope* handles); + // Used by the builder and the inliner. 
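With ProcessLoadString now a static helper, the visible branches of the hunk form a small decision tree over the compilation mode and where the string object lives. A schematic restatement with a local enum mirroring the HLoadString::LoadKind values named above; the JIT-only branch is omitted because it is not fully shown here, and this is a sketch rather than the real HSharpening code:

    #include <iostream>

    enum class LoadKind {
      kBootImageLinkTimePcRelative,
      kBootImageInternTable,
      kBootImageAddress,
      kBssEntry,
      kRuntimeCall,
    };

    LoadKind ChooseStringLoadKind(bool compiling_boot_image,
                                  bool supports_boot_image_fixup,
                                  bool string_found_in_boot_image,
                                  bool compile_pic) {
      if (compiling_boot_image) {
        // Boot image compile: the string is resolved and will be in the image;
        // use a link-time PC-relative reference when fixup is supported.
        return supports_boot_image_fixup ? LoadKind::kBootImageLinkTimePcRelative
                                         : LoadKind::kRuntimeCall;
      }
      // AOT app compile: a string that already lives in the boot image is reached
      // via the boot image intern table under PIC or by direct address otherwise;
      // everything else goes through a .bss entry resolved at runtime.
      if (string_found_in_boot_image) {
        return compile_pic ? LoadKind::kBootImageInternTable : LoadKind::kBootImageAddress;
      }
      return LoadKind::kBssEntry;
    }

    int main() {
      bool pic_app_boot_string =
          ChooseStringLoadKind(false, false, true, true) == LoadKind::kBootImageInternTable;
      std::cout << std::boolalpha << pic_app_boot_string << '\n';  // true
      return 0;
    }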
static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class, CodeGenerator* codegen, CompilerDriver* compiler_driver, const DexCompilationUnit& dex_compilation_unit) - REQUIRES_SHARED(Locks::mutator_lock_); + REQUIRES_SHARED(Locks::mutator_lock_); // Used by Sharpening and InstructionSimplifier. static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, @@ -60,12 +64,8 @@ class HSharpening : public HOptimization { CompilerDriver* compiler_driver); private: - void ProcessLoadString(HLoadString* load_string); - CodeGenerator* codegen_; - const DexCompilationUnit& compilation_unit_; CompilerDriver* compiler_driver_; - VariableSizedHandleScope* handles_; }; } // namespace art diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h index fea47e66d9..c0f81a9c54 100644 --- a/compiler/optimizing/side_effects_analysis.h +++ b/compiler/optimizing/side_effects_analysis.h @@ -25,13 +25,13 @@ namespace art { class SideEffectsAnalysis : public HOptimization { public: - SideEffectsAnalysis(HGraph* graph, const char* pass_name = kSideEffectsAnalysisPassName) + explicit SideEffectsAnalysis(HGraph* graph, const char* pass_name = kSideEffectsAnalysisPassName) : HOptimization(graph, pass_name), graph_(graph), block_effects_(graph->GetBlocks().size(), - graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)), + graph->GetAllocator()->Adapter(kArenaAllocSideEffectsAnalysis)), loop_effects_(graph->GetBlocks().size(), - graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)) {} + graph->GetAllocator()->Adapter(kArenaAllocSideEffectsAnalysis)) {} SideEffects GetLoopEffects(HBasicBlock* block) const; SideEffects GetBlockEffects(HBasicBlock* block) const; diff --git a/compiler/optimizing/side_effects_test.cc b/compiler/optimizing/side_effects_test.cc index b01bc1ca0d..97317124ef 100644 --- a/compiler/optimizing/side_effects_test.cc +++ b/compiler/optimizing/side_effects_test.cc @@ -14,12 +14,26 @@ * limitations under the License. */ -#include "gtest/gtest.h" +#include <gtest/gtest.h> + +#include "data_type.h" #include "nodes.h" -#include "primitive.h" namespace art { +// Only runtime types other than void are allowed. +static const DataType::Type kTestTypes[] = { + DataType::Type::kReference, + DataType::Type::kBool, + DataType::Type::kInt8, + DataType::Type::kUint16, + DataType::Type::kInt16, + DataType::Type::kInt32, + DataType::Type::kInt64, + DataType::Type::kFloat32, + DataType::Type::kFloat64, +}; + /** * Tests for the SideEffects class. */ @@ -89,18 +103,16 @@ TEST(SideEffectsTest, None) { } TEST(SideEffectsTest, DependencesAndNoDependences) { - // Apply test to each individual primitive type. - for (Primitive::Type type = Primitive::kPrimNot; - type < Primitive::kPrimVoid; - type = Primitive::Type(type + 1)) { - // Same primitive type and access type: proper write/read dep. + // Apply test to each individual data type. + for (DataType::Type type : kTestTypes) { + // Same data type and access type: proper write/read dep. testWriteAndReadDependence( SideEffects::FieldWriteOfType(type, false), SideEffects::FieldReadOfType(type, false)); testWriteAndReadDependence( SideEffects::ArrayWriteOfType(type), SideEffects::ArrayReadOfType(type)); - // Same primitive type but different access type: no write/read dep. + // Same data type but different access type: no write/read dep. 
testNoWriteAndReadDependence( SideEffects::FieldWriteOfType(type, false), SideEffects::ArrayReadOfType(type)); @@ -111,31 +123,31 @@ TEST(SideEffectsTest, DependencesAndNoDependences) { } TEST(SideEffectsTest, NoDependences) { - // Different primitive type, same access type: no write/read dep. + // Different data type, same access type: no write/read dep. testNoWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), - SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, false), + SideEffects::FieldReadOfType(DataType::Type::kFloat64, false)); testNoWriteAndReadDependence( - SideEffects::ArrayWriteOfType(Primitive::kPrimInt), - SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + SideEffects::ArrayReadOfType(DataType::Type::kFloat64)); // Everything different: no write/read dep. testNoWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimInt, false), - SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, false), + SideEffects::ArrayReadOfType(DataType::Type::kFloat64)); testNoWriteAndReadDependence( - SideEffects::ArrayWriteOfType(Primitive::kPrimInt), - SideEffects::FieldReadOfType(Primitive::kPrimDouble, false)); + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + SideEffects::FieldReadOfType(DataType::Type::kFloat64, false)); } TEST(SideEffectsTest, VolatileDependences) { SideEffects volatile_write = - SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ true); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ true); SideEffects any_write = - SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ false); SideEffects volatile_read = - SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ true); + SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile */ true); SideEffects any_read = - SideEffects::FieldReadOfType(Primitive::kPrimByte, /* is_volatile */ false); + SideEffects::FieldReadOfType(DataType::Type::kInt8, /* is_volatile */ false); EXPECT_FALSE(volatile_write.MayDependOn(any_read)); EXPECT_TRUE(any_read.MayDependOn(volatile_write)); @@ -151,26 +163,24 @@ TEST(SideEffectsTest, VolatileDependences) { TEST(SideEffectsTest, SameWidthTypesNoAlias) { // Type I/F. testNoWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimInt, /* is_volatile */ false), - SideEffects::FieldReadOfType(Primitive::kPrimFloat, /* is_volatile */ false)); + SideEffects::FieldWriteOfType(DataType::Type::kInt32, /* is_volatile */ false), + SideEffects::FieldReadOfType(DataType::Type::kFloat32, /* is_volatile */ false)); testNoWriteAndReadDependence( - SideEffects::ArrayWriteOfType(Primitive::kPrimInt), - SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); + SideEffects::ArrayWriteOfType(DataType::Type::kInt32), + SideEffects::ArrayReadOfType(DataType::Type::kFloat32)); // Type L/D. 
testNoWriteAndReadDependence( - SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false), - SideEffects::FieldReadOfType(Primitive::kPrimDouble, /* is_volatile */ false)); + SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile */ false), + SideEffects::FieldReadOfType(DataType::Type::kFloat64, /* is_volatile */ false)); testNoWriteAndReadDependence( - SideEffects::ArrayWriteOfType(Primitive::kPrimLong), - SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + SideEffects::ArrayWriteOfType(DataType::Type::kInt64), + SideEffects::ArrayReadOfType(DataType::Type::kFloat64)); } TEST(SideEffectsTest, AllWritesAndReads) { SideEffects s = SideEffects::None(); // Keep taking the union of different writes and reads. - for (Primitive::Type type = Primitive::kPrimNot; - type < Primitive::kPrimVoid; - type = Primitive::Type(type + 1)) { + for (DataType::Type type : kTestTypes) { s = s.Union(SideEffects::FieldWriteOfType(type, /* is_volatile */ false)); s = s.Union(SideEffects::ArrayWriteOfType(type)); s = s.Union(SideEffects::FieldReadOfType(type, /* is_volatile */ false)); @@ -214,41 +224,41 @@ TEST(SideEffectsTest, BitStrings) { SideEffects::AllReads().ToString().c_str()); EXPECT_STREQ( "||||||L|", - SideEffects::FieldWriteOfType(Primitive::kPrimNot, false).ToString().c_str()); + SideEffects::FieldWriteOfType(DataType::Type::kReference, false).ToString().c_str()); EXPECT_STREQ( "||DFJISCBZL|DFJISCBZL||DFJISCBZL|DFJISCBZL|", - SideEffects::FieldWriteOfType(Primitive::kPrimNot, true).ToString().c_str()); + SideEffects::FieldWriteOfType(DataType::Type::kReference, true).ToString().c_str()); EXPECT_STREQ( "|||||Z||", - SideEffects::ArrayWriteOfType(Primitive::kPrimBoolean).ToString().c_str()); + SideEffects::ArrayWriteOfType(DataType::Type::kBool).ToString().c_str()); EXPECT_STREQ( "|||||C||", - SideEffects::ArrayWriteOfType(Primitive::kPrimChar).ToString().c_str()); + SideEffects::ArrayWriteOfType(DataType::Type::kUint16).ToString().c_str()); EXPECT_STREQ( "|||||S||", - SideEffects::ArrayWriteOfType(Primitive::kPrimShort).ToString().c_str()); + SideEffects::ArrayWriteOfType(DataType::Type::kInt16).ToString().c_str()); EXPECT_STREQ( "|||B||||", - SideEffects::FieldReadOfType(Primitive::kPrimByte, false).ToString().c_str()); + SideEffects::FieldReadOfType(DataType::Type::kInt8, false).ToString().c_str()); EXPECT_STREQ( "||D|||||", - SideEffects::ArrayReadOfType(Primitive::kPrimDouble).ToString().c_str()); + SideEffects::ArrayReadOfType(DataType::Type::kFloat64).ToString().c_str()); EXPECT_STREQ( "||J|||||", - SideEffects::ArrayReadOfType(Primitive::kPrimLong).ToString().c_str()); + SideEffects::ArrayReadOfType(DataType::Type::kInt64).ToString().c_str()); EXPECT_STREQ( "||F|||||", - SideEffects::ArrayReadOfType(Primitive::kPrimFloat).ToString().c_str()); + SideEffects::ArrayReadOfType(DataType::Type::kFloat32).ToString().c_str()); EXPECT_STREQ( "||I|||||", - SideEffects::ArrayReadOfType(Primitive::kPrimInt).ToString().c_str()); + SideEffects::ArrayReadOfType(DataType::Type::kInt32).ToString().c_str()); SideEffects s = SideEffects::None(); - s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimChar, /* is_volatile */ false)); - s = s.Union(SideEffects::FieldWriteOfType(Primitive::kPrimLong, /* is_volatile */ false)); - s = s.Union(SideEffects::ArrayWriteOfType(Primitive::kPrimShort)); - s = s.Union(SideEffects::FieldReadOfType(Primitive::kPrimInt, /* is_volatile */ false)); - s = s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimFloat)); - s = 
s.Union(SideEffects::ArrayReadOfType(Primitive::kPrimDouble)); + s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kUint16, /* is_volatile */ false)); + s = s.Union(SideEffects::FieldWriteOfType(DataType::Type::kInt64, /* is_volatile */ false)); + s = s.Union(SideEffects::ArrayWriteOfType(DataType::Type::kInt16)); + s = s.Union(SideEffects::FieldReadOfType(DataType::Type::kInt32, /* is_volatile */ false)); + s = s.Union(SideEffects::ArrayReadOfType(DataType::Type::kFloat32)); + s = s.Union(SideEffects::ArrayReadOfType(DataType::Type::kFloat64)); EXPECT_STREQ("||DF|I||S|JC|", s.ToString().c_str()); } diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index 50ab11bc23..dd54468217 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -16,7 +16,8 @@ #include "ssa_builder.h" -#include "bytecode_utils.h" +#include "data_type-inl.h" +#include "dex/bytecode_utils.h" #include "mirror/class-inl.h" #include "nodes.h" #include "reference_type_propagation.h" @@ -37,10 +38,11 @@ void SsaBuilder::FixNullConstantType() { HInstruction* right = equality_instr->InputAt(1); HInstruction* int_operand = nullptr; - if ((left->GetType() == Primitive::kPrimNot) && (right->GetType() == Primitive::kPrimInt)) { + if ((left->GetType() == DataType::Type::kReference) && + (right->GetType() == DataType::Type::kInt32)) { int_operand = right; - } else if ((right->GetType() == Primitive::kPrimNot) - && (left->GetType() == Primitive::kPrimInt)) { + } else if ((right->GetType() == DataType::Type::kReference) && + (left->GetType() == DataType::Type::kInt32)) { int_operand = left; } else { continue; @@ -103,7 +105,7 @@ void SsaBuilder::FixEnvironmentPhis() { } static void AddDependentInstructionsToWorklist(HInstruction* instruction, - ArenaVector<HPhi*>* worklist) { + ScopedArenaVector<HPhi*>* worklist) { // If `instruction` is a dead phi, type conflict was just identified. All its // live phi users, and transitively users of those users, therefore need to be // marked dead/conflicting too, so we add them to the worklist. Otherwise we @@ -122,7 +124,7 @@ static void AddDependentInstructionsToWorklist(HInstruction* instruction, // Find a candidate primitive type for `phi` by merging the type of its inputs. // Return false if conflict is identified. static bool TypePhiFromInputs(HPhi* phi) { - Primitive::Type common_type = phi->GetType(); + DataType::Type common_type = phi->GetType(); for (HInstruction* input : phi->GetInputs()) { if (input->IsPhi() && input->AsPhi()->IsDead()) { @@ -131,26 +133,29 @@ static bool TypePhiFromInputs(HPhi* phi) { return false; } - Primitive::Type input_type = HPhi::ToPhiType(input->GetType()); + DataType::Type input_type = HPhi::ToPhiType(input->GetType()); if (common_type == input_type) { // No change in type. - } else if (Primitive::Is64BitType(common_type) != Primitive::Is64BitType(input_type)) { + } else if (DataType::Is64BitType(common_type) != DataType::Is64BitType(input_type)) { // Types are of different sizes, e.g. int vs. long. Must be a conflict. return false; - } else if (Primitive::IsIntegralType(common_type)) { + } else if (DataType::IsIntegralType(common_type)) { // Previous inputs were integral, this one is not but is of the same size. // This does not imply conflict since some bytecode instruction types are // ambiguous. TypeInputsOfPhi will either type them or detect a conflict. 
- DCHECK(Primitive::IsFloatingPointType(input_type) || input_type == Primitive::kPrimNot); + DCHECK(DataType::IsFloatingPointType(input_type) || + input_type == DataType::Type::kReference); common_type = input_type; - } else if (Primitive::IsIntegralType(input_type)) { + } else if (DataType::IsIntegralType(input_type)) { // Input is integral, common type is not. Same as in the previous case, if // there is a conflict, it will be detected during TypeInputsOfPhi. - DCHECK(Primitive::IsFloatingPointType(common_type) || common_type == Primitive::kPrimNot); + DCHECK(DataType::IsFloatingPointType(common_type) || + common_type == DataType::Type::kReference); } else { // Combining float and reference types. Clearly a conflict. - DCHECK((common_type == Primitive::kPrimFloat && input_type == Primitive::kPrimNot) || - (common_type == Primitive::kPrimNot && input_type == Primitive::kPrimFloat)); + DCHECK( + (common_type == DataType::Type::kFloat32 && input_type == DataType::Type::kReference) || + (common_type == DataType::Type::kReference && input_type == DataType::Type::kFloat32)); return false; } } @@ -162,9 +167,9 @@ static bool TypePhiFromInputs(HPhi* phi) { } // Replace inputs of `phi` to match its type. Return false if conflict is identified. -bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) { - Primitive::Type common_type = phi->GetType(); - if (Primitive::IsIntegralType(common_type)) { +bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ScopedArenaVector<HPhi*>* worklist) { + DataType::Type common_type = phi->GetType(); + if (DataType::IsIntegralType(common_type)) { // We do not need to retype ambiguous inputs because they are always constructed // with the integral type candidate. if (kIsDebugBuild) { @@ -175,14 +180,15 @@ bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) { // Inputs did not need to be replaced, hence no conflict. Report success. return true; } else { - DCHECK(common_type == Primitive::kPrimNot || Primitive::IsFloatingPointType(common_type)); + DCHECK(common_type == DataType::Type::kReference || + DataType::IsFloatingPointType(common_type)); HInputsRef inputs = phi->GetInputs(); for (size_t i = 0; i < inputs.size(); ++i) { HInstruction* input = inputs[i]; if (input->GetType() != common_type) { // Input type does not match phi's type. Try to retype the input or // generate a suitably typed equivalent. - HInstruction* equivalent = (common_type == Primitive::kPrimNot) + HInstruction* equivalent = (common_type == DataType::Type::kReference) ? GetReferenceTypeEquivalent(input) : GetFloatOrDoubleEquivalent(input, common_type); if (equivalent == nullptr) { @@ -207,9 +213,9 @@ bool SsaBuilder::TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist) { // Attempt to set the primitive type of `phi` to match its inputs. Return whether // it was changed by the algorithm or not. 
-bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) { +bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ScopedArenaVector<HPhi*>* worklist) { DCHECK(phi->IsLive()); - Primitive::Type original_type = phi->GetType(); + DataType::Type original_type = phi->GetType(); // Try to type the phi in two stages: // (1) find a candidate type for the phi by merging types of all its inputs, @@ -227,7 +233,7 @@ bool SsaBuilder::UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist) { } void SsaBuilder::RunPrimitiveTypePropagation() { - ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder)); + ScopedArenaVector<HPhi*> worklist(local_allocator_->Adapter(kArenaAllocGraphBuilder)); for (HBasicBlock* block : graph_->GetReversePostOrder()) { if (block->IsLoopHeader()) { @@ -256,7 +262,7 @@ void SsaBuilder::RunPrimitiveTypePropagation() { EquivalentPhisCleanup(); } -void SsaBuilder::ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist) { +void SsaBuilder::ProcessPrimitiveTypePropagationWorklist(ScopedArenaVector<HPhi*>* worklist) { // Process worklist while (!worklist->empty()) { HPhi* phi = worklist->back(); @@ -270,8 +276,8 @@ void SsaBuilder::ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* wor } static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { - Primitive::Type type = aget->GetType(); - DCHECK(Primitive::IsIntOrLongType(type)); + DataType::Type type = aget->GetType(); + DCHECK(DataType::IsIntOrLongType(type)); HInstruction* next = aget->GetNext(); if (next != nullptr && next->IsArrayGet()) { HArrayGet* next_aget = next->AsArrayGet(); @@ -283,24 +289,25 @@ static HArrayGet* FindFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { } static HArrayGet* CreateFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { - Primitive::Type type = aget->GetType(); - DCHECK(Primitive::IsIntOrLongType(type)); + DataType::Type type = aget->GetType(); + DCHECK(DataType::IsIntOrLongType(type)); DCHECK(FindFloatOrDoubleEquivalentOfArrayGet(aget) == nullptr); - HArrayGet* equivalent = new (aget->GetBlock()->GetGraph()->GetArena()) HArrayGet( + HArrayGet* equivalent = new (aget->GetBlock()->GetGraph()->GetAllocator()) HArrayGet( aget->GetArray(), aget->GetIndex(), - type == Primitive::kPrimInt ? Primitive::kPrimFloat : Primitive::kPrimDouble, + type == DataType::Type::kInt32 ? DataType::Type::kFloat32 : DataType::Type::kFloat64, aget->GetDexPc()); aget->GetBlock()->InsertInstructionAfter(equivalent, aget); return equivalent; } -static Primitive::Type GetPrimitiveArrayComponentType(HInstruction* array) +static DataType::Type GetPrimitiveArrayComponentType(HInstruction* array) REQUIRES_SHARED(Locks::mutator_lock_) { ReferenceTypeInfo array_type = array->GetReferenceTypeInfo(); DCHECK(array_type.IsPrimitiveArrayClass()); - return array_type.GetTypeHandle()->GetComponentType()->GetPrimitiveType(); + return DataTypeFromPrimitive( + array_type.GetTypeHandle()->GetComponentType()->GetPrimitiveType()); } bool SsaBuilder::FixAmbiguousArrayOps() { @@ -312,7 +319,7 @@ bool SsaBuilder::FixAmbiguousArrayOps() { // uses (because they are untyped) and environment uses (if --debuggable). // After resolving all ambiguous ArrayGets, we will re-run primitive type // propagation on the Phis which need to be updated. 
- ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder)); + ScopedArenaVector<HPhi*> worklist(local_allocator_->Adapter(kArenaAllocGraphBuilder)); { ScopedObjectAccess soa(Thread::Current()); @@ -321,14 +328,16 @@ bool SsaBuilder::FixAmbiguousArrayOps() { HInstruction* array = aget_int->GetArray(); if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) { // RTP did not type the input array. Bail. + VLOG(compiler) << "Not compiled: Could not infer an array type for array operation at " + << aget_int->GetDexPc(); return false; } HArrayGet* aget_float = FindFloatOrDoubleEquivalentOfArrayGet(aget_int); - Primitive::Type array_type = GetPrimitiveArrayComponentType(array); - DCHECK_EQ(Primitive::Is64BitType(aget_int->GetType()), Primitive::Is64BitType(array_type)); + DataType::Type array_type = GetPrimitiveArrayComponentType(array); + DCHECK_EQ(DataType::Is64BitType(aget_int->GetType()), DataType::Is64BitType(array_type)); - if (Primitive::IsIntOrLongType(array_type)) { + if (DataType::IsIntOrLongType(array_type)) { if (aget_float != nullptr) { // There is a float/double equivalent. We must replace it and re-run // primitive type propagation on all dependent instructions. @@ -337,7 +346,7 @@ bool SsaBuilder::FixAmbiguousArrayOps() { AddDependentInstructionsToWorklist(aget_int, &worklist); } } else { - DCHECK(Primitive::IsFloatingPointType(array_type)); + DCHECK(DataType::IsFloatingPointType(array_type)); if (aget_float == nullptr) { // This is a float/double ArrayGet but there were no typed uses which // would create the typed equivalent. Create it now. @@ -361,17 +370,19 @@ bool SsaBuilder::FixAmbiguousArrayOps() { HInstruction* array = aset->GetArray(); if (!array->GetReferenceTypeInfo().IsPrimitiveArrayClass()) { // RTP did not type the input array. Bail. + VLOG(compiler) << "Not compiled: Could not infer an array type for array operation at " + << aset->GetDexPc(); return false; } HInstruction* value = aset->GetValue(); - Primitive::Type value_type = value->GetType(); - Primitive::Type array_type = GetPrimitiveArrayComponentType(array); - DCHECK_EQ(Primitive::Is64BitType(value_type), Primitive::Is64BitType(array_type)); + DataType::Type value_type = value->GetType(); + DataType::Type array_type = GetPrimitiveArrayComponentType(array); + DCHECK_EQ(DataType::Is64BitType(value_type), DataType::Is64BitType(array_type)); - if (Primitive::IsFloatingPointType(array_type)) { - if (!Primitive::IsFloatingPointType(value_type)) { - DCHECK(Primitive::IsIntegralType(value_type)); + if (DataType::IsFloatingPointType(array_type)) { + if (!DataType::IsFloatingPointType(value_type)) { + DCHECK(DataType::IsIntegralType(value_type)); // Array elements are floating-point but the value has not been replaced // with its floating-point equivalent. The replacement must always // succeed in code validated by the verifier. @@ -386,12 +397,12 @@ bool SsaBuilder::FixAmbiguousArrayOps() { } // Refine the side effects of this floating point aset. Note that we do this even if // no replacement occurs, since the right-hand-side may have been corrected already. - aset->ComputeSideEffects(); + aset->SetSideEffects(HArraySet::ComputeSideEffects(aset->GetComponentType())); } else { // Array elements are integral and the value assigned to it initially // was integral too. Nothing to do. 
- DCHECK(Primitive::IsIntegralType(array_type)); - DCHECK(Primitive::IsIntegralType(value_type)); + DCHECK(DataType::IsIntegralType(array_type)); + DCHECK(DataType::IsIntegralType(value_type)); } } } @@ -559,7 +570,7 @@ HFloatConstant* SsaBuilder::GetFloatEquivalent(HIntConstant* constant) { HFloatConstant* result = constant->GetNext()->AsFloatConstant(); if (result == nullptr) { float value = bit_cast<float, int32_t>(constant->GetValue()); - result = new (graph_->GetArena()) HFloatConstant(value); + result = new (graph_->GetAllocator()) HFloatConstant(value); constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); graph_->CacheFloatConstant(result); } else { @@ -581,7 +592,7 @@ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) { HDoubleConstant* result = constant->GetNext()->AsDoubleConstant(); if (result == nullptr) { double value = bit_cast<double, int64_t>(constant->GetValue()); - result = new (graph_->GetArena()) HDoubleConstant(value); + result = new (graph_->GetAllocator()) HDoubleConstant(value); constant->GetBlock()->InsertInstructionBefore(result, constant->GetNext()); graph_->CacheDoubleConstant(result); } else { @@ -599,7 +610,7 @@ HDoubleConstant* SsaBuilder::GetDoubleEquivalent(HLongConstant* constant) { * floating point registers and core registers), we need to create a copy of the * phi with a floating point / reference type. */ -HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type) { +HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, DataType::Type type) { DCHECK(phi->IsLive()) << "Cannot get equivalent of a dead phi since it would create a live one."; // We place the floating point /reference phi next to this phi. @@ -614,10 +625,9 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: if (next == nullptr || (next->AsPhi()->GetRegNumber() != phi->GetRegNumber()) || (next->GetType() != type)) { - ArenaAllocator* allocator = graph_->GetArena(); + ArenaAllocator* allocator = graph_->GetAllocator(); HInputsRef inputs = phi->GetInputs(); - HPhi* new_phi = - new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type); + HPhi* new_phi = new (allocator) HPhi(allocator, phi->GetRegNumber(), inputs.size(), type); // Copy the inputs. Note that the graph may not be correctly typed // by doing this copy, but the type propagation phase will fix it. ArrayRef<HUserRecord<HInstruction*>> new_input_records = new_phi->GetInputRecords(); @@ -637,9 +647,9 @@ HPhi* SsaBuilder::GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive: } HArrayGet* SsaBuilder::GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { - DCHECK(Primitive::IsIntegralType(aget->GetType())); + DCHECK(DataType::IsIntegralType(aget->GetType())); - if (!Primitive::IsIntOrLongType(aget->GetType())) { + if (!DataType::IsIntOrLongType(aget->GetType())) { // Cannot type boolean, char, byte, short to float/double. return nullptr; } @@ -650,7 +660,7 @@ HArrayGet* SsaBuilder::GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { // int/long. Requesting a float/double equivalent should lead to a conflict. 
if (kIsDebugBuild) { ScopedObjectAccess soa(Thread::Current()); - DCHECK(Primitive::IsIntOrLongType(GetPrimitiveArrayComponentType(aget->GetArray()))); + DCHECK(DataType::IsIntOrLongType(GetPrimitiveArrayComponentType(aget->GetArray()))); } return nullptr; } else { @@ -661,7 +671,7 @@ HArrayGet* SsaBuilder::GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget) { } } -HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* value, Primitive::Type type) { +HInstruction* SsaBuilder::GetFloatOrDoubleEquivalent(HInstruction* value, DataType::Type type) { if (value->IsArrayGet()) { return GetFloatOrDoubleEquivalentOfArrayGet(value->AsArrayGet()); } else if (value->IsLongConstant()) { @@ -679,7 +689,7 @@ HInstruction* SsaBuilder::GetReferenceTypeEquivalent(HInstruction* value) { if (value->IsIntConstant() && value->AsIntConstant()->GetValue() == 0) { return graph_->GetNullConstant(); } else if (value->IsPhi()) { - return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), Primitive::kPrimNot); + return GetFloatDoubleOrReferenceEquivalentOfPhi(value->AsPhi(), DataType::Type::kReference); } else { return nullptr; } diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h index 978f113ec4..60831a9e6a 100644 --- a/compiler/optimizing/ssa_builder.h +++ b/compiler/optimizing/ssa_builder.h @@ -17,7 +17,8 @@ #ifndef ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_ #define ART_COMPILER_OPTIMIZING_SSA_BUILDER_H_ -#include "base/arena_containers.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "nodes.h" #include "optimization.h" @@ -50,34 +51,36 @@ class SsaBuilder : public ValueObject { SsaBuilder(HGraph* graph, Handle<mirror::ClassLoader> class_loader, Handle<mirror::DexCache> dex_cache, - VariableSizedHandleScope* handles) + VariableSizedHandleScope* handles, + ScopedArenaAllocator* local_allocator) : graph_(graph), class_loader_(class_loader), dex_cache_(dex_cache), handles_(handles), agets_fixed_(false), - ambiguous_agets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), - ambiguous_asets_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)), - uninitialized_strings_(graph->GetArena()->Adapter(kArenaAllocGraphBuilder)) { + local_allocator_(local_allocator), + ambiguous_agets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), + ambiguous_asets_(local_allocator->Adapter(kArenaAllocGraphBuilder)), + uninitialized_strings_(local_allocator->Adapter(kArenaAllocGraphBuilder)) { graph_->InitializeInexactObjectRTI(handles); } GraphAnalysisResult BuildSsa(); - HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, Primitive::Type type); + HInstruction* GetFloatOrDoubleEquivalent(HInstruction* instruction, DataType::Type type); HInstruction* GetReferenceTypeEquivalent(HInstruction* instruction); void MaybeAddAmbiguousArrayGet(HArrayGet* aget) { - Primitive::Type type = aget->GetType(); - DCHECK(!Primitive::IsFloatingPointType(type)); - if (Primitive::IsIntOrLongType(type)) { + DataType::Type type = aget->GetType(); + DCHECK(!DataType::IsFloatingPointType(type)); + if (DataType::IsIntOrLongType(type)) { ambiguous_agets_.push_back(aget); } } void MaybeAddAmbiguousArraySet(HArraySet* aset) { - Primitive::Type type = aset->GetValue()->GetType(); - if (Primitive::IsIntOrLongType(type)) { + DataType::Type type = aset->GetValue()->GetType(); + if (DataType::IsIntOrLongType(type)) { ambiguous_asets_.push_back(aset); } } @@ -105,18 +108,18 @@ class SsaBuilder : public ValueObject { // input. 
Returns false if the type of an array is unknown. bool FixAmbiguousArrayOps(); - bool TypeInputsOfPhi(HPhi* phi, ArenaVector<HPhi*>* worklist); - bool UpdatePrimitiveType(HPhi* phi, ArenaVector<HPhi*>* worklist); - void ProcessPrimitiveTypePropagationWorklist(ArenaVector<HPhi*>* worklist); + bool TypeInputsOfPhi(HPhi* phi, ScopedArenaVector<HPhi*>* worklist); + bool UpdatePrimitiveType(HPhi* phi, ScopedArenaVector<HPhi*>* worklist); + void ProcessPrimitiveTypePropagationWorklist(ScopedArenaVector<HPhi*>* worklist); HFloatConstant* GetFloatEquivalent(HIntConstant* constant); HDoubleConstant* GetDoubleEquivalent(HLongConstant* constant); - HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, Primitive::Type type); + HPhi* GetFloatDoubleOrReferenceEquivalentOfPhi(HPhi* phi, DataType::Type type); HArrayGet* GetFloatOrDoubleEquivalentOfArrayGet(HArrayGet* aget); void RemoveRedundantUninitializedStrings(); - HGraph* graph_; + HGraph* const graph_; Handle<mirror::ClassLoader> class_loader_; Handle<mirror::DexCache> dex_cache_; VariableSizedHandleScope* const handles_; @@ -124,9 +127,10 @@ class SsaBuilder : public ValueObject { // True if types of ambiguous ArrayGets have been resolved. bool agets_fixed_; - ArenaVector<HArrayGet*> ambiguous_agets_; - ArenaVector<HArraySet*> ambiguous_asets_; - ArenaVector<HNewInstance*> uninitialized_strings_; + ScopedArenaAllocator* const local_allocator_; + ScopedArenaVector<HArrayGet*> ambiguous_agets_; + ScopedArenaVector<HArraySet*> ambiguous_asets_; + ScopedArenaVector<HNewInstance*> uninitialized_strings_; DISALLOW_COPY_AND_ASSIGN(SsaBuilder); }; diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc index 185303bc8c..f6bd05269e 100644 --- a/compiler/optimizing/ssa_liveness_analysis.cc +++ b/compiler/optimizing/ssa_liveness_analysis.cc @@ -26,7 +26,7 @@ namespace art { void SsaLivenessAnalysis::Analyze() { // Compute the linear order directly in the graph's data structure // (there are no more following graph mutations). - LinearizeGraph(graph_, graph_->GetArena(), &graph_->linear_order_); + LinearizeGraph(graph_, &graph_->linear_order_); // Liveness analysis. NumberInstructions(); @@ -56,7 +56,7 @@ void SsaLivenessAnalysis::NumberInstructions() { instructions_from_ssa_index_.push_back(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( - LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current)); + LiveInterval::MakeInterval(allocator_, current->GetType(), current)); } current->SetLifetimePosition(lifetime_position); } @@ -74,7 +74,7 @@ void SsaLivenessAnalysis::NumberInstructions() { instructions_from_ssa_index_.push_back(current); current->SetSsaIndex(ssa_index++); current->SetLiveInterval( - LiveInterval::MakeInterval(graph_->GetArena(), current->GetType(), current)); + LiveInterval::MakeInterval(allocator_, current->GetType(), current)); } instructions_from_lifetime_position_.push_back(current); current->SetLifetimePosition(lifetime_position); @@ -89,7 +89,7 @@ void SsaLivenessAnalysis::NumberInstructions() { void SsaLivenessAnalysis::ComputeLiveness() { for (HBasicBlock* block : graph_->GetLinearOrder()) { block_infos_[block->GetBlockId()] = - new (graph_->GetArena()) BlockInfo(graph_->GetArena(), *block, number_of_ssa_values_); + new (allocator_) BlockInfo(allocator_, *block, number_of_ssa_values_); } // Compute the live ranges, as well as the initial live_in, live_out, and kill sets. 
@@ -474,11 +474,14 @@ size_t LiveInterval::NumberOfSpillSlotsNeeded() const { // For a SIMD operation, compute the number of needed spill slots. // TODO: do through vector type? HInstruction* definition = GetParent()->GetDefinedBy(); - if (definition != nullptr && definition->IsVecOperation()) { + if (definition != nullptr && HVecOperation::ReturnsSIMDValue(definition)) { + if (definition->IsPhi()) { + definition = definition->InputAt(1); // SIMD always appears on back-edge + } return definition->AsVecOperation()->GetVectorNumberOfBytes() / kVRegSize; } // Return number of needed spill slots based on type. - return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1; + return (type_ == DataType::Type::kInt64 || type_ == DataType::Type::kFloat64) ? 2 : 1; } Location LiveInterval::ToLocation() const { diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h index a6681575a2..f83bb52b69 100644 --- a/compiler/optimizing/ssa_liveness_analysis.h +++ b/compiler/optimizing/ssa_liveness_analysis.h @@ -20,6 +20,8 @@ #include <iostream> #include "base/iteration_range.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "nodes.h" #include "utils/intrusive_forward_list.h" @@ -32,7 +34,7 @@ static constexpr int kNoRegister = -1; class BlockInfo : public ArenaObject<kArenaAllocSsaLiveness> { public: - BlockInfo(ArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values) + BlockInfo(ScopedArenaAllocator* allocator, const HBasicBlock& block, size_t number_of_ssa_values) : block_(block), live_in_(allocator, number_of_ssa_values, false, kArenaAllocSsaLiveness), live_out_(allocator, number_of_ssa_values, false, kArenaAllocSsaLiveness), @@ -82,7 +84,7 @@ class LiveRange FINAL : public ArenaObject<kArenaAllocSsaLiveness> { stream << "[" << start_ << "," << end_ << ")"; } - LiveRange* Dup(ArenaAllocator* allocator) const { + LiveRange* Dup(ScopedArenaAllocator* allocator) const { return new (allocator) LiveRange( start_, end_, next_ == nullptr ? 
nullptr : next_->Dup(allocator)); } @@ -135,7 +137,7 @@ class UsePosition : public ArenaObject<kArenaAllocSsaLiveness>, return user_->GetBlock()->GetLoopInformation(); } - UsePosition* Clone(ArenaAllocator* allocator) const { + UsePosition* Clone(ScopedArenaAllocator* allocator) const { return new (allocator) UsePosition(user_, input_index_, position_); } @@ -180,7 +182,7 @@ class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness>, stream << position_; } - EnvUsePosition* Clone(ArenaAllocator* allocator) const { + EnvUsePosition* Clone(ScopedArenaAllocator* allocator) const { return new (allocator) EnvUsePosition(environment_, input_index_, position_); } @@ -261,17 +263,19 @@ class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> { */ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { public: - static LiveInterval* MakeInterval(ArenaAllocator* allocator, - Primitive::Type type, + static LiveInterval* MakeInterval(ScopedArenaAllocator* allocator, + DataType::Type type, HInstruction* instruction = nullptr) { return new (allocator) LiveInterval(allocator, type, instruction); } - static LiveInterval* MakeFixedInterval(ArenaAllocator* allocator, int reg, Primitive::Type type) { + static LiveInterval* MakeFixedInterval(ScopedArenaAllocator* allocator, + int reg, + DataType::Type type) { return new (allocator) LiveInterval(allocator, type, nullptr, true, reg, false); } - static LiveInterval* MakeTempInterval(ArenaAllocator* allocator, Primitive::Type type) { + static LiveInterval* MakeTempInterval(ScopedArenaAllocator* allocator, DataType::Type type) { return new (allocator) LiveInterval(allocator, type, nullptr, false, kNoRegister, true); } @@ -608,7 +612,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { return parent_->env_uses_; } - Primitive::Type GetType() const { + DataType::Type GetType() const { return type_; } @@ -783,7 +787,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { size_t NumberOfSpillSlotsNeeded() const; bool IsFloatingPoint() const { - return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble; + return type_ == DataType::Type::kFloat32 || type_ == DataType::Type::kFloat64; } // Converts the location of the interval to a `Location` object. @@ -969,8 +973,8 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { } private: - LiveInterval(ArenaAllocator* allocator, - Primitive::Type type, + LiveInterval(ScopedArenaAllocator* allocator, + DataType::Type type, HInstruction* defined_by = nullptr, bool is_fixed = false, int reg = kNoRegister, @@ -1082,7 +1086,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { } } - ArenaAllocator* const allocator_; + ScopedArenaAllocator* const allocator_; // Ranges of this interval. We need a quick access to the last range to test // for liveness (see `IsDeadAt`). @@ -1102,7 +1106,7 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { EnvUsePositionList env_uses_; // The instruction type this interval corresponds to. - const Primitive::Type type_; + const DataType::Type type_; // Live interval that is the result of a split. 
LiveInterval* next_sibling_; @@ -1158,14 +1162,15 @@ class LiveInterval : public ArenaObject<kArenaAllocSsaLiveness> { */ class SsaLivenessAnalysis : public ValueObject { public: - SsaLivenessAnalysis(HGraph* graph, CodeGenerator* codegen) + SsaLivenessAnalysis(HGraph* graph, CodeGenerator* codegen, ScopedArenaAllocator* allocator) : graph_(graph), codegen_(codegen), + allocator_(allocator), block_infos_(graph->GetBlocks().size(), nullptr, - graph->GetArena()->Adapter(kArenaAllocSsaLiveness)), - instructions_from_ssa_index_(graph->GetArena()->Adapter(kArenaAllocSsaLiveness)), - instructions_from_lifetime_position_(graph->GetArena()->Adapter(kArenaAllocSsaLiveness)), + allocator_->Adapter(kArenaAllocSsaLiveness)), + instructions_from_ssa_index_(allocator_->Adapter(kArenaAllocSsaLiveness)), + instructions_from_lifetime_position_(allocator_->Adapter(kArenaAllocSsaLiveness)), number_of_ssa_values_(0) { } @@ -1262,7 +1267,7 @@ class SsaLivenessAnalysis : public ValueObject { // the exception handler to its location at the top of the catch block. if (env_holder->CanThrowIntoCatchBlock()) return true; if (instruction->GetBlock()->GetGraph()->IsDebuggable()) return true; - return instruction->GetType() == Primitive::kPrimNot; + return instruction->GetType() == DataType::Type::kReference; } void CheckNoLiveInIrreducibleLoop(const HBasicBlock& block) const { @@ -1284,13 +1289,18 @@ class SsaLivenessAnalysis : public ValueObject { HGraph* const graph_; CodeGenerator* const codegen_; - ArenaVector<BlockInfo*> block_infos_; + + // Use a local ScopedArenaAllocator for allocating memory. + // This allocator must remain alive while doing register allocation. + ScopedArenaAllocator* const allocator_; + + ScopedArenaVector<BlockInfo*> block_infos_; // Temporary array used when computing live_in, live_out, and kill sets. - ArenaVector<HInstruction*> instructions_from_ssa_index_; + ScopedArenaVector<HInstruction*> instructions_from_ssa_index_; // Temporary array used when inserting moves in the graph. - ArenaVector<HInstruction*> instructions_from_lifetime_position_; + ScopedArenaVector<HInstruction*> instructions_from_lifetime_position_; size_t number_of_ssa_values_; ART_FRIEND_TEST(RegisterAllocatorTest, SpillInactive); diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc index 029eb4ba61..b9bfbaa173 100644 --- a/compiler/optimizing/ssa_liveness_analysis_test.cc +++ b/compiler/optimizing/ssa_liveness_analysis_test.cc @@ -14,24 +14,23 @@ * limitations under the License. */ +#include "ssa_liveness_analysis.h" + #include "arch/instruction_set.h" #include "arch/instruction_set_features.h" #include "base/arena_allocator.h" #include "base/arena_containers.h" -#include "driver/compiler_options.h" #include "code_generator.h" +#include "driver/compiler_options.h" #include "nodes.h" #include "optimizing_unit_test.h" -#include "ssa_liveness_analysis.h" namespace art { -class SsaLivenessAnalysisTest : public testing::Test { +class SsaLivenessAnalysisTest : public OptimizingUnitTest { public: SsaLivenessAnalysisTest() - : pool_(), - allocator_(&pool_), - graph_(CreateGraph(&allocator_)), + : graph_(CreateGraph()), compiler_options_(), instruction_set_(kRuntimeISA) { std::string error_msg; @@ -43,7 +42,7 @@ class SsaLivenessAnalysisTest : public testing::Test { compiler_options_); CHECK(codegen_ != nullptr) << instruction_set_ << " is not a supported target architecture."; // Create entry block. 
- entry_ = new (&allocator_) HBasicBlock(graph_); + entry_ = new (GetAllocator()) HBasicBlock(graph_); graph_->AddBlock(entry_); graph_->SetEntryBlock(entry_); } @@ -51,14 +50,12 @@ class SsaLivenessAnalysisTest : public testing::Test { protected: HBasicBlock* CreateSuccessor(HBasicBlock* block) { HGraph* graph = block->GetGraph(); - HBasicBlock* successor = new (&allocator_) HBasicBlock(graph); + HBasicBlock* successor = new (GetAllocator()) HBasicBlock(graph); graph->AddBlock(successor); block->AddSuccessor(successor); return successor; } - ArenaPool pool_; - ArenaAllocator allocator_; HGraph* graph_; CompilerOptions compiler_options_; InstructionSet instruction_set_; @@ -68,17 +65,17 @@ class SsaLivenessAnalysisTest : public testing::Test { }; TEST_F(SsaLivenessAnalysisTest, TestReturnArg) { - HInstruction* arg = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt); + HInstruction* arg = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kInt32); entry_->AddInstruction(arg); HBasicBlock* block = CreateSuccessor(entry_); - HInstruction* ret = new (&allocator_) HReturn(arg); + HInstruction* ret = new (GetAllocator()) HReturn(arg); block->AddInstruction(ret); - block->AddInstruction(new (&allocator_) HExit()); + block->AddInstruction(new (GetAllocator()) HExit()); graph_->BuildDominatorTree(); - SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get()); + SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get(), GetScopedAllocator()); ssa_analysis.Analyze(); std::ostringstream arg_dump; @@ -88,49 +85,48 @@ TEST_F(SsaLivenessAnalysisTest, TestReturnArg) { } TEST_F(SsaLivenessAnalysisTest, TestAput) { - HInstruction* array = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); - HInstruction* index = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); - HInstruction* value = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt); - HInstruction* extra_arg1 = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt); - HInstruction* extra_arg2 = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot); - ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 }, - allocator_.Adapter()); + HInstruction* array = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); + HInstruction* index = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kInt32); + HInstruction* value = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(2), 2, DataType::Type::kInt32); + HInstruction* extra_arg1 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(3), 3, DataType::Type::kInt32); + HInstruction* extra_arg2 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(4), 4, DataType::Type::kReference); + HInstruction* const args[] = { array, index, value, extra_arg1, extra_arg2 }; for (HInstruction* insn : args) { entry_->AddInstruction(insn); } HBasicBlock* block = CreateSuccessor(entry_); - HInstruction* null_check = new (&allocator_) HNullCheck(array, 0); + HInstruction* null_check = new (GetAllocator()) HNullCheck(array, 0); block->AddInstruction(null_check); - HEnvironment* 
null_check_env = new (&allocator_) HEnvironment(&allocator_, - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, - null_check); - null_check_env->CopyFrom(args); + HEnvironment* null_check_env = new (GetAllocator()) HEnvironment(GetAllocator(), + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + null_check); + null_check_env->CopyFrom(ArrayRef<HInstruction* const>(args)); null_check->SetRawEnvironment(null_check_env); - HInstruction* length = new (&allocator_) HArrayLength(array, 0); + HInstruction* length = new (GetAllocator()) HArrayLength(array, 0); block->AddInstruction(length); - HInstruction* bounds_check = new (&allocator_) HBoundsCheck(index, length, /* dex_pc */ 0u); + HInstruction* bounds_check = new (GetAllocator()) HBoundsCheck(index, length, /* dex_pc */ 0u); block->AddInstruction(bounds_check); - HEnvironment* bounds_check_env = new (&allocator_) HEnvironment(&allocator_, - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, - bounds_check); - bounds_check_env->CopyFrom(args); + HEnvironment* bounds_check_env = new (GetAllocator()) HEnvironment(GetAllocator(), + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + bounds_check); + bounds_check_env->CopyFrom(ArrayRef<HInstruction* const>(args)); bounds_check->SetRawEnvironment(bounds_check_env); HInstruction* array_set = - new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0); + new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc */ 0); block->AddInstruction(array_set); graph_->BuildDominatorTree(); - SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get()); + SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get(), GetScopedAllocator()); ssa_analysis.Analyze(); EXPECT_FALSE(graph_->IsDebuggable()); @@ -147,7 +143,7 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { // Environment uses keep the reference argument alive. 
"ranges: { [10,19) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", }; - ASSERT_EQ(arraysize(expected), args.size()); + static_assert(arraysize(expected) == arraysize(args), "Array size check."); size_t arg_index = 0u; for (HInstruction* arg : args) { std::ostringstream arg_dump; @@ -158,53 +154,52 @@ TEST_F(SsaLivenessAnalysisTest, TestAput) { } TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { - HInstruction* array = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot); - HInstruction* index = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt); - HInstruction* value = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt); - HInstruction* extra_arg1 = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt); - HInstruction* extra_arg2 = new (&allocator_) HParameterValue( - graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot); - ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 }, - allocator_.Adapter()); + HInstruction* array = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(0), 0, DataType::Type::kReference); + HInstruction* index = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(1), 1, DataType::Type::kInt32); + HInstruction* value = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(2), 2, DataType::Type::kInt32); + HInstruction* extra_arg1 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(3), 3, DataType::Type::kInt32); + HInstruction* extra_arg2 = new (GetAllocator()) HParameterValue( + graph_->GetDexFile(), dex::TypeIndex(4), 4, DataType::Type::kReference); + HInstruction* const args[] = { array, index, value, extra_arg1, extra_arg2 }; for (HInstruction* insn : args) { entry_->AddInstruction(insn); } HBasicBlock* block = CreateSuccessor(entry_); - HInstruction* null_check = new (&allocator_) HNullCheck(array, 0); + HInstruction* null_check = new (GetAllocator()) HNullCheck(array, 0); block->AddInstruction(null_check); - HEnvironment* null_check_env = new (&allocator_) HEnvironment(&allocator_, - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, - null_check); - null_check_env->CopyFrom(args); + HEnvironment* null_check_env = new (GetAllocator()) HEnvironment(GetAllocator(), + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + null_check); + null_check_env->CopyFrom(ArrayRef<HInstruction* const>(args)); null_check->SetRawEnvironment(null_check_env); - HInstruction* length = new (&allocator_) HArrayLength(array, 0); + HInstruction* length = new (GetAllocator()) HArrayLength(array, 0); block->AddInstruction(length); // Use HAboveOrEqual+HDeoptimize as the bounds check. 
- HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length); + HInstruction* ae = new (GetAllocator()) HAboveOrEqual(index, length); block->AddInstruction(ae); - HInstruction* deoptimize = - new(&allocator_) HDeoptimize(&allocator_, ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u); + HInstruction* deoptimize = new(GetAllocator()) HDeoptimize( + GetAllocator(), ae, DeoptimizationKind::kBlockBCE, /* dex_pc */ 0u); block->AddInstruction(deoptimize); - HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_, - /* number_of_vregs */ 5, - /* method */ nullptr, - /* dex_pc */ 0u, - deoptimize); - deoptimize_env->CopyFrom(args); + HEnvironment* deoptimize_env = new (GetAllocator()) HEnvironment(GetAllocator(), + /* number_of_vregs */ 5, + /* method */ nullptr, + /* dex_pc */ 0u, + deoptimize); + deoptimize_env->CopyFrom(ArrayRef<HInstruction* const>(args)); deoptimize->SetRawEnvironment(deoptimize_env); HInstruction* array_set = - new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0); + new (GetAllocator()) HArraySet(array, index, value, DataType::Type::kInt32, /* dex_pc */ 0); block->AddInstruction(array_set); graph_->BuildDominatorTree(); - SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get()); + SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get(), GetScopedAllocator()); ssa_analysis.Analyze(); EXPECT_FALSE(graph_->IsDebuggable()); @@ -220,7 +215,7 @@ TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) { // Environment uses keep the reference argument alive. "ranges: { [10,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0", }; - ASSERT_EQ(arraysize(expected), args.size()); + static_assert(arraysize(expected) == arraysize(args), "Array size check."); size_t arg_index = 0u; for (HInstruction* arg : args) { std::ostringstream arg_dump; diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc index aec7a3c555..cb27ded17a 100644 --- a/compiler/optimizing/ssa_phi_elimination.cc +++ b/compiler/optimizing/ssa_phi_elimination.cc @@ -16,8 +16,9 @@ #include "ssa_phi_elimination.h" -#include "base/arena_containers.h" #include "base/arena_bit_vector.h" +#include "base/scoped_arena_allocator.h" +#include "base/scoped_arena_containers.h" #include "base/bit_vector-inl.h" namespace art { @@ -28,10 +29,17 @@ void SsaDeadPhiElimination::Run() { } void SsaDeadPhiElimination::MarkDeadPhis() { + // Use local allocator for allocating memory used by this optimization. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + + static constexpr size_t kDefaultWorklistSize = 8; + ScopedArenaVector<HPhi*> worklist(allocator.Adapter(kArenaAllocSsaPhiElimination)); + worklist.reserve(kDefaultWorklistSize); + // Phis are constructed live and should not be revived if previously marked // dead. This algorithm temporarily breaks that invariant but we DCHECK that // only phis which were initially live are revived. - ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination)); + ScopedArenaSet<HPhi*> initially_live(allocator.Adapter(kArenaAllocSsaPhiElimination)); // Add to the worklist phis referenced by non-phi instructions. 
for (HBasicBlock* block : graph_->GetReversePostOrder()) { @@ -52,7 +60,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { } if (keep_alive) { - worklist_.push_back(phi); + worklist.push_back(phi); } else { phi->SetDead(); if (kIsDebugBuild) { @@ -63,9 +71,9 @@ void SsaDeadPhiElimination::MarkDeadPhis() { } // Process the worklist by propagating liveness to phi inputs. - while (!worklist_.empty()) { - HPhi* phi = worklist_.back(); - worklist_.pop_back(); + while (!worklist.empty()) { + HPhi* phi = worklist.back(); + worklist.pop_back(); for (HInstruction* raw_input : phi->GetInputs()) { HPhi* input = raw_input->AsPhi(); if (input != nullptr && input->IsDead()) { @@ -73,7 +81,7 @@ void SsaDeadPhiElimination::MarkDeadPhis() { // that the phi was not dead initially (see definition of `initially_live`). DCHECK(ContainsElement(initially_live, input)); input->SetLive(); - worklist_.push_back(input); + worklist.push_back(input); } } } @@ -115,23 +123,31 @@ void SsaDeadPhiElimination::EliminateDeadPhis() { } void SsaRedundantPhiElimination::Run() { + // Use local allocator for allocating memory used by this optimization. + ScopedArenaAllocator allocator(graph_->GetArenaStack()); + + static constexpr size_t kDefaultWorklistSize = 8; + ScopedArenaVector<HPhi*> worklist(allocator.Adapter(kArenaAllocSsaPhiElimination)); + worklist.reserve(kDefaultWorklistSize); + // Add all phis in the worklist. Order does not matter for correctness, and // neither will necessarily converge faster. for (HBasicBlock* block : graph_->GetReversePostOrder()) { for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { - worklist_.push_back(inst_it.Current()->AsPhi()); + worklist.push_back(inst_it.Current()->AsPhi()); } } - ArenaBitVector visited_phis_in_cycle(graph_->GetArena(), + ArenaBitVector visited_phis_in_cycle(&allocator, graph_->GetCurrentInstructionId(), /* expandable */ false, kArenaAllocSsaPhiElimination); - ArenaVector<HPhi*> cycle_worklist(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination)); + visited_phis_in_cycle.ClearAllBits(); + ScopedArenaVector<HPhi*> cycle_worklist(allocator.Adapter(kArenaAllocSsaPhiElimination)); - while (!worklist_.empty()) { - HPhi* phi = worklist_.back(); - worklist_.pop_back(); + while (!worklist.empty()) { + HPhi* phi = worklist.back(); + worklist.pop_back(); // If the phi has already been processed, continue. 
if (!phi->IsInBlock()) { @@ -231,7 +247,7 @@ void SsaRedundantPhiElimination::Run() { for (const HUseListNode<HInstruction*>& use : current->GetUses()) { HInstruction* user = use.GetUser(); if (user->IsPhi() && !visited_phis_in_cycle.IsBitSet(user->GetId())) { - worklist_.push_back(user->AsPhi()); + worklist.push_back(user->AsPhi()); } } DCHECK(candidate->StrictlyDominates(current)); diff --git a/compiler/optimizing/ssa_phi_elimination.h b/compiler/optimizing/ssa_phi_elimination.h index b48e8200d5..11d5837eb5 100644 --- a/compiler/optimizing/ssa_phi_elimination.h +++ b/compiler/optimizing/ssa_phi_elimination.h @@ -17,7 +17,6 @@ #ifndef ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ #define ART_COMPILER_OPTIMIZING_SSA_PHI_ELIMINATION_H_ -#include "base/arena_containers.h" #include "nodes.h" #include "optimization.h" @@ -30,10 +29,7 @@ namespace art { class SsaDeadPhiElimination : public HOptimization { public: explicit SsaDeadPhiElimination(HGraph* graph) - : HOptimization(graph, kSsaDeadPhiEliminationPassName), - worklist_(graph->GetArena()->Adapter(kArenaAllocSsaPhiElimination)) { - worklist_.reserve(kDefaultWorklistSize); - } + : HOptimization(graph, kSsaDeadPhiEliminationPassName) {} void Run() OVERRIDE; @@ -43,10 +39,6 @@ class SsaDeadPhiElimination : public HOptimization { static constexpr const char* kSsaDeadPhiEliminationPassName = "dead_phi_elimination"; private: - ArenaVector<HPhi*> worklist_; - - static constexpr size_t kDefaultWorklistSize = 8; - DISALLOW_COPY_AND_ASSIGN(SsaDeadPhiElimination); }; @@ -59,20 +51,13 @@ class SsaDeadPhiElimination : public HOptimization { class SsaRedundantPhiElimination : public HOptimization { public: explicit SsaRedundantPhiElimination(HGraph* graph) - : HOptimization(graph, kSsaRedundantPhiEliminationPassName), - worklist_(graph->GetArena()->Adapter(kArenaAllocSsaPhiElimination)) { - worklist_.reserve(kDefaultWorklistSize); - } + : HOptimization(graph, kSsaRedundantPhiEliminationPassName) {} void Run() OVERRIDE; static constexpr const char* kSsaRedundantPhiEliminationPassName = "redundant_phi_elimination"; private: - ArenaVector<HPhi*> worklist_; - - static constexpr size_t kDefaultWorklistSize = 8; - DISALLOW_COPY_AND_ASSIGN(SsaRedundantPhiElimination); }; diff --git a/compiler/optimizing/ssa_test.cc b/compiler/optimizing/ssa_test.cc index f69f417efc..85ed06eb9b 100644 --- a/compiler/optimizing/ssa_test.cc +++ b/compiler/optimizing/ssa_test.cc @@ -18,8 +18,8 @@ #include "base/arena_allocator.h" #include "builder.h" -#include "dex_file.h" -#include "dex_instruction.h" +#include "dex/dex_file.h" +#include "dex/dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" @@ -29,7 +29,10 @@ namespace art { -class SsaTest : public CommonCompilerTest {}; +class SsaTest : public OptimizingUnitTest { + protected: + void TestCode(const std::vector<uint16_t>& data, const char* expected); +}; class SsaPrettyPrinter : public HPrettyPrinter { public: @@ -77,10 +80,8 @@ static void ReNumberInstructions(HGraph* graph) { } } -static void TestCode(const uint16_t* data, const char* expected) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); +void SsaTest::TestCode(const std::vector<uint16_t>& data, const char* expected) { + HGraph* graph = CreateCFG(data); // Suspend checks implementation may change in the future, and this test relies // on how instructions are ordered. 
RemoveSuspendChecks(graph); @@ -89,7 +90,7 @@ static void TestCode(const uint16_t* data, const char* expected) { // Test that phis had their type set. for (HBasicBlock* block : graph->GetBlocks()) { for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) { - ASSERT_NE(it.Current()->GetType(), Primitive::kPrimVoid); + ASSERT_NE(it.Current()->GetType(), DataType::Type::kVoid); } } @@ -118,7 +119,7 @@ TEST_F(SsaTest, CFG1) { "BasicBlock 5, pred: 1, succ: 3\n" " 7: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::GOTO | 0x100, @@ -149,7 +150,7 @@ TEST_F(SsaTest, CFG2) { "BasicBlock 5, pred: 1, succ: 3\n" " 9: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 0, @@ -180,7 +181,7 @@ TEST_F(SsaTest, CFG3) { "BasicBlock 5, pred: 4\n" " 10: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -213,7 +214,7 @@ TEST_F(SsaTest, Loop1) { "BasicBlock 6, pred: 5\n" " 10: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -244,7 +245,7 @@ TEST_F(SsaTest, Loop2) { "BasicBlock 5, pred: 4\n" " 9: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -275,7 +276,7 @@ TEST_F(SsaTest, Loop3) { "BasicBlock 5, pred: 4\n" " 10: Exit\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -309,7 +310,7 @@ TEST_F(SsaTest, Loop4) { "BasicBlock 6, pred: 5\n" " 10: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::GOTO | 0x500, Instruction::IF_EQ, 5, @@ -350,7 +351,7 @@ TEST_F(SsaTest, Loop5) { " 13: Phi(2, 1) [11, 8, 8]\n" " 14: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 4, Instruction::CONST_4 | 4 << 12 | 0, @@ -389,7 +390,7 @@ TEST_F(SsaTest, Loop6) { "BasicBlock 7, pred: 6\n" " 13: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -431,7 +432,7 @@ TEST_F(SsaTest, Loop7) { "BasicBlock 8, pred: 2, succ: 6\n" " 15: Goto\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 8, Instruction::CONST_4 | 4 << 12 | 0, @@ -455,7 +456,7 @@ TEST_F(SsaTest, DeadLocal) { "BasicBlock 2, pred: 1\n" " 3: Exit\n"; - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::RETURN_VOID); @@ -483,7 +484,7 @@ 
TEST_F(SsaTest, LocalInIf) { "BasicBlock 5, pred: 1, succ: 3\n" " 8: Goto\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 3, Instruction::CONST_4 | 4 << 12 | 1 << 8, @@ -519,7 +520,7 @@ TEST_F(SsaTest, MultiplePredecessors) { "BasicBlock 7, pred: 3, succ: 5\n" " 12: Goto\n"; - const uint16_t data[] = TWO_REGISTERS_CODE_ITEM( + const std::vector<uint16_t> data = TWO_REGISTERS_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 5, Instruction::ADD_INT_LIT8 | 1 << 8, 0 << 8, diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index b7840d73db..7010e3f380 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -18,6 +18,7 @@ #include "art_method-inl.h" #include "base/stl_util.h" +#include "dex/dex_file_types.h" #include "optimizing/optimizing_compiler.h" #include "runtime.h" #include "scoped_thread_state_change-inl.h" @@ -31,7 +32,6 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, uint32_t num_dex_registers, uint8_t inlining_depth) { DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry"; - DCHECK_NE(dex_pc, static_cast<uint32_t>(-1)) << "invalid dex_pc"; current_entry_.dex_pc = dex_pc; current_entry_.native_pc_code_offset = CodeOffset::FromOffset(native_pc_offset, instruction_set_); current_entry_.register_mask = register_mask; @@ -39,12 +39,15 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, current_entry_.inlining_depth = inlining_depth; current_entry_.inline_infos_start_index = inline_infos_.size(); current_entry_.stack_mask_index = 0; - current_entry_.dex_method_index = DexFile::kDexNoIndex; + current_entry_.dex_method_index = dex::kDexNoIndex; current_entry_.dex_register_entry.num_dex_registers = num_dex_registers; current_entry_.dex_register_entry.locations_start_index = dex_register_locations_.size(); - current_entry_.dex_register_entry.live_dex_registers_mask = (num_dex_registers != 0) - ? ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream) - : nullptr; + current_entry_.dex_register_entry.live_dex_registers_mask = nullptr; + if (num_dex_registers != 0u) { + current_entry_.dex_register_entry.live_dex_registers_mask = + ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream); + current_entry_.dex_register_entry.live_dex_registers_mask->ClearAllBits(); + } if (sp_mask != nullptr) { stack_mask_max_ = std::max(stack_mask_max_, sp_mask->GetHighestBitSet()); } @@ -52,7 +55,10 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, number_of_stack_maps_with_inline_info_++; } - dex_pc_max_ = std::max(dex_pc_max_, dex_pc); + // Note: dex_pc can be kNoDexPc for native method intrinsics. + if (dex_pc != dex::kDexNoIndex && (dex_pc_max_ == dex::kDexNoIndex || dex_pc_max_ < dex_pc)) { + dex_pc_max_ = dex_pc; + } register_mask_max_ = std::max(register_mask_max_, register_mask); current_dex_register_ = 0; } @@ -120,9 +126,12 @@ void StackMapStream::BeginInlineInfoEntry(ArtMethod* method, current_inline_info_.dex_pc = dex_pc; current_inline_info_.dex_register_entry.num_dex_registers = num_dex_registers; current_inline_info_.dex_register_entry.locations_start_index = dex_register_locations_.size(); - current_inline_info_.dex_register_entry.live_dex_registers_mask = (num_dex_registers != 0) - ? 
ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream) - : nullptr; + current_inline_info_.dex_register_entry.live_dex_registers_mask = nullptr; + if (num_dex_registers != 0) { + current_inline_info_.dex_register_entry.live_dex_registers_mask = + ArenaBitVector::Create(allocator_, num_dex_registers, true, kArenaAllocStackMapStream); + current_inline_info_.dex_register_entry.live_dex_registers_mask->ClearAllBits(); + } current_dex_register_ = 0; } @@ -226,7 +235,7 @@ void StackMapStream::ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding) { size_t invoke_infos_count = 0; size_t invoke_type_max = 0; for (const StackMapEntry& entry : stack_maps_) { - if (entry.dex_method_index != DexFile::kDexNoIndex) { + if (entry.dex_method_index != dex::kDexNoIndex) { native_pc_max = std::max(native_pc_max, entry.native_pc_code_offset.CompressedValue()); method_index_max = std::max(method_index_max, static_cast<uint16_t>(entry.dex_method_index)); invoke_type_max = std::max(invoke_type_max, static_cast<size_t>(entry.invoke_type)); @@ -240,7 +249,7 @@ void StackMapStream::ComputeInvokeInfoEncoding(CodeInfoEncoding* encoding) { void StackMapStream::ComputeInlineInfoEncoding(InlineInfoEncoding* encoding, size_t dex_register_maps_bytes) { uint32_t method_index_max = 0; - uint32_t dex_pc_max = DexFile::kDexNoIndex; + uint32_t dex_pc_max = dex::kDexNoIndex; uint32_t extra_data_max = 0; uint32_t inline_info_index = 0; @@ -256,8 +265,8 @@ void StackMapStream::ComputeInlineInfoEncoding(InlineInfoEncoding* encoding, extra_data_max = std::max( extra_data_max, Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method))); } - if (inline_entry.dex_pc != DexFile::kDexNoIndex && - (dex_pc_max == DexFile::kDexNoIndex || dex_pc_max < inline_entry.dex_pc)) { + if (inline_entry.dex_pc != dex::kDexNoIndex && + (dex_pc_max == dex::kDexNoIndex || dex_pc_max < inline_entry.dex_pc)) { dex_pc_max = inline_entry.dex_pc; } } @@ -362,7 +371,7 @@ void StackMapStream::FillInCodeInfo(MemoryRegion region) { dex_register_locations_region); stack_map.SetDexRegisterMapOffset(encoding.stack_map.encoding, offset); - if (entry.dex_method_index != DexFile::kDexNoIndex) { + if (entry.dex_method_index != dex::kDexNoIndex) { InvokeInfo invoke_info(code_info.GetInvokeInfo(encoding, invoke_info_idx)); invoke_info.SetNativePcCodeOffset(encoding.invoke_info.encoding, entry.native_pc_code_offset); invoke_info.SetInvokeType(encoding.invoke_info.encoding, entry.invoke_type); @@ -467,7 +476,7 @@ size_t StackMapStream::AddDexRegisterMapEntry(const DexRegisterMapEntry& entry) if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { // We don't have a perfect hash functions so we need a list to collect all stack maps // which might have the same dex register map. 
- ArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream)); + ScopedArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream)); stack_map_indices.push_back(current_entry_index); dex_map_hash_to_stack_map_indices_.Put(entry.hash, std::move(stack_map_indices)); } else { @@ -545,7 +554,7 @@ void StackMapStream::CheckDexRegisterMap(const CodeInfo& code_info, size_t StackMapStream::PrepareRegisterMasks() { register_masks_.resize(stack_maps_.size(), 0u); - ArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream)); + ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream)); for (StackMapEntry& stack_map : stack_maps_) { const size_t index = dedupe.size(); stack_map.register_mask_index = dedupe.emplace(stack_map.register_mask, index).first->second; @@ -557,11 +566,11 @@ size_t StackMapStream::PrepareRegisterMasks() { void StackMapStream::PrepareMethodIndices() { CHECK(method_indices_.empty()); method_indices_.resize(stack_maps_.size() + inline_infos_.size()); - ArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream)); + ScopedArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream)); for (StackMapEntry& stack_map : stack_maps_) { const size_t index = dedupe.size(); const uint32_t method_index = stack_map.dex_method_index; - if (method_index != DexFile::kDexNoIndex) { + if (method_index != dex::kDexNoIndex) { stack_map.dex_method_index_idx = dedupe.emplace(method_index, index).first->second; method_indices_[index] = method_index; } @@ -569,7 +578,7 @@ void StackMapStream::PrepareMethodIndices() { for (InlineInfoEntry& inline_info : inline_infos_) { const size_t index = dedupe.size(); const uint32_t method_index = inline_info.method_index; - CHECK_NE(method_index, DexFile::kDexNoIndex); + CHECK_NE(method_index, dex::kDexNoIndex); inline_info.dex_method_index_idx = dedupe.emplace(method_index, index).first->second; method_indices_[index] = method_index; } @@ -583,11 +592,11 @@ size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) { stack_masks_.resize(byte_entry_size * stack_maps_.size(), 0u); // For deduplicating we store the stack masks as byte packed for simplicity. We can bit pack later // when copying out from stack_masks_. 
- ArenaUnorderedMap<MemoryRegion, - size_t, - FNVHash<MemoryRegion>, - MemoryRegion::ContentEquals> dedup( - stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream)); + ScopedArenaUnorderedMap<MemoryRegion, + size_t, + FNVHash<MemoryRegion>, + MemoryRegion::ContentEquals> dedup( + stack_maps_.size(), allocator_->Adapter(kArenaAllocStackMapStream)); for (StackMapEntry& stack_map : stack_maps_) { size_t index = dedup.size(); MemoryRegion stack_mask(stack_masks_.data() + index * byte_entry_size, byte_entry_size); @@ -629,7 +638,7 @@ void StackMapStream::CheckCodeInfo(MemoryRegion region) const { DCHECK_EQ(stack_mask.LoadBit(b), 0u); } } - if (entry.dex_method_index != DexFile::kDexNoIndex) { + if (entry.dex_method_index != dex::kDexNoIndex) { InvokeInfo invoke_info = code_info.GetInvokeInfo(encoding, invoke_info_index); DCHECK_EQ(invoke_info.GetNativePcOffset(encoding.invoke_info.encoding, instruction_set_), entry.native_pc_code_offset.Uint32Value(instruction_set_)); diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index e6471e1bc5..579aabdb5f 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -17,9 +17,9 @@ #ifndef ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_ #define ART_COMPILER_OPTIMIZING_STACK_MAP_STREAM_H_ -#include "base/arena_containers.h" #include "base/bit_vector-inl.h" #include "base/hash_map.h" +#include "base/scoped_arena_containers.h" #include "base/value_object.h" #include "memory_region.h" #include "method_info.h" @@ -60,8 +60,7 @@ class DexRegisterLocationHashFn { */ class StackMapStream : public ValueObject { public: - explicit StackMapStream(ArenaAllocator* allocator, - InstructionSet instruction_set) + explicit StackMapStream(ScopedArenaAllocator* allocator, InstructionSet instruction_set) : allocator_(allocator), instruction_set_(instruction_set), stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)), @@ -74,7 +73,7 @@ class StackMapStream : public ValueObject { method_indices_(allocator->Adapter(kArenaAllocStackMapStream)), dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)), stack_mask_max_(-1), - dex_pc_max_(0), + dex_pc_max_(kNoDexPc), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), @@ -126,7 +125,7 @@ class StackMapStream : public ValueObject { }; struct InlineInfoEntry { - uint32_t dex_pc; // DexFile::kDexNoIndex for intrinsified native methods. + uint32_t dex_pc; // dex::kDexNoIndex for intrinsified native methods. ArtMethod* method; uint32_t method_index; DexRegisterMapEntry dex_register_entry; @@ -223,37 +222,37 @@ class StackMapStream : public ValueObject { size_t dex_register_locations_index) const; void CheckCodeInfo(MemoryRegion region) const; - ArenaAllocator* allocator_; + ScopedArenaAllocator* const allocator_; const InstructionSet instruction_set_; - ArenaVector<StackMapEntry> stack_maps_; + ScopedArenaVector<StackMapEntry> stack_maps_; // A catalog of unique [location_kind, register_value] pairs (per method). - ArenaVector<DexRegisterLocation> location_catalog_entries_; + ScopedArenaVector<DexRegisterLocation> location_catalog_entries_; // Map from Dex register location catalog entries to their indices in the // location catalog. 
- using LocationCatalogEntriesIndices = ArenaHashMap<DexRegisterLocation, - size_t, - LocationCatalogEntriesIndicesEmptyFn, - DexRegisterLocationHashFn>; + using LocationCatalogEntriesIndices = ScopedArenaHashMap<DexRegisterLocation, + size_t, + LocationCatalogEntriesIndicesEmptyFn, + DexRegisterLocationHashFn>; LocationCatalogEntriesIndices location_catalog_entries_indices_; // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. - ArenaVector<size_t> dex_register_locations_; - ArenaVector<InlineInfoEntry> inline_infos_; - ArenaVector<uint8_t> stack_masks_; - ArenaVector<uint32_t> register_masks_; - ArenaVector<uint32_t> method_indices_; - ArenaVector<DexRegisterMapEntry> dex_register_entries_; + ScopedArenaVector<size_t> dex_register_locations_; + ScopedArenaVector<InlineInfoEntry> inline_infos_; + ScopedArenaVector<uint8_t> stack_masks_; + ScopedArenaVector<uint32_t> register_masks_; + ScopedArenaVector<uint32_t> method_indices_; + ScopedArenaVector<DexRegisterMapEntry> dex_register_entries_; int stack_mask_max_; uint32_t dex_pc_max_; uint32_t register_mask_max_; size_t number_of_stack_maps_with_inline_info_; - ArenaSafeMap<uint32_t, ArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_; + ScopedArenaSafeMap<uint32_t, ScopedArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_; StackMapEntry current_entry_; InlineInfoEntry current_inline_info_; - ArenaVector<uint8_t> code_info_encoding_; + ScopedArenaVector<uint8_t> code_info_encoding_; size_t needed_size_; uint32_t current_dex_register_; bool in_inline_frame_; diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc index a842c6e452..7e517f3485 100644 --- a/compiler/optimizing/stack_map_test.cc +++ b/compiler/optimizing/stack_map_test.cc @@ -47,10 +47,11 @@ using Kind = DexRegisterLocation::Kind; TEST(StackMapTest, Test1) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); - ArenaBitVector sp_mask(&arena, 0, false); + ArenaBitVector sp_mask(&allocator, 0, false); size_t number_of_dex_registers = 2; stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.AddDexRegisterEntry(Kind::kInStack, 0); // Short location. 
@@ -58,7 +59,7 @@ TEST(StackMapTest, Test1) { stream.EndStackMapEntry(); size_t size = stream.PrepareForFillIn(); - void* memory = arena.Alloc(size, kArenaAllocMisc); + void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillInCodeInfo(region); @@ -128,11 +129,12 @@ TEST(StackMapTest, Test1) { TEST(StackMapTest, Test2) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); ArtMethod art_method; - ArenaBitVector sp_mask1(&arena, 0, true); + ArenaBitVector sp_mask1(&allocator, 0, true); sp_mask1.SetBit(2); sp_mask1.SetBit(4); size_t number_of_dex_registers = 2; @@ -146,7 +148,7 @@ TEST(StackMapTest, Test2) { stream.EndInlineInfoEntry(); stream.EndStackMapEntry(); - ArenaBitVector sp_mask2(&arena, 0, true); + ArenaBitVector sp_mask2(&allocator, 0, true); sp_mask2.SetBit(3); sp_mask2.SetBit(8); stream.BeginStackMapEntry(1, 128, 0xFF, &sp_mask2, number_of_dex_registers, 0); @@ -154,7 +156,7 @@ TEST(StackMapTest, Test2) { stream.AddDexRegisterEntry(Kind::kInFpuRegister, 3); // Short location. stream.EndStackMapEntry(); - ArenaBitVector sp_mask3(&arena, 0, true); + ArenaBitVector sp_mask3(&allocator, 0, true); sp_mask3.SetBit(1); sp_mask3.SetBit(5); stream.BeginStackMapEntry(2, 192, 0xAB, &sp_mask3, number_of_dex_registers, 0); @@ -162,7 +164,7 @@ TEST(StackMapTest, Test2) { stream.AddDexRegisterEntry(Kind::kInRegisterHigh, 8); // Short location. stream.EndStackMapEntry(); - ArenaBitVector sp_mask4(&arena, 0, true); + ArenaBitVector sp_mask4(&allocator, 0, true); sp_mask4.SetBit(6); sp_mask4.SetBit(7); stream.BeginStackMapEntry(3, 256, 0xCD, &sp_mask4, number_of_dex_registers, 0); @@ -171,7 +173,7 @@ TEST(StackMapTest, Test2) { stream.EndStackMapEntry(); size_t size = stream.PrepareForFillIn(); - void* memory = arena.Alloc(size, kArenaAllocMisc); + void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillInCodeInfo(region); @@ -412,11 +414,12 @@ TEST(StackMapTest, Test2) { TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); ArtMethod art_method; - ArenaBitVector sp_mask1(&arena, 0, true); + ArenaBitVector sp_mask1(&allocator, 0, true); sp_mask1.SetBit(2); sp_mask1.SetBit(4); const size_t number_of_dex_registers = 2; @@ -431,7 +434,7 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { stream.EndStackMapEntry(); size_t size = stream.PrepareForFillIn(); - void* memory = arena.Alloc(size, kArenaAllocMisc); + void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillInCodeInfo(region); @@ -506,10 +509,11 @@ TEST(StackMapTest, TestDeduplicateInlineInfoDexRegisterMap) { TEST(StackMapTest, TestNonLiveDexRegisters) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); - ArenaBitVector sp_mask(&arena, 0, false); + ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); 
stream.AddDexRegisterEntry(Kind::kNone, 0); // No location. @@ -517,7 +521,7 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { stream.EndStackMapEntry(); size_t size = stream.PrepareForFillIn(); - void* memory = arena.Alloc(size, kArenaAllocMisc); + void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillInCodeInfo(region); @@ -585,10 +589,11 @@ TEST(StackMapTest, TestNonLiveDexRegisters) { // not treat it as kNoDexRegisterMap. TEST(StackMapTest, DexRegisterMapOffsetOverflow) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); - ArenaBitVector sp_mask(&arena, 0, false); + ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 1024; // Create the first stack map (and its Dex register map). stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); @@ -609,7 +614,7 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { stream.EndStackMapEntry(); size_t size = stream.PrepareForFillIn(); - void* memory = arena.Alloc(size, kArenaAllocMisc); + void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillInCodeInfo(region); @@ -648,10 +653,11 @@ TEST(StackMapTest, DexRegisterMapOffsetOverflow) { TEST(StackMapTest, TestShareDexRegisterMap) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); - ArenaBitVector sp_mask(&arena, 0, false); + ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 2; // First stack map. 
stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); @@ -670,7 +676,7 @@ TEST(StackMapTest, TestShareDexRegisterMap) { stream.EndStackMapEntry(); size_t size = stream.PrepareForFillIn(); - void* memory = arena.Alloc(size, kArenaAllocMisc); + void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillInCodeInfo(region); @@ -706,10 +712,11 @@ TEST(StackMapTest, TestShareDexRegisterMap) { TEST(StackMapTest, TestNoDexRegisterMap) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); - ArenaBitVector sp_mask(&arena, 0, false); + ArenaBitVector sp_mask(&allocator, 0, false); uint32_t number_of_dex_registers = 0; stream.BeginStackMapEntry(0, 64, 0x3, &sp_mask, number_of_dex_registers, 0); stream.EndStackMapEntry(); @@ -719,7 +726,7 @@ TEST(StackMapTest, TestNoDexRegisterMap) { stream.EndStackMapEntry(); size_t size = stream.PrepareForFillIn(); - void* memory = arena.Alloc(size, kArenaAllocMisc); + void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillInCodeInfo(region); @@ -755,11 +762,12 @@ TEST(StackMapTest, TestNoDexRegisterMap) { TEST(StackMapTest, InlineTest) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); ArtMethod art_method; - ArenaBitVector sp_mask1(&arena, 0, true); + ArenaBitVector sp_mask1(&allocator, 0, true); sp_mask1.SetBit(2); sp_mask1.SetBit(4); @@ -821,7 +829,7 @@ TEST(StackMapTest, InlineTest) { stream.EndStackMapEntry(); size_t size = stream.PrepareForFillIn(); - void* memory = arena.Alloc(size, kArenaAllocMisc); + void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillInCodeInfo(region); @@ -920,26 +928,33 @@ TEST(StackMapTest, InlineTest) { TEST(StackMapTest, CodeOffsetTest) { // Test minimum alignments, encoding, and decoding. 
- CodeOffset offset_thumb2 = CodeOffset::FromOffset(kThumb2InstructionAlignment, kThumb2); - CodeOffset offset_arm64 = CodeOffset::FromOffset(kArm64InstructionAlignment, kArm64); - CodeOffset offset_x86 = CodeOffset::FromOffset(kX86InstructionAlignment, kX86); - CodeOffset offset_x86_64 = CodeOffset::FromOffset(kX86_64InstructionAlignment, kX86_64); - CodeOffset offset_mips = CodeOffset::FromOffset(kMipsInstructionAlignment, kMips); - CodeOffset offset_mips64 = CodeOffset::FromOffset(kMips64InstructionAlignment, kMips64); - EXPECT_EQ(offset_thumb2.Uint32Value(kThumb2), kThumb2InstructionAlignment); - EXPECT_EQ(offset_arm64.Uint32Value(kArm64), kArm64InstructionAlignment); - EXPECT_EQ(offset_x86.Uint32Value(kX86), kX86InstructionAlignment); - EXPECT_EQ(offset_x86_64.Uint32Value(kX86_64), kX86_64InstructionAlignment); - EXPECT_EQ(offset_mips.Uint32Value(kMips), kMipsInstructionAlignment); - EXPECT_EQ(offset_mips64.Uint32Value(kMips64), kMips64InstructionAlignment); + CodeOffset offset_thumb2 = + CodeOffset::FromOffset(kThumb2InstructionAlignment, InstructionSet::kThumb2); + CodeOffset offset_arm64 = + CodeOffset::FromOffset(kArm64InstructionAlignment, InstructionSet::kArm64); + CodeOffset offset_x86 = + CodeOffset::FromOffset(kX86InstructionAlignment, InstructionSet::kX86); + CodeOffset offset_x86_64 = + CodeOffset::FromOffset(kX86_64InstructionAlignment, InstructionSet::kX86_64); + CodeOffset offset_mips = + CodeOffset::FromOffset(kMipsInstructionAlignment, InstructionSet::kMips); + CodeOffset offset_mips64 = + CodeOffset::FromOffset(kMips64InstructionAlignment, InstructionSet::kMips64); + EXPECT_EQ(offset_thumb2.Uint32Value(InstructionSet::kThumb2), kThumb2InstructionAlignment); + EXPECT_EQ(offset_arm64.Uint32Value(InstructionSet::kArm64), kArm64InstructionAlignment); + EXPECT_EQ(offset_x86.Uint32Value(InstructionSet::kX86), kX86InstructionAlignment); + EXPECT_EQ(offset_x86_64.Uint32Value(InstructionSet::kX86_64), kX86_64InstructionAlignment); + EXPECT_EQ(offset_mips.Uint32Value(InstructionSet::kMips), kMipsInstructionAlignment); + EXPECT_EQ(offset_mips64.Uint32Value(InstructionSet::kMips64), kMips64InstructionAlignment); } TEST(StackMapTest, TestDeduplicateStackMask) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); - ArenaBitVector sp_mask(&arena, 0, true); + ArenaBitVector sp_mask(&allocator, 0, true); sp_mask.SetBit(1); sp_mask.SetBit(4); stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0); @@ -948,7 +963,7 @@ TEST(StackMapTest, TestDeduplicateStackMask) { stream.EndStackMapEntry(); size_t size = stream.PrepareForFillIn(); - void* memory = arena.Alloc(size, kArenaAllocMisc); + void* memory = allocator.Alloc(size, kArenaAllocMisc); MemoryRegion region(memory, size); stream.FillInCodeInfo(region); @@ -964,10 +979,11 @@ TEST(StackMapTest, TestDeduplicateStackMask) { TEST(StackMapTest, TestInvokeInfo) { ArenaPool pool; - ArenaAllocator arena(&pool); - StackMapStream stream(&arena, kRuntimeISA); + ArenaStack arena_stack(&pool); + ScopedArenaAllocator allocator(&arena_stack); + StackMapStream stream(&allocator, kRuntimeISA); - ArenaBitVector sp_mask(&arena, 0, true); + ArenaBitVector sp_mask(&allocator, 0, true); sp_mask.SetBit(1); stream.BeginStackMapEntry(0, 4, 0x3, &sp_mask, 0, 0); stream.AddInvoke(kSuper, 1); @@ -980,11 +996,12 @@ TEST(StackMapTest, TestInvokeInfo) { stream.EndStackMapEntry(); const size_t 
code_info_size = stream.PrepareForFillIn(); - MemoryRegion code_info_region(arena.Alloc(code_info_size, kArenaAllocMisc), code_info_size); + MemoryRegion code_info_region(allocator.Alloc(code_info_size, kArenaAllocMisc), code_info_size); stream.FillInCodeInfo(code_info_region); const size_t method_info_size = stream.ComputeMethodInfoSize(); - MemoryRegion method_info_region(arena.Alloc(method_info_size, kArenaAllocMisc), method_info_size); + MemoryRegion method_info_region(allocator.Alloc(method_info_size, kArenaAllocMisc), + method_info_size); stream.FillInMethodInfo(method_info_region); CodeInfo code_info(code_info_region); diff --git a/compiler/optimizing/superblock_cloner.cc b/compiler/optimizing/superblock_cloner.cc new file mode 100644 index 0000000000..a7c23bef7e --- /dev/null +++ b/compiler/optimizing/superblock_cloner.cc @@ -0,0 +1,704 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "superblock_cloner.h" + +#include "common_dominator.h" +#include "graph_checker.h" + +#include <iostream> + +namespace art { + +using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; +using HInstructionMap = SuperblockCloner::HInstructionMap; +using HBasicBlockSet = SuperblockCloner::HBasicBlockSet; +using HEdgeSet = SuperblockCloner::HEdgeSet; + +void HEdge::Dump(std::ostream& stream) const { + stream << "(" << from_ << "->" << to_ << ")"; +} + +// +// Static helper methods. +// + +// Returns whether instruction has any uses (regular or environmental) outside the region, +// defined by basic block set. +static bool IsUsedOutsideRegion(const HInstruction* instr, const HBasicBlockSet& bb_set) { + auto& uses = instr->GetUses(); + for (auto use_node = uses.begin(), e = uses.end(); use_node != e; ++use_node) { + HInstruction* user = use_node->GetUser(); + if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) { + return true; + } + } + + auto& env_uses = instr->GetEnvUses(); + for (auto use_node = env_uses.begin(), e = env_uses.end(); use_node != e; ++use_node) { + HInstruction* user = use_node->GetUser()->GetHolder(); + if (!bb_set.IsBitSet(user->GetBlock()->GetBlockId())) { + return true; + } + } + + return false; +} + +// Returns whether the phi's inputs are the same HInstruction. +static bool ArePhiInputsTheSame(const HPhi* phi) { + HInstruction* first_input = phi->InputAt(0); + for (size_t i = 1, e = phi->InputCount(); i < e; i++) { + if (phi->InputAt(i) != first_input) { + return false; + } + } + + return true; +} + +// Returns a common predecessor of loop1 and loop2 in the loop tree or nullptr if it is the whole +// graph. 
+static HLoopInformation* FindCommonLoop(HLoopInformation* loop1, HLoopInformation* loop2) {
+  if (loop1 == nullptr || loop2 == nullptr) {
+    return nullptr;
+  }
+
+  if (loop1->IsIn(*loop2)) {
+    return loop2;
+  } else if (loop2->IsIn(*loop1)) {
+    return loop1;
+  }
+  HBasicBlock* block = CommonDominator::ForPair(loop1->GetHeader(), loop2->GetHeader());
+  return block->GetLoopInformation();
+}
+
+// Calls HGraph::OrderLoopHeaderPredecessors for each loop in the graph.
+static void OrderLoopsHeadersPredecessors(HGraph* graph) {
+  for (HBasicBlock* block : graph->GetPostOrder()) {
+    if (block->IsLoopHeader()) {
+      graph->OrderLoopHeaderPredecessors(block);
+    }
+  }
+}
+
+//
+// Helpers for CloneBasicBlock.
+//
+
+void SuperblockCloner::ReplaceInputsWithCopies(HInstruction* copy_instr) {
+  DCHECK(!copy_instr->IsPhi());
+  for (size_t i = 0, e = copy_instr->InputCount(); i < e; i++) {
+    // Copy instruction holds the same input as the original instruction holds.
+    HInstruction* orig_input = copy_instr->InputAt(i);
+    if (!IsInOrigBBSet(orig_input->GetBlock())) {
+      // Defined outside the subgraph.
+      continue;
+    }
+    HInstruction* copy_input = GetInstrCopy(orig_input);
+    // copy_instr will be registered as a user of copy_inputs after returning from this function:
+    // 'copy_block->AddInstruction(copy_instr)'.
+    copy_instr->SetRawInputAt(i, copy_input);
+  }
+}
+
+void SuperblockCloner::DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr,
+                                                         const HEnvironment* orig_env) {
+  if (orig_env->GetParent() != nullptr) {
+    DeepCloneEnvironmentWithRemapping(copy_instr, orig_env->GetParent());
+  }
+  HEnvironment* copy_env = new (arena_) HEnvironment(arena_, *orig_env, copy_instr);
+
+  for (size_t i = 0; i < orig_env->Size(); i++) {
+    HInstruction* env_input = orig_env->GetInstructionAt(i);
+    if (env_input != nullptr && IsInOrigBBSet(env_input->GetBlock())) {
+      env_input = GetInstrCopy(env_input);
+      DCHECK(env_input != nullptr && env_input->GetBlock() != nullptr);
+    }
+    copy_env->SetRawEnvAt(i, env_input);
+    if (env_input != nullptr) {
+      env_input->AddEnvUseAt(copy_env, i);
+    }
+  }
+  // InsertRawEnvironment assumes that the instruction already has an environment; that's why we
+  // use SetRawEnvironment in the 'else' case.
+  // As this function calls itself recursively with the same copy_instr, this copy_instr may
+  // have a partially copied chain of HEnvironments.
+  if (copy_instr->HasEnvironment()) {
+    copy_instr->InsertRawEnvironment(copy_env);
+  } else {
+    copy_instr->SetRawEnvironment(copy_env);
+  }
+}
+
+//
+// Helpers for RemapEdgesSuccessors.
+//
+
+void SuperblockCloner::RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block,
+                                                       HBasicBlock* orig_succ) {
+  DCHECK(IsInOrigBBSet(orig_succ));
+  HBasicBlock* copy_succ = GetBlockCopy(orig_succ);
+
+  size_t this_index = orig_succ->GetPredecessorIndexOf(orig_block);
+  size_t phi_input_count = 0;
+  // This flag reflects whether the original successor has at least one phi and this phi
+  // has already been processed in the loop. Used for validation purposes in DCHECK to check that
+  // in the end all of the phis in the copy successor have the same number of inputs - the number
+  // of copy successor's predecessors.
+  bool first_phi_met = false;
+  for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) {
+    HPhi* orig_phi = it.Current()->AsPhi();
+    HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi();
+    HInstruction* orig_phi_input = orig_phi->InputAt(this_index);
+    // Remove the corresponding input for the original phi.
+ orig_phi->RemoveInputAt(this_index); + // Copy phi doesn't yet have either orig_block as predecessor or the input that corresponds + // to orig_block, so add the input at the end of the list. + copy_phi->AddInput(orig_phi_input); + if (!first_phi_met) { + phi_input_count = copy_phi->InputCount(); + first_phi_met = true; + } else { + DCHECK_EQ(phi_input_count, copy_phi->InputCount()); + } + } + // orig_block will be put at the end of the copy_succ's predecessors list; that corresponds + // to the previously added phi inputs position. + orig_block->ReplaceSuccessor(orig_succ, copy_succ); + DCHECK(!first_phi_met || copy_succ->GetPredecessors().size() == phi_input_count); +} + +void SuperblockCloner::AddCopyInternalEdge(HBasicBlock* orig_block, + HBasicBlock* orig_succ) { + DCHECK(IsInOrigBBSet(orig_succ)); + HBasicBlock* copy_block = GetBlockCopy(orig_block); + HBasicBlock* copy_succ = GetBlockCopy(orig_succ); + copy_block->AddSuccessor(copy_succ); + + size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block); + for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); + HInstruction* orig_phi_input = orig_phi->InputAt(orig_index); + copy_phi->AddInput(orig_phi_input); + } +} + +void SuperblockCloner::RemapCopyInternalEdge(HBasicBlock* orig_block, + HBasicBlock* orig_succ) { + DCHECK(IsInOrigBBSet(orig_succ)); + HBasicBlock* copy_block = GetBlockCopy(orig_block); + copy_block->AddSuccessor(orig_succ); + DCHECK(copy_block->HasSuccessor(orig_succ)); + + size_t orig_index = orig_succ->GetPredecessorIndexOf(orig_block); + for (HInstructionIterator it(orig_succ->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HInstruction* orig_phi_input = orig_phi->InputAt(orig_index); + orig_phi->AddInput(orig_phi_input); + } +} + +// +// Local versions of CF calculation/adjustment routines. +// + +// TODO: merge with the original version in nodes.cc. The concern is that we don't want to affect +// the performance of the base version by checking the local set. +// TODO: this version works when updating the back edges info for natural loop-based local_set. +// Check which exactly types of subgraphs can be analysed or rename it to +// FindBackEdgesInTheNaturalLoop. +void SuperblockCloner::FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set) { + ArenaBitVector visited(arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + // "visited" must be empty on entry, it's an output argument for all visited (i.e. live) blocks. + DCHECK_EQ(visited.GetHighestBitSet(), -1); + + // Nodes that we're currently visiting, indexed by block id. + ArenaBitVector visiting(arena_, graph_->GetBlocks().size(), false, kArenaAllocGraphBuilder); + // Number of successors visited from a given node, indexed by block id. + ArenaVector<size_t> successors_visited(graph_->GetBlocks().size(), + 0u, + arena_->Adapter(kArenaAllocGraphBuilder)); + // Stack of nodes that we're currently visiting (same as marked in "visiting" above). 
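The visited/visiting/worklist scheme used here is the standard iterative depth-first search for back edges: an edge is a back edge exactly when its target is still on the DFS stack, i.e. its "visiting" bit is set. A self-contained sketch of the same scheme on a plain adjacency-list graph, for illustration only:

    #include <cstddef>
    #include <utility>
    #include <vector>

    // Returns every edge (u, v) whose target v is still being visited when the edge is taken,
    // i.e. the back edges of the subgraph reachable from `entry`.
    std::vector<std::pair<size_t, size_t>> FindBackEdges(
        const std::vector<std::vector<size_t>>& successors, size_t entry) {
      std::vector<bool> visited(successors.size(), false);
      std::vector<bool> visiting(successors.size(), false);  // Currently on the DFS stack.
      std::vector<size_t> next_successor(successors.size(), 0u);
      std::vector<size_t> worklist;
      std::vector<std::pair<size_t, size_t>> back_edges;

      visited[entry] = true;
      visiting[entry] = true;
      worklist.push_back(entry);

      while (!worklist.empty()) {
        size_t current = worklist.back();
        if (next_successor[current] == successors[current].size()) {
          visiting[current] = false;  // Fully explored; leaves the DFS stack.
          worklist.pop_back();
          continue;
        }
        size_t successor = successors[current][next_successor[current]++];
        if (visiting[successor]) {
          back_edges.emplace_back(current, successor);  // Target is an ancestor on the stack.
        } else if (!visited[successor]) {
          visited[successor] = true;
          visiting[successor] = true;
          worklist.push_back(successor);
        }
      }
      return back_edges;
    }

For the two-block natural loop header -> body -> header this reports the single back edge (body, header).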
+ ArenaVector<HBasicBlock*> worklist(arena_->Adapter(kArenaAllocGraphBuilder)); + constexpr size_t kDefaultWorklistSize = 8; + worklist.reserve(kDefaultWorklistSize); + + visited.SetBit(entry_block->GetBlockId()); + visiting.SetBit(entry_block->GetBlockId()); + worklist.push_back(entry_block); + + while (!worklist.empty()) { + HBasicBlock* current = worklist.back(); + uint32_t current_id = current->GetBlockId(); + if (successors_visited[current_id] == current->GetSuccessors().size()) { + visiting.ClearBit(current_id); + worklist.pop_back(); + } else { + HBasicBlock* successor = current->GetSuccessors()[successors_visited[current_id]++]; + uint32_t successor_id = successor->GetBlockId(); + if (!local_set->IsBitSet(successor_id)) { + continue; + } + + if (visiting.IsBitSet(successor_id)) { + DCHECK(ContainsElement(worklist, successor)); + successor->AddBackEdgeWhileUpdating(current); + } else if (!visited.IsBitSet(successor_id)) { + visited.SetBit(successor_id); + visiting.SetBit(successor_id); + worklist.push_back(successor); + } + } + } +} + +void SuperblockCloner::RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set) { + // TODO: DCHECK that after the transformation the graph is connected. + HBasicBlock* block_entry = nullptr; + + if (outer_loop_ == nullptr) { + for (auto block : graph_->GetBlocks()) { + if (block != nullptr) { + outer_loop_bb_set->SetBit(block->GetBlockId()); + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr) { + info->ResetBasicBlockData(); + } + } + } + block_entry = graph_->GetEntryBlock(); + } else { + outer_loop_bb_set->Copy(&outer_loop_bb_set_); + block_entry = outer_loop_->GetHeader(); + + // Add newly created copy blocks. + for (auto entry : *bb_map_) { + outer_loop_bb_set->SetBit(entry.second->GetBlockId()); + } + + // Clear loop_info for the whole outer loop. + for (uint32_t idx : outer_loop_bb_set->Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* info = block->GetLoopInformation(); + if (info != nullptr) { + info->ResetBasicBlockData(); + } + } + } + + FindBackEdgesLocal(block_entry, outer_loop_bb_set); + + for (uint32_t idx : outer_loop_bb_set->Indexes()) { + HBasicBlock* block = GetBlockById(idx); + HLoopInformation* info = block->GetLoopInformation(); + // Reset LoopInformation for regular blocks and old headers which are no longer loop headers. + if (info != nullptr && + (info->GetHeader() != block || info->NumberOfBackEdges() == 0)) { + block->SetLoopInformation(nullptr); + } + } +} + +// This is a modified version of HGraph::AnalyzeLoops. +GraphAnalysisResult SuperblockCloner::AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set) { + // We iterate post order to ensure we visit inner loops before outer loops. + // `PopulateRecursive` needs this guarantee to know whether a natural loop + // contains an irreducible loop. + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) { + continue; + } + if (block->IsLoopHeader()) { + if (block->IsCatchBlock()) { + // TODO: Dealing with exceptional back edges could be tricky because + // they only approximate the real control flow. Bail out for now. 
+ return kAnalysisFailThrowCatchLoop; + } + block->GetLoopInformation()->Populate(); + } + } + + for (HBasicBlock* block : graph_->GetPostOrder()) { + if (!outer_loop_bb_set->IsBitSet(block->GetBlockId())) { + continue; + } + if (block->IsLoopHeader()) { + HLoopInformation* cur_loop = block->GetLoopInformation(); + HLoopInformation* outer_loop = cur_loop->GetPreHeader()->GetLoopInformation(); + if (outer_loop != nullptr) { + outer_loop->PopulateInnerLoopUpwards(cur_loop); + } + } + } + + return kAnalysisSuccess; +} + +void SuperblockCloner::CleanUpControlFlow() { + // TODO: full control flow clean up for now, optimize it. + graph_->ClearDominanceInformation(); + + ArenaBitVector outer_loop_bb_set( + arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + RecalculateBackEdgesInfo(&outer_loop_bb_set); + + // TODO: do it locally. + graph_->SimplifyCFG(); + graph_->ComputeDominanceInformation(); + + // AnalyzeLoopsLocally requires a correct post-ordering information which was calculated just + // before in ComputeDominanceInformation. + GraphAnalysisResult result = AnalyzeLoopsLocally(&outer_loop_bb_set); + DCHECK_EQ(result, kAnalysisSuccess); + + // TODO: do it locally + OrderLoopsHeadersPredecessors(graph_); + + graph_->ComputeTryBlockInformation(); +} + +// +// Helpers for ResolveDataFlow +// + +void SuperblockCloner::ResolvePhi(HPhi* phi) { + HBasicBlock* phi_block = phi->GetBlock(); + for (size_t i = 0, e = phi->InputCount(); i < e; i++) { + HInstruction* input = phi->InputAt(i); + HBasicBlock* input_block = input->GetBlock(); + + // Originally defined outside the region. + if (!IsInOrigBBSet(input_block)) { + continue; + } + HBasicBlock* corresponding_block = phi_block->GetPredecessors()[i]; + if (!IsInOrigBBSet(corresponding_block)) { + phi->ReplaceInput(GetInstrCopy(input), i); + } + } +} + +// +// Main algorithm methods. +// + +void SuperblockCloner::SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits) { + DCHECK(exits->empty()); + for (uint32_t block_id : orig_bb_set_.Indexes()) { + HBasicBlock* block = GetBlockById(block_id); + for (HBasicBlock* succ : block->GetSuccessors()) { + if (!IsInOrigBBSet(succ)) { + exits->push_back(succ); + } + } + } +} + +void SuperblockCloner::FindAndSetLocalAreaForAdjustments() { + DCHECK(outer_loop_ == nullptr); + ArenaVector<HBasicBlock*> exits(arena_->Adapter(kArenaAllocSuperblockCloner)); + SearchForSubgraphExits(&exits); + + // For a reducible graph we need to update back-edges and dominance information only for + // the outermost loop which is affected by the transformation - it can be found by picking + // the common most outer loop of loops to which the subgraph exits blocks belong. + // Note: it can a loop or the whole graph (outer_loop_ will be nullptr in this case). + for (HBasicBlock* exit : exits) { + HLoopInformation* loop_exit_loop_info = exit->GetLoopInformation(); + if (loop_exit_loop_info == nullptr) { + outer_loop_ = nullptr; + break; + } + outer_loop_ = FindCommonLoop(outer_loop_, loop_exit_loop_info); + } + + if (outer_loop_ != nullptr) { + // Save the loop population info as it will be changed later. + outer_loop_bb_set_.Copy(&outer_loop_->GetBlocks()); + } +} + +void SuperblockCloner::RemapEdgesSuccessors() { + // Redirect incoming edges. + for (HEdge e : *remap_incoming_) { + HBasicBlock* orig_block = GetBlockById(e.GetFrom()); + HBasicBlock* orig_succ = GetBlockById(e.GetTo()); + RemapOrigInternalOrIncomingEdge(orig_block, orig_succ); + } + + // Redirect internal edges. 
+ for (uint32_t orig_block_id : orig_bb_set_.Indexes()) { + HBasicBlock* orig_block = GetBlockById(orig_block_id); + + for (HBasicBlock* orig_succ : orig_block->GetSuccessors()) { + uint32_t orig_succ_id = orig_succ->GetBlockId(); + + // Check for outgoing edge. + if (!IsInOrigBBSet(orig_succ)) { + HBasicBlock* copy_block = GetBlockCopy(orig_block); + copy_block->AddSuccessor(orig_succ); + continue; + } + + auto orig_redir = remap_orig_internal_->Find(HEdge(orig_block_id, orig_succ_id)); + auto copy_redir = remap_copy_internal_->Find(HEdge(orig_block_id, orig_succ_id)); + + // Due to construction all successors of copied block were set to original. + if (copy_redir != remap_copy_internal_->end()) { + RemapCopyInternalEdge(orig_block, orig_succ); + } else { + AddCopyInternalEdge(orig_block, orig_succ); + } + + if (orig_redir != remap_orig_internal_->end()) { + RemapOrigInternalOrIncomingEdge(orig_block, orig_succ); + } + } + } +} + +void SuperblockCloner::AdjustControlFlowInfo() { + ArenaBitVector outer_loop_bb_set( + arena_, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + RecalculateBackEdgesInfo(&outer_loop_bb_set); + + graph_->ClearDominanceInformation(); + // TODO: Do it locally. + graph_->ComputeDominanceInformation(); +} + +// TODO: Current FastCase restriction guarantees that instructions' inputs are already mapped to +// the valid values; only phis' inputs must be adjusted. +void SuperblockCloner::ResolveDataFlow() { + for (auto entry : *bb_map_) { + HBasicBlock* orig_block = entry.first; + + for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { + HPhi* orig_phi = it.Current()->AsPhi(); + HPhi* copy_phi = GetInstrCopy(orig_phi)->AsPhi(); + ResolvePhi(orig_phi); + ResolvePhi(copy_phi); + } + if (kIsDebugBuild) { + // Inputs of instruction copies must be already mapped to correspondent inputs copies. + for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) { + CheckInstructionInputsRemapping(it.Current()); + } + } + } +} + +// +// Debug and logging methods. +// + +void SuperblockCloner::CheckInstructionInputsRemapping(HInstruction* orig_instr) { + DCHECK(!orig_instr->IsPhi()); + HInstruction* copy_instr = GetInstrCopy(orig_instr); + for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) { + HInstruction* orig_input = orig_instr->InputAt(i); + DCHECK(orig_input->GetBlock()->Dominates(orig_instr->GetBlock())); + + // If original input is defined outside the region then it will remain for both original + // instruction and the copy after the transformation. + if (!IsInOrigBBSet(orig_input->GetBlock())) { + continue; + } + HInstruction* copy_input = GetInstrCopy(orig_input); + DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock())); + } + + // Resolve environment. + if (orig_instr->HasEnvironment()) { + HEnvironment* orig_env = orig_instr->GetEnvironment(); + + for (size_t i = 0, e = orig_env->Size(); i < e; ++i) { + HInstruction* orig_input = orig_env->GetInstructionAt(i); + + // If original input is defined outside the region then it will remain for both original + // instruction and the copy after the transformation. + if (orig_input == nullptr || !IsInOrigBBSet(orig_input->GetBlock())) { + continue; + } + + HInstruction* copy_input = GetInstrCopy(orig_input); + DCHECK(copy_input->GetBlock()->Dominates(copy_instr->GetBlock())); + } + } +} + +// +// Public methods. 
+//
+
+SuperblockCloner::SuperblockCloner(HGraph* graph,
+                                   const HBasicBlockSet* orig_bb_set,
+                                   HBasicBlockMap* bb_map,
+                                   HInstructionMap* hir_map)
+  : graph_(graph),
+    arena_(graph->GetAllocator()),
+    orig_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner),
+    remap_orig_internal_(nullptr),
+    remap_copy_internal_(nullptr),
+    remap_incoming_(nullptr),
+    bb_map_(bb_map),
+    hir_map_(hir_map),
+    outer_loop_(nullptr),
+    outer_loop_bb_set_(arena_, orig_bb_set->GetSizeOf(), true, kArenaAllocSuperblockCloner) {
+  orig_bb_set_.Copy(orig_bb_set);
+}
+
+void SuperblockCloner::SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal,
+                                                 const HEdgeSet* remap_copy_internal,
+                                                 const HEdgeSet* remap_incoming) {
+  remap_orig_internal_ = remap_orig_internal;
+  remap_copy_internal_ = remap_copy_internal;
+  remap_incoming_ = remap_incoming;
+}
+
+bool SuperblockCloner::IsSubgraphClonable() const {
+  // TODO: Support irreducible graphs and graphs with try-catch.
+  if (graph_->HasIrreducibleLoops() || graph_->HasTryCatch()) {
+    return false;
+  }
+
+  // Check that there are no instructions defined in the subgraph and used outside.
+  // TODO: Improve this by accepting graph with such uses but only one exit.
+  for (uint32_t idx : orig_bb_set_.Indexes()) {
+    HBasicBlock* block = GetBlockById(idx);
+
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (!instr->IsClonable() ||
+          IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        return false;
+      }
+    }
+
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+      HInstruction* instr = it.Current();
+      if (!instr->IsClonable() ||
+          IsUsedOutsideRegion(instr, orig_bb_set_)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+void SuperblockCloner::Run() {
+  DCHECK(bb_map_ != nullptr);
+  DCHECK(hir_map_ != nullptr);
+  DCHECK(remap_orig_internal_ != nullptr &&
+         remap_copy_internal_ != nullptr &&
+         remap_incoming_ != nullptr);
+  DCHECK(IsSubgraphClonable());
+
+  // Find an area in the graph for which control flow information should be adjusted.
+  FindAndSetLocalAreaForAdjustments();
+  // Clone the basic blocks from the orig_bb_set_; data flow is invalid after the call and is to be
+  // adjusted.
+  CloneBasicBlocks();
+  // Connect the blocks together/remap successors and fix phis which are directly affected by the
+  // remapping.
+  RemapEdgesSuccessors();
+  // Recalculate dominance and back-edge information which is required by the next stage.
+  AdjustControlFlowInfo();
+  // Fix data flow of the graph.
+  ResolveDataFlow();
+}
+
+void SuperblockCloner::CleanUp() {
+  CleanUpControlFlow();
+
+  // Remove phis which have all inputs being the same.
+  // When a block has a single predecessor it must not have any phis. However, after the
+  // transformation it could happen that there is such a block with a phi with a single input.
+  // As this case needs to be processed anyway, we also simplify phis with multiple identical
+  // inputs here.
+ for (auto entry : *bb_map_) { + HBasicBlock* orig_block = entry.first; + for (HInstructionIterator inst_it(orig_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + HPhi* phi = inst_it.Current()->AsPhi(); + if (ArePhiInputsTheSame(phi)) { + phi->ReplaceWith(phi->InputAt(0)); + orig_block->RemovePhi(phi); + } + } + + HBasicBlock* copy_block = GetBlockCopy(orig_block); + for (HInstructionIterator inst_it(copy_block->GetPhis()); !inst_it.Done(); inst_it.Advance()) { + HPhi* phi = inst_it.Current()->AsPhi(); + if (ArePhiInputsTheSame(phi)) { + phi->ReplaceWith(phi->InputAt(0)); + copy_block->RemovePhi(phi); + } + } + } +} + +HBasicBlock* SuperblockCloner::CloneBasicBlock(const HBasicBlock* orig_block) { + HGraph* graph = orig_block->GetGraph(); + HBasicBlock* copy_block = new (arena_) HBasicBlock(graph, orig_block->GetDexPc()); + graph->AddBlock(copy_block); + + // Clone all the phis and add them to the map. + for (HInstructionIterator it(orig_block->GetPhis()); !it.Done(); it.Advance()) { + HInstruction* orig_instr = it.Current(); + HInstruction* copy_instr = orig_instr->Clone(arena_); + copy_block->AddPhi(copy_instr->AsPhi()); + copy_instr->AsPhi()->RemoveAllInputs(); + DCHECK(!orig_instr->HasEnvironment()); + hir_map_->Put(orig_instr, copy_instr); + } + + // Clone all the instructions and add them to the map. + for (HInstructionIterator it(orig_block->GetInstructions()); !it.Done(); it.Advance()) { + HInstruction* orig_instr = it.Current(); + HInstruction* copy_instr = orig_instr->Clone(arena_); + ReplaceInputsWithCopies(copy_instr); + copy_block->AddInstruction(copy_instr); + if (orig_instr->HasEnvironment()) { + DeepCloneEnvironmentWithRemapping(copy_instr, orig_instr->GetEnvironment()); + } + hir_map_->Put(orig_instr, copy_instr); + } + + return copy_block; +} + +void SuperblockCloner::CloneBasicBlocks() { + // By this time ReversePostOrder must be valid: in 'CloneBasicBlock' inputs of the copied + // instructions might be replaced by copies of the original inputs (depending where those inputs + // are defined). So the definitions of the original inputs must be visited before their original + // uses. The property of the reducible graphs "if 'A' dom 'B' then rpo_num('A') >= rpo_num('B')" + // guarantees that. + for (HBasicBlock* orig_block : graph_->GetReversePostOrder()) { + if (!IsInOrigBBSet(orig_block)) { + continue; + } + HBasicBlock* copy_block = CloneBasicBlock(orig_block); + bb_map_->Put(orig_block, copy_block); + if (kSuperblockClonerLogging) { + std::cout << "new block :" << copy_block->GetBlockId() << ": " << orig_block->GetBlockId() << + std::endl; + } + } +} + +} // namespace art diff --git a/compiler/optimizing/superblock_cloner.h b/compiler/optimizing/superblock_cloner.h new file mode 100644 index 0000000000..23de692673 --- /dev/null +++ b/compiler/optimizing/superblock_cloner.h @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ +#define ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ + +#include "base/arena_bit_vector.h" +#include "base/arena_containers.h" +#include "base/bit_vector-inl.h" +#include "nodes.h" + +namespace art { + +static const bool kSuperblockClonerLogging = false; +static const bool kSuperblockClonerVerify = false; + +// Represents an edge between two HBasicBlocks. +// +// Note: objects of this class are small - pass them by value. +class HEdge : public ArenaObject<kArenaAllocSuperblockCloner> { + public: + HEdge(HBasicBlock* from, HBasicBlock* to) : from_(from->GetBlockId()), to_(to->GetBlockId()) { + DCHECK_NE(to_, kInvalidBlockId); + DCHECK_NE(from_, kInvalidBlockId); + } + HEdge(uint32_t from, uint32_t to) : from_(from), to_(to) { + DCHECK_NE(to_, kInvalidBlockId); + DCHECK_NE(from_, kInvalidBlockId); + } + HEdge() : from_(kInvalidBlockId), to_(kInvalidBlockId) {} + + uint32_t GetFrom() const { return from_; } + uint32_t GetTo() const { return to_; } + + bool operator==(const HEdge& other) const { + return this->from_ == other.from_ && this->to_ == other.to_; + } + + bool operator!=(const HEdge& other) const { return !operator==(other); } + void Dump(std::ostream& stream) const; + + // Returns whether an edge represents a valid edge in CF graph: whether the from_ block + // has to_ block as a successor. + bool IsValid() const { return from_ != kInvalidBlockId && to_ != kInvalidBlockId; } + + private: + // Predecessor block id. + uint32_t from_; + // Successor block id. + uint32_t to_; +}; + +// Returns whether a HEdge edge corresponds to an existing edge in the graph. +inline bool IsEdgeValid(HEdge edge, HGraph* graph) { + if (!edge.IsValid()) { + return false; + } + uint32_t from = edge.GetFrom(); + uint32_t to = edge.GetTo(); + if (from >= graph->GetBlocks().size() || to >= graph->GetBlocks().size()) { + return false; + } + + HBasicBlock* block_from = graph->GetBlocks()[from]; + HBasicBlock* block_to = graph->GetBlocks()[to]; + if (block_from == nullptr || block_to == nullptr) { + return false; + } + + return block_from->HasSuccessor(block_to, 0); +} + +// SuperblockCloner provides a feature of cloning subgraphs in a smart, high level way without +// fine grain manipulation with IR; data flow and graph properties are resolved/adjusted +// automatically. The clone transformation is defined by specifying a set of basic blocks to copy +// and a set of rules how to treat edges, remap their successors. By using this approach such +// optimizations as Branch Target Expansion, Loop Peeling, Loop Unrolling can be implemented. +// +// The idea of the transformation is based on "Superblock cloning" technique described in the book +// "Engineering a Compiler. Second Edition", Keith D. Cooper, Linda Torczon, Rice University +// Houston, Texas. 2nd edition, Morgan Kaufmann. The original paper is "The Superblock: An Efective +// Technique for VLIW and Superscalar Compilation" by Hwu, W.M.W., Mahlke, S.A., Chen, W.Y. et al. +// J Supercomput (1993) 7: 229. doi:10.1007/BF01205185. +// +// There are two states of the IR graph: original graph (before the transformation) and +// copy graph (after). +// +// Before the transformation: +// Defining a set of basic block to copy (orig_bb_set) partitions all of the edges in the original +// graph into 4 categories/sets (use the following notation for edges: "(pred, succ)", +// where pred, succ - basic blocks): +// - internal - pred, succ are members of ‘orig_bb_set’. 
+// - outside - pred, succ are not members of ‘orig_bb_set’. +// - incoming - pred is not a member of ‘orig_bb_set’, succ is. +// - outgoing - pred is a member of ‘orig_bb_set’, succ is not. +// +// Transformation: +// +// 1. Initial cloning: +// 1.1. For each ‘orig_block’ in orig_bb_set create a copy ‘copy_block’; these new blocks +// form ‘copy_bb_set’. +// 1.2. For each edge (X, Y) from internal set create an edge (X_1, Y_1) where X_1, Y_1 are the +// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_internal’ edge +// set. +// 1.3. For each edge (X, Y) from outgoing set create an edge (X_1, Y_1) where X_1, Y_1 are the +// copies of X, Y basic blocks correspondingly; these new edges form ‘copy_outgoing’ edge +// set. +// 2. Successors remapping. +// 2.1. 'remap_orig_internal’ - set of edges (X, Y) from ‘orig_bb_set’ whose successors should +// be remapped to copy nodes: ((X, Y) will be transformed into (X, Y_1)). +// 2.2. ‘remap_copy_internal’ - set of edges (X_1, Y_1) from ‘copy_bb_set’ whose successors +// should be remapped to copy nodes: (X_1, Y_1) will be transformed into (X_1, Y)). +// 2.3. 'remap_incoming’ - set of edges (X, Y) from the ‘incoming’ edge set in the original graph +// whose successors should be remapped to copies nodes: ((X, Y) will be transformed into +// (X, Y_1)). +// 3. Adjust control flow structures and relations (dominance, reverse post order, loops, etc). +// 4. Fix/resolve data flow. +// 5. Do cleanups (DCE, critical edges splitting, etc). +// +class SuperblockCloner : public ValueObject { + public: + // TODO: Investigate optimal types for the containers. + using HBasicBlockMap = ArenaSafeMap<HBasicBlock*, HBasicBlock*>; + using HInstructionMap = ArenaSafeMap<HInstruction*, HInstruction*>; + using HBasicBlockSet = ArenaBitVector; + using HEdgeSet = ArenaHashSet<HEdge>; + + SuperblockCloner(HGraph* graph, + const HBasicBlockSet* orig_bb_set, + HBasicBlockMap* bb_map, + HInstructionMap* hir_map); + + // Sets edge successor remapping info specified by corresponding edge sets. + void SetSuccessorRemappingInfo(const HEdgeSet* remap_orig_internal, + const HEdgeSet* remap_copy_internal, + const HEdgeSet* remap_incoming); + + // Returns whether the specified subgraph is copyable. + // TODO: Start from small range of graph patterns then extend it. + bool IsSubgraphClonable() const; + + // Runs the copy algorithm according to the description. + void Run(); + + // Cleans up the graph after transformation: splits critical edges, recalculates control flow + // information (back-edges, dominators, loop info, etc), eliminates redundant phis. + void CleanUp(); + + // Returns a clone of a basic block (orig_block). + // + // - The copy block will have no successors/predecessors; they should be set up manually. + // - For each instruction in the orig_block a copy is created and inserted into the copy block; + // this correspondence is recorded in the map (old instruction, new instruction). + // - Graph HIR is not valid after this transformation: all of the HIRs have their inputs the + // same, as in the original block, PHIs do not reflect a correct correspondence between the + // value and predecessors (as the copy block has no predecessors by now), etc. + HBasicBlock* CloneBasicBlock(const HBasicBlock* orig_block); + + // Creates a clone for each basic blocks in orig_bb_set adding corresponding entries into bb_map_ + // and hir_map_. 
+ void CloneBasicBlocks(); + + HInstruction* GetInstrCopy(HInstruction* orig_instr) const { + auto copy_input_iter = hir_map_->find(orig_instr); + DCHECK(copy_input_iter != hir_map_->end()); + return copy_input_iter->second; + } + + HBasicBlock* GetBlockCopy(HBasicBlock* orig_block) const { + HBasicBlock* block = bb_map_->Get(orig_block); + DCHECK(block != nullptr); + return block; + } + + HInstruction* GetInstrOrig(HInstruction* copy_instr) const { + for (auto it : *hir_map_) { + if (it.second == copy_instr) { + return it.first; + } + } + return nullptr; + } + + bool IsInOrigBBSet(uint32_t block_id) const { + return orig_bb_set_.IsBitSet(block_id); + } + + bool IsInOrigBBSet(const HBasicBlock* block) const { + return IsInOrigBBSet(block->GetBlockId()); + } + + private: + // Fills the 'exits' vector with the subgraph exits. + void SearchForSubgraphExits(ArenaVector<HBasicBlock*>* exits); + + // Finds and records information about the area in the graph for which control-flow (back edges, + // loops, dominators) needs to be adjusted. + void FindAndSetLocalAreaForAdjustments(); + + // Remaps edges' successors according to the info specified in the edges sets. + // + // Only edge successors/predecessors and phis' input records (to have a correspondence between + // a phi input record (not value) and a block's predecessor) are adjusted at this stage: neither + // phis' nor instructions' inputs values are resolved. + void RemapEdgesSuccessors(); + + // Adjusts control-flow (back edges, loops, dominators) for the local area defined by + // FindAndSetLocalAreaForAdjustments. + void AdjustControlFlowInfo(); + + // Resolves Data Flow - adjusts phis' and instructions' inputs in order to have a valid graph in + // the SSA form. + void ResolveDataFlow(); + + // + // Helpers for CloneBasicBlock. + // + + // Adjusts copy instruction's inputs: if the input of the original instruction is defined in the + // orig_bb_set, replaces it with a corresponding copy otherwise leaves it the same as original. + void ReplaceInputsWithCopies(HInstruction* copy_instr); + + // Recursively clones the environment for the copy instruction. If the input of the original + // environment is defined in the orig_bb_set, replaces it with a corresponding copy otherwise + // leaves it the same as original. + void DeepCloneEnvironmentWithRemapping(HInstruction* copy_instr, const HEnvironment* orig_env); + + // + // Helpers for RemapEdgesSuccessors. + // + + // Remaps incoming or original internal edge to its copy, adjusts the phi inputs in orig_succ and + // copy_succ. + void RemapOrigInternalOrIncomingEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); + + // Adds copy internal edge (from copy_block to copy_succ), updates phis in the copy_succ. + void AddCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); + + // Remaps copy internal edge to its origin, adjusts the phi inputs in orig_succ. + void RemapCopyInternalEdge(HBasicBlock* orig_block, HBasicBlock* orig_succ); + + // + // Local versions of control flow calculation/adjustment routines. + // + + void FindBackEdgesLocal(HBasicBlock* entry_block, ArenaBitVector* local_set); + void RecalculateBackEdgesInfo(ArenaBitVector* outer_loop_bb_set); + GraphAnalysisResult AnalyzeLoopsLocally(ArenaBitVector* outer_loop_bb_set); + void CleanUpControlFlow(); + + // + // Helpers for ResolveDataFlow + // + + // Resolves the inputs of the phi. + void ResolvePhi(HPhi* phi); + + // + // Debug and logging methods. 
+ // + void CheckInstructionInputsRemapping(HInstruction* orig_instr); + + HBasicBlock* GetBlockById(uint32_t block_id) const { + DCHECK(block_id < graph_->GetBlocks().size()); + HBasicBlock* block = graph_->GetBlocks()[block_id]; + DCHECK(block != nullptr); + return block; + } + + HGraph* const graph_; + ArenaAllocator* const arena_; + + // Set of basic block in the original graph to be copied. + HBasicBlockSet orig_bb_set_; + + // Sets of edges which require successors remapping. + const HEdgeSet* remap_orig_internal_; + const HEdgeSet* remap_copy_internal_; + const HEdgeSet* remap_incoming_; + + // Correspondence map for blocks: (original block, copy block). + HBasicBlockMap* bb_map_; + // Correspondence map for instructions: (original HInstruction, copy HInstruction). + HInstructionMap* hir_map_; + // Area in the graph for which control-flow (back edges, loops, dominators) needs to be adjusted. + HLoopInformation* outer_loop_; + HBasicBlockSet outer_loop_bb_set_; + + ART_FRIEND_TEST(SuperblockClonerTest, AdjustControlFlowInfo); + + DISALLOW_COPY_AND_ASSIGN(SuperblockCloner); +}; + +} // namespace art + +namespace std { + +template <> +struct hash<art::HEdge> { + size_t operator()(art::HEdge const& x) const noexcept { + // Use Cantor pairing function as the hash function. + uint32_t a = x.GetFrom(); + uint32_t b = x.GetTo(); + return (a + b) * (a + b + 1) / 2 + b; + } +}; + +} // namespace std + +#endif // ART_COMPILER_OPTIMIZING_SUPERBLOCK_CLONER_H_ diff --git a/compiler/optimizing/superblock_cloner_test.cc b/compiler/optimizing/superblock_cloner_test.cc new file mode 100644 index 0000000000..f1b7bffdf5 --- /dev/null +++ b/compiler/optimizing/superblock_cloner_test.cc @@ -0,0 +1,306 @@ +/* + * Copyright (C) 2017 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "graph_checker.h" +#include "nodes.h" +#include "optimizing_unit_test.h" +#include "superblock_cloner.h" + +#include "gtest/gtest.h" + +namespace art { + +using HBasicBlockMap = SuperblockCloner::HBasicBlockMap; +using HInstructionMap = SuperblockCloner::HInstructionMap; + +// This class provides methods and helpers for testing various cloning and copying routines: +// individual instruction cloning and cloning of the more coarse-grain structures. 
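Before the test fixture, a sketch of how a client could drive the public API declared above, using the edge-set rules from the class comment to peel one iteration of a simple natural loop (preheader P, header H, back-edge block B). This is an illustration only: no peeling pass is part of this patch, and the `graph`, `arena`, `header` and `loop_info` names as well as the exact HEdgeSet construction and Insert() calls are assumptions of the sketch.

    // Peel one iteration: the cloned blocks run first, then control enters the original loop.
    HLoopInformation* loop_info = header->GetLoopInformation();
    ArenaBitVector orig_bb_set(arena, graph->GetBlocks().size(), false, kArenaAllocSuperblockCloner);
    orig_bb_set.Union(&loop_info->GetBlocks());

    HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner));
    HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner));

    HEdgeSet remap_orig_internal(arena->Adapter(kArenaAllocSuperblockCloner));
    HEdgeSet remap_copy_internal(arena->Adapter(kArenaAllocSuperblockCloner));
    HEdgeSet remap_incoming(arena->Adapter(kArenaAllocSuperblockCloner));

    // The incoming edge (P, H) is redirected to the copy of the header, so the copy runs first.
    remap_incoming.Insert(HEdge(loop_info->GetPreHeader(), header));
    // The copied back edge is redirected to the original header; the original back edge and the
    // other original internal edges are left as they are.
    for (HBasicBlock* back_edge_block : loop_info->GetBackEdges()) {
      remap_copy_internal.Insert(HEdge(back_edge_block, header));
    }

    SuperblockCloner cloner(graph, &orig_bb_set, &bb_map, &hir_map);
    cloner.SetSuccessorRemappingInfo(&remap_orig_internal, &remap_copy_internal, &remap_incoming);
    if (cloner.IsSubgraphClonable()) {
      cloner.Run();      // Clone, remap successors, adjust control flow, resolve data flow.
      cloner.CleanUp();  // Recompute CFG info and drop trivial phis.
    }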
+class SuperblockClonerTest : public OptimizingUnitTest { + public: + SuperblockClonerTest() + : graph_(CreateGraph()), entry_block_(nullptr), exit_block_(nullptr), parameter_(nullptr) {} + + void CreateBasicLoopControlFlow(/* out */ HBasicBlock** header_p, + /* out */ HBasicBlock** body_p) { + entry_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(entry_block_); + graph_->SetEntryBlock(entry_block_); + + HBasicBlock* loop_preheader = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* loop_header = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* loop_body = new (GetAllocator()) HBasicBlock(graph_); + HBasicBlock* loop_exit = new (GetAllocator()) HBasicBlock(graph_); + + graph_->AddBlock(loop_preheader); + graph_->AddBlock(loop_header); + graph_->AddBlock(loop_body); + graph_->AddBlock(loop_exit); + + exit_block_ = new (GetAllocator()) HBasicBlock(graph_); + graph_->AddBlock(exit_block_); + graph_->SetExitBlock(exit_block_); + + entry_block_->AddSuccessor(loop_preheader); + loop_preheader->AddSuccessor(loop_header); + // Loop exit first to have a proper exit condition/target for HIf. + loop_header->AddSuccessor(loop_exit); + loop_header->AddSuccessor(loop_body); + loop_body->AddSuccessor(loop_header); + loop_exit->AddSuccessor(exit_block_); + + *header_p = loop_header; + *body_p = loop_body; + + parameter_ = new (GetAllocator()) HParameterValue(graph_->GetDexFile(), + dex::TypeIndex(0), + 0, + DataType::Type::kInt32); + entry_block_->AddInstruction(parameter_); + loop_exit->AddInstruction(new (GetAllocator()) HReturnVoid()); + exit_block_->AddInstruction(new (GetAllocator()) HExit()); + } + + void CreateBasicLoopDataFlow(HBasicBlock* loop_header, HBasicBlock* loop_body) { + uint32_t dex_pc = 0; + + // Entry block. + HIntConstant* const_0 = graph_->GetIntConstant(0); + HIntConstant* const_1 = graph_->GetIntConstant(1); + HIntConstant* const_128 = graph_->GetIntConstant(128); + + // Header block. + HPhi* phi = new (GetAllocator()) HPhi(GetAllocator(), 0, 0, DataType::Type::kInt32); + HInstruction* suspend_check = new (GetAllocator()) HSuspendCheck(); + + loop_header->AddPhi(phi); + loop_header->AddInstruction(suspend_check); + loop_header->AddInstruction(new (GetAllocator()) HGreaterThanOrEqual(phi, const_128)); + loop_header->AddInstruction(new (GetAllocator()) HIf(parameter_)); + + // Loop body block. 
+    HInstruction* null_check = new (GetAllocator()) HNullCheck(parameter_, dex_pc);
+    HInstruction* array_length = new (GetAllocator()) HArrayLength(null_check, dex_pc);
+    HInstruction* bounds_check = new (GetAllocator()) HBoundsCheck(phi, array_length, dex_pc);
+    HInstruction* array_get =
+        new (GetAllocator()) HArrayGet(null_check, bounds_check, DataType::Type::kInt32, dex_pc);
+    HInstruction* add = new (GetAllocator()) HAdd(DataType::Type::kInt32, array_get, const_1);
+    HInstruction* array_set =
+        new (GetAllocator()) HArraySet(null_check, bounds_check, add, DataType::Type::kInt32, dex_pc);
+    HInstruction* induction_inc = new (GetAllocator()) HAdd(DataType::Type::kInt32, phi, const_1);
+
+    loop_body->AddInstruction(null_check);
+    loop_body->AddInstruction(array_length);
+    loop_body->AddInstruction(bounds_check);
+    loop_body->AddInstruction(array_get);
+    loop_body->AddInstruction(add);
+    loop_body->AddInstruction(array_set);
+    loop_body->AddInstruction(induction_inc);
+    loop_body->AddInstruction(new (GetAllocator()) HGoto());
+
+    phi->AddInput(const_0);
+    phi->AddInput(induction_inc);
+
+    graph_->SetHasBoundsChecks(true);
+
+    // Adjust HEnvironment for each instruction which requires it.
+    ArenaVector<HInstruction*> current_locals({phi, const_128, parameter_},
+                                              GetAllocator()->Adapter(kArenaAllocInstruction));
+
+    HEnvironment* env = ManuallyBuildEnvFor(suspend_check, &current_locals);
+    null_check->CopyEnvironmentFrom(env);
+    bounds_check->CopyEnvironmentFrom(env);
+  }
+
+  HEnvironment* ManuallyBuildEnvFor(HInstruction* instruction,
+                                    ArenaVector<HInstruction*>* current_locals) {
+    HEnvironment* environment = new (GetAllocator()) HEnvironment(
+        (GetAllocator()),
+        current_locals->size(),
+        graph_->GetArtMethod(),
+        instruction->GetDexPc(),
+        instruction);
+
+    environment->CopyFrom(ArrayRef<HInstruction* const>(*current_locals));
+    instruction->SetRawEnvironment(environment);
+    return environment;
+  }
+
+  bool CheckGraph() {
+    GraphChecker checker(graph_);
+    checker.Run();
+    if (!checker.IsValid()) {
+      for (const std::string& error : checker.GetErrors()) {
+        std::cout << error << std::endl;
+      }
+      return false;
+    }
+    return true;
+  }
+
+  HGraph* graph_;
+
+  HBasicBlock* entry_block_;
+  HBasicBlock* exit_block_;
+
+  HInstruction* parameter_;
+};
+
+TEST_F(SuperblockClonerTest, IndividualInstrCloner) {
+  HBasicBlock* header = nullptr;
+  HBasicBlock* loop_body = nullptr;
+
+  CreateBasicLoopControlFlow(&header, &loop_body);
+  CreateBasicLoopDataFlow(header, loop_body);
+  graph_->BuildDominatorTree();
+  ASSERT_TRUE(CheckGraph());
+
+  HSuspendCheck* old_suspend_check = header->GetLoopInformation()->GetSuspendCheck();
+  CloneAndReplaceInstructionVisitor visitor(graph_);
+  // Do instruction cloning and replacement twice with different visiting orders.
+
+  visitor.VisitInsertionOrder();
+  size_t instr_replaced_by_clones_count = visitor.GetInstrReplacedByClonesCount();
+  EXPECT_EQ(instr_replaced_by_clones_count, 12u);
+  EXPECT_TRUE(CheckGraph());
+
+  visitor.VisitReversePostOrder();
+  instr_replaced_by_clones_count = visitor.GetInstrReplacedByClonesCount();
+  EXPECT_EQ(instr_replaced_by_clones_count, 24u);
+  EXPECT_TRUE(CheckGraph());
+
+  HSuspendCheck* new_suspend_check = header->GetLoopInformation()->GetSuspendCheck();
+  EXPECT_NE(new_suspend_check, old_suspend_check);
+  EXPECT_NE(new_suspend_check, nullptr);
+}
+
+// Tests SuperblockCloner::CloneBasicBlocks - checks instruction cloning and initial remapping of
+// instructions' inputs.
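+//
+// Roughly: after CloneBasicBlocks each cloned instruction should have its inputs remapped to
+// the corresponding copies when the inputs are defined inside the cloned loop, and left
+// pointing at the original definitions (the constants and the parameter) when they are
+// defined outside of it; environments are expected to follow the same rule.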
+TEST_F(SuperblockClonerTest, CloneBasicBlocks) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + CreateBasicLoopControlFlow(&header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + ASSERT_TRUE(CheckGraph()); + + ArenaBitVector orig_bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + HBasicBlockMap bb_map(std::less<HBasicBlock*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + HInstructionMap hir_map(std::less<HInstruction*>(), arena->Adapter(kArenaAllocSuperblockCloner)); + + HLoopInformation* loop_info = header->GetLoopInformation(); + orig_bb_set.Union(&loop_info->GetBlocks()); + + SuperblockCloner cloner(graph_, + &orig_bb_set, + &bb_map, + &hir_map); + EXPECT_TRUE(cloner.IsSubgraphClonable()); + + cloner.CloneBasicBlocks(); + + EXPECT_EQ(bb_map.size(), 2u); + EXPECT_EQ(hir_map.size(), 12u); + + for (auto it : hir_map) { + HInstruction* orig_instr = it.first; + HInstruction* copy_instr = it.second; + + EXPECT_EQ(cloner.GetBlockCopy(orig_instr->GetBlock()), copy_instr->GetBlock()); + EXPECT_EQ(orig_instr->GetKind(), copy_instr->GetKind()); + EXPECT_EQ(orig_instr->GetType(), copy_instr->GetType()); + + if (orig_instr->IsPhi()) { + continue; + } + + EXPECT_EQ(orig_instr->InputCount(), copy_instr->InputCount()); + + // Check that inputs match. + for (size_t i = 0, e = orig_instr->InputCount(); i < e; i++) { + HInstruction* orig_input = orig_instr->InputAt(i); + HInstruction* copy_input = copy_instr->InputAt(i); + if (cloner.IsInOrigBBSet(orig_input->GetBlock())) { + EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input); + } else { + EXPECT_EQ(orig_input, copy_input); + } + } + + EXPECT_EQ(orig_instr->HasEnvironment(), copy_instr->HasEnvironment()); + + // Check that environments match. + if (orig_instr->HasEnvironment()) { + HEnvironment* orig_env = orig_instr->GetEnvironment(); + HEnvironment* copy_env = copy_instr->GetEnvironment(); + + EXPECT_EQ(copy_env->GetParent(), nullptr); + EXPECT_EQ(orig_env->Size(), copy_env->Size()); + + for (size_t i = 0, e = orig_env->Size(); i < e; i++) { + HInstruction* orig_input = orig_env->GetInstructionAt(i); + HInstruction* copy_input = copy_env->GetInstructionAt(i); + if (cloner.IsInOrigBBSet(orig_input->GetBlock())) { + EXPECT_EQ(cloner.GetInstrCopy(orig_input), copy_input); + } else { + EXPECT_EQ(orig_input, copy_input); + } + } + } + } +} + +// SuperblockCloner::CleanUpControlFlow - checks algorithms of local adjustments of the control +// flow. 
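+//
+// Note that both correspondence maps are passed as nullptr here, so no blocks are actually
+// cloned; the test only exercises FindAndSetLocalAreaForAdjustments and CleanUpControlFlow
+// and then checks that the dominator and loop information for the original loop still hold.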
+TEST_F(SuperblockClonerTest, AdjustControlFlowInfo) { + HBasicBlock* header = nullptr; + HBasicBlock* loop_body = nullptr; + ArenaAllocator* arena = graph_->GetAllocator(); + + CreateBasicLoopControlFlow(&header, &loop_body); + CreateBasicLoopDataFlow(header, loop_body); + graph_->BuildDominatorTree(); + ASSERT_TRUE(CheckGraph()); + + ArenaBitVector orig_bb_set( + arena, graph_->GetBlocks().size(), false, kArenaAllocSuperblockCloner); + + HLoopInformation* loop_info = header->GetLoopInformation(); + orig_bb_set.Union(&loop_info->GetBlocks()); + + SuperblockCloner cloner(graph_, + &orig_bb_set, + nullptr, + nullptr); + EXPECT_TRUE(cloner.IsSubgraphClonable()); + + cloner.FindAndSetLocalAreaForAdjustments(); + cloner.CleanUpControlFlow(); + + EXPECT_TRUE(CheckGraph()); + + EXPECT_TRUE(entry_block_->Dominates(header)); + EXPECT_TRUE(entry_block_->Dominates(exit_block_)); + + EXPECT_EQ(header->GetLoopInformation(), loop_info); + EXPECT_EQ(loop_info->GetHeader(), header); + EXPECT_TRUE(loop_info->Contains(*loop_body)); + EXPECT_TRUE(loop_info->IsBackEdge(*loop_body)); +} + +} // namespace art diff --git a/compiler/optimizing/suspend_check_test.cc b/compiler/optimizing/suspend_check_test.cc index 15cd4e8a08..33823e2a11 100644 --- a/compiler/optimizing/suspend_check_test.cc +++ b/compiler/optimizing/suspend_check_test.cc @@ -15,7 +15,7 @@ */ #include "builder.h" -#include "dex_instruction.h" +#include "dex/dex_instruction.h" #include "nodes.h" #include "optimizing_unit_test.h" #include "pretty_printer.h" @@ -28,10 +28,13 @@ namespace art { * Check that the HGraphBuilder adds suspend checks to backward branches. */ -static void TestCode(const uint16_t* data) { - ArenaPool pool; - ArenaAllocator allocator(&pool); - HGraph* graph = CreateCFG(&allocator, data); +class SuspendCheckTest : public OptimizingUnitTest { + protected: + void TestCode(const std::vector<uint16_t>& data); +}; + +void SuspendCheckTest::TestCode(const std::vector<uint16_t>& data) { + HGraph* graph = CreateCFG(data); HBasicBlock* first_block = graph->GetEntryBlock()->GetSingleSuccessor(); HBasicBlock* loop_header = first_block->GetSingleSuccessor(); ASSERT_TRUE(loop_header->IsLoopHeader()); @@ -39,10 +42,8 @@ static void TestCode(const uint16_t* data) { ASSERT_TRUE(loop_header->GetFirstInstruction()->IsSuspendCheck()); } -class SuspendCheckTest : public CommonCompilerTest {}; - TEST_F(SuspendCheckTest, CFG1) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::NOP, Instruction::GOTO | 0xFF00); @@ -50,14 +51,14 @@ TEST_F(SuspendCheckTest, CFG1) { } TEST_F(SuspendCheckTest, CFG2) { - const uint16_t data[] = ZERO_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ZERO_REGISTER_CODE_ITEM( Instruction::GOTO_32, 0, 0); TestCode(data); } TEST_F(SuspendCheckTest, CFG3) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_EQ, 0xFFFF, Instruction::RETURN_VOID); @@ -66,7 +67,7 @@ TEST_F(SuspendCheckTest, CFG3) { } TEST_F(SuspendCheckTest, CFG4) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_NE, 0xFFFF, Instruction::RETURN_VOID); @@ -75,7 +76,7 @@ TEST_F(SuspendCheckTest, CFG4) { } TEST_F(SuspendCheckTest, CFG5) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 
| 0, Instruction::IF_EQZ, 0xFFFF, Instruction::RETURN_VOID); @@ -84,7 +85,7 @@ TEST_F(SuspendCheckTest, CFG5) { } TEST_F(SuspendCheckTest, CFG6) { - const uint16_t data[] = ONE_REGISTER_CODE_ITEM( + const std::vector<uint16_t> data = ONE_REGISTER_CODE_ITEM( Instruction::CONST_4 | 0 | 0, Instruction::IF_NEZ, 0xFFFF, Instruction::RETURN_VOID); diff --git a/compiler/optimizing/x86_memory_gen.cc b/compiler/optimizing/x86_memory_gen.cc index 4e256832a2..0271850f29 100644 --- a/compiler/optimizing/x86_memory_gen.cc +++ b/compiler/optimizing/x86_memory_gen.cc @@ -41,7 +41,7 @@ class MemoryOperandVisitor : public HGraphVisitor { } HInstruction* array = array_len->InputAt(0); - DCHECK_EQ(array->GetType(), Primitive::kPrimNot); + DCHECK_EQ(array->GetType(), DataType::Type::kReference); // Don't apply this optimization when the array is nullptr. if (array->IsConstant() || (array->IsNullCheck() && array->InputAt(0)->IsConstant())) { |