Diffstat (limited to 'compiler')
32 files changed, 1230 insertions, 441 deletions
diff --git a/compiler/Android.mk b/compiler/Android.mk index 41e9744777..96e13ac9a3 100644 --- a/compiler/Android.mk +++ b/compiler/Android.mk @@ -158,6 +158,7 @@ LIBART_COMPILER_SRC_FILES_mips64 := \ $(LIBART_COMPILER_SRC_FILES_mips) \ jni/quick/mips64/calling_convention_mips64.cc \ optimizing/code_generator_mips64.cc \ + optimizing/intrinsics_mips64.cc \ utils/mips64/assembler_mips64.cc \ utils/mips64/managed_register_mips64.cc \ diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc index 7082bedc5e..d5ac34186b 100644 --- a/compiler/dex/quick/codegen_util.cc +++ b/compiler/dex/quick/codegen_util.cc @@ -1126,7 +1126,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < core_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (core_vmap_table_[i] + VmapTable::kEntryAdjustment)); } // Push a marker to take place of lr. vmap_encoder.PushBackUnsigned(VmapTable::kAdjustedFpMarker); @@ -1141,7 +1141,7 @@ CompiledMethod* Mir2Lir::GetCompiledMethod() { for (size_t i = 0 ; i < fp_vmap_table_.size(); ++i) { // Copy, stripping out the phys register sort key. vmap_encoder.PushBackUnsigned( - ~(-1 << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); + ~(~0u << VREG_NUM_WIDTH) & (fp_vmap_table_[i] + VmapTable::kEntryAdjustment)); } } } else { diff --git a/compiler/dex/quick/quick_compiler.cc b/compiler/dex/quick/quick_compiler.cc index b5ecf9c418..1cd742abac 100644 --- a/compiler/dex/quick/quick_compiler.cc +++ b/compiler/dex/quick/quick_compiler.cc @@ -391,9 +391,9 @@ static int kAllOpcodes[] = { Instruction::IGET_SHORT_QUICK, Instruction::INVOKE_LAMBDA, Instruction::UNUSED_F4, - Instruction::UNUSED_F5, + Instruction::CAPTURE_VARIABLE, Instruction::CREATE_LAMBDA, - Instruction::UNUSED_F7, + Instruction::LIBERATE_VARIABLE, Instruction::BOX_LAMBDA, Instruction::UNBOX_LAMBDA, Instruction::UNUSED_FA, diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index e19e74f37a..7ae405ab3a 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -1693,8 +1693,14 @@ void HGraphBuilder::BuildPackedSwitch(const Instruction& instruction, uint32_t d } else { // Chained cmp-and-branch, starting from starting_key. 
for (size_t i = 1; i <= num_entries; i++) { - BuildSwitchCaseHelper(instruction, i, i == num_entries, table, value, - starting_key + i - 1, table.GetEntryAt(i), dex_pc); + BuildSwitchCaseHelper(instruction, + i, + i == num_entries, + table, + value, + starting_key + i - 1, + table.GetEntryAt(i), + dex_pc); } } } diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index 3c6a41df34..be05691741 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -203,13 +203,13 @@ class DisassemblyScope { void CodeGenerator::GenerateSlowPaths() { size_t code_start = 0; - for (size_t i = 0, e = slow_paths_.Size(); i < e; ++i) { + for (SlowPathCode* slow_path : slow_paths_) { if (disasm_info_ != nullptr) { code_start = GetAssembler()->CodeSize(); } - slow_paths_.Get(i)->EmitNativeCode(this); + slow_path->EmitNativeCode(this); if (disasm_info_ != nullptr) { - disasm_info_->AddSlowPathInterval(slow_paths_.Get(i), code_start, GetAssembler()->CodeSize()); + disasm_info_->AddSlowPathInterval(slow_path, code_start, GetAssembler()->CodeSize()); } } } diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index a1c6db0a2c..b58a3ff7f2 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -261,7 +261,7 @@ class CodeGenerator { bool IsImplicitNullCheckAllowed(HNullCheck* null_check) const; void AddSlowPath(SlowPathCode* slow_path) { - slow_paths_.Add(slow_path); + slow_paths_.push_back(slow_path); } void SetSrcMap(DefaultSrcMap* src_map) { src_map_ = src_map; } @@ -441,10 +441,12 @@ class CodeGenerator { graph_(graph), compiler_options_(compiler_options), src_map_(nullptr), - slow_paths_(graph->GetArena(), 8), + slow_paths_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)), current_block_index_(0), is_leaf_(true), - requires_current_method_(false) {} + requires_current_method_(false) { + slow_paths_.reserve(8); + } // Register allocation logic. void AllocateRegistersLocally(HInstruction* instruction) const; @@ -485,8 +487,20 @@ class CodeGenerator { return instruction_set == kX86 || instruction_set == kX86_64; } - // Arm64 has its own type for a label, so we need to templatize this method + // Arm64 has its own type for a label, so we need to templatize these methods // to share the logic. + + template <typename LabelType> + LabelType* CommonInitializeLabels() { + size_t size = GetGraph()->GetBlocks().size(); + LabelType* labels = GetGraph()->GetArena()->AllocArray<LabelType>(size, + kArenaAllocCodeGenerator); + for (size_t i = 0; i != size; ++i) { + new(labels + i) LabelType(); + } + return labels; + } + template <typename LabelType> LabelType* CommonGetLabelOf(LabelType* raw_pointer_to_labels_array, HBasicBlock* block) const { block = FirstNonEmptyBlock(block); @@ -539,7 +553,7 @@ class CodeGenerator { // Native to dex_pc map used for native debugging/profiling tools. DefaultSrcMap* src_map_; - GrowableArray<SlowPathCode*> slow_paths_; + ArenaVector<SlowPathCode*> slow_paths_; // The current block index in `block_order_` of the block // we are generating code for. 
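Editorial note on the codegen_util.cc hunk above: the switch from ~(-1 << VREG_NUM_WIDTH) to ~(~0u << VREG_NUM_WIDTH) avoids left-shifting a negative signed value, which is undefined behavior in C++, while producing the same low-bit mask. A minimal standalone sketch of that arithmetic (kVregNumWidth and the sample entry value are illustrative, not taken from the commit):

#include <cstdint>
#include <cstdio>

constexpr int kVregNumWidth = 16;  // illustrative stand-in for VREG_NUM_WIDTH; must be < 32

// Well-defined unsigned arithmetic: ~0u is all ones, shifting it left by `width`
// clears the low `width` bits, and the outer ~ keeps only those low bits.
constexpr uint32_t LowBitsMask(int width) {
  return ~(~0u << width);  // width 16 -> 0x0000ffff
}

int main() {
  uint32_t adjusted_entry = 0x12345678u;  // hypothetical vmap entry after kEntryAdjustment
  std::printf("mask   = 0x%08x\n", LowBitsMask(kVregNumWidth));
  std::printf("pushed = 0x%08x\n", adjusted_entry & LowBitsMask(kVregNumWidth));
  return 0;
}

The old expression relied on -1 << VREG_NUM_WIDTH, and shifting a negative left operand is undefined in C++ even though it usually "works" on two's-complement targets; the unsigned form is equivalent and well defined.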
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc index d7b1d24887..da7a6755e9 100644 --- a/compiler/optimizing/code_generator_arm.cc +++ b/compiler/optimizing/code_generator_arm.cc @@ -432,7 +432,7 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -459,8 +459,8 @@ void CodeGeneratorARM::Finalize(CodeAllocator* allocator) { for (HBasicBlock* block : *block_order_) { // Get the label directly from block_labels_ rather than through GetLabelOf() to avoid // FirstNonEmptyBlock() which could lead to adjusting a label more than once. - DCHECK_LT(static_cast<size_t>(block->GetBlockId()), block_labels_.Size()); - Label* block_label = &block_labels_.GetRawStorage()[block->GetBlockId()]; + DCHECK_LT(block->GetBlockId(), GetGraph()->GetBlocks().size()); + Label* block_label = &block_labels_[block->GetBlockId()]; DCHECK_EQ(block_label->IsBound(), !block->IsSingleJump()); if (block_label->IsBound()) { __ AdjustLabelPosition(block_label); @@ -4034,7 +4034,8 @@ ArmAssembler* ParallelMoveResolverARM::GetAssembler() const { } void ParallelMoveResolverARM::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4166,7 +4167,8 @@ void ParallelMoveResolverARM::Exchange(int mem1, int mem2) { } void ParallelMoveResolverARM::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h index 1d98789213..111112e9b2 100644 --- a/compiler/optimizing/code_generator_arm.h +++ b/compiler/optimizing/code_generator_arm.h @@ -299,11 +299,11 @@ class CodeGeneratorARM : public CodeGenerator { void MarkGCCard(Register temp, Register card, Register object, Register value, bool can_be_null); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -335,7 +335,7 @@ class CodeGeneratorARM : public CodeGenerator { Literal* DeduplicateMethodCodeLiteral(MethodReference target_method); // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. 
Label frame_entry_label_; LocationsBuilderARM location_builder_; InstructionCodeGeneratorARM instruction_visitor_; diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index d175532f4c..31900d536a 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -610,7 +610,8 @@ void ParallelMoveResolverARM64::FreeScratchLocation(Location loc) { } void ParallelMoveResolverARM64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->MoveLocation(move->GetDestination(), move->GetSource()); } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 89671088c7..7178081bf8 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -326,12 +326,7 @@ class CodeGeneratorARM64 : public CodeGenerator { } void Initialize() OVERRIDE { - HGraph* graph = GetGraph(); - int length = graph->GetBlocks().size(); - block_labels_ = graph->GetArena()->AllocArray<vixl::Label>(length); - for (int i = 0; i < length; ++i) { - new(block_labels_ + i) vixl::Label(); - } + block_labels_ = CommonInitializeLabels<vixl::Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -400,7 +395,7 @@ class CodeGeneratorARM64 : public CodeGenerator { }; // Labels for each block that will be compiled. - vixl::Label* block_labels_; + vixl::Label* block_labels_; // Indexed by block id. vixl::Label frame_entry_label_; LocationsBuilderARM64 location_builder_; diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc index 8fdd56e0bc..c9f849318c 100644 --- a/compiler/optimizing/code_generator_mips64.cc +++ b/compiler/optimizing/code_generator_mips64.cc @@ -20,7 +20,9 @@ #include "entrypoints/quick/quick_entrypoints_enum.h" #include "gc/accounting/card_table.h" #include "intrinsics.h" +#include "intrinsics_mips64.h" #include "art_method.h" +#include "code_generator_utils.h" #include "mirror/array-inl.h" #include "mirror/class-inl.h" #include "offsets.h" @@ -36,7 +38,6 @@ static constexpr int kCurrentMethodStackOffset = 0; static constexpr GpuRegister kMethodRegisterArgument = A0; // We need extra temporary/scratch registers (in addition to AT) in some cases. -static constexpr GpuRegister TMP = T8; static constexpr FpuRegister FTMP = F8; // ART Thread Register. 
@@ -430,7 +431,7 @@ CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -452,12 +453,14 @@ Mips64Assembler* ParallelMoveResolverMIPS64::GetAssembler() const { } void ParallelMoveResolverMIPS64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->MoveLocation(move->GetDestination(), move->GetSource(), move->GetType()); } void ParallelMoveResolverMIPS64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; codegen_->SwapLocations(move->GetDestination(), move->GetSource(), move->GetType()); } @@ -2395,7 +2398,11 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeInterface(HInvokeInterface* invo } void LocationsBuilderMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO intrinsic function + IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); } @@ -2404,7 +2411,11 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in // invokes must have been pruned by art::PrepareForRegisterAllocation. DCHECK(codegen_->IsBaseline() || !invoke->IsStaticWithExplicitClinitCheck()); - // TODO - intrinsic function + IntrinsicLocationsBuilderMIPS64 intrinsic(codegen_); + if (intrinsic.TryDispatch(invoke)) { + return; + } + HandleInvoke(invoke); // While SetupBlockedRegisters() blocks registers S2-S8 due to their @@ -2419,10 +2430,10 @@ void LocationsBuilderMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* in } } -static bool TryGenerateIntrinsicCode(HInvoke* invoke, - CodeGeneratorMIPS64* codegen ATTRIBUTE_UNUSED) { +static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorMIPS64* codegen) { if (invoke->GetLocations()->Intrinsified()) { - // TODO - intrinsic function + IntrinsicCodeGeneratorMIPS64 intrinsic(codegen); + intrinsic.Dispatch(invoke); return true; } return false; @@ -2531,7 +2542,10 @@ void InstructionCodeGeneratorMIPS64::VisitInvokeStaticOrDirect(HInvokeStaticOrDi } void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) { - // TODO: Try to generate intrinsics code. + if (TryGenerateIntrinsicCode(invoke, codegen_)) { + return; + } + LocationSummary* locations = invoke->GetLocations(); Location receiver = locations->InAt(0); GpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<GpuRegister>(); diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h index f66ecb3711..16461d6c04 100644 --- a/compiler/optimizing/code_generator_mips64.h +++ b/compiler/optimizing/code_generator_mips64.h @@ -270,11 +270,11 @@ class CodeGeneratorMIPS64 : public CodeGenerator { } Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } void Finalize(CodeAllocator* allocator) OVERRIDE; @@ -315,7 +315,7 @@ class CodeGeneratorMIPS64 : public CodeGenerator { private: // Labels for each block that will be compiled. 
- GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. Label frame_entry_label_; LocationsBuilderMIPS64 location_builder_; InstructionCodeGeneratorMIPS64 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index ab3d1d1924..277f6b48c8 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -470,7 +470,7 @@ CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, 0, compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -4630,7 +4630,8 @@ void ParallelMoveResolverX86::MoveMemoryToMemory64(int dst, int src) { } void ParallelMoveResolverX86::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4782,7 +4783,8 @@ void ParallelMoveResolverX86::Exchange(int mem1, int mem2) { } void ParallelMoveResolverX86::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index f38e1ea09c..2c2fc65444 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -316,11 +316,11 @@ class CodeGeneratorX86 : public CodeGenerator { bool value_can_be_null); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE { @@ -356,7 +356,7 @@ class CodeGeneratorX86 : public CodeGenerator { private: // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. 
Label frame_entry_label_; LocationsBuilderX86 location_builder_; InstructionCodeGeneratorX86 instruction_visitor_; diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index cfce7a0faa..453c6fd6e1 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -620,7 +620,7 @@ CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, arraysize(kFpuCalleeSaves)), compiler_options, stats), - block_labels_(graph->GetArena(), 0), + block_labels_(nullptr), location_builder_(graph, this), instruction_visitor_(graph, this), move_resolver_(graph->GetArena(), this), @@ -4373,7 +4373,8 @@ X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const { } void ParallelMoveResolverX86_64::EmitMove(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); @@ -4531,7 +4532,8 @@ void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) { } void ParallelMoveResolverX86_64::EmitSwap(size_t index) { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; Location source = move->GetSource(); Location destination = move->GetDestination(); diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 1ec3580040..197ce63847 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -286,11 +286,11 @@ class CodeGeneratorX86_64 : public CodeGenerator { void Move(Location destination, Location source); Label* GetLabelOf(HBasicBlock* block) const { - return CommonGetLabelOf<Label>(block_labels_.GetRawStorage(), block); + return CommonGetLabelOf<Label>(block_labels_, block); } void Initialize() OVERRIDE { - block_labels_.SetSize(GetGraph()->GetBlocks().size()); + block_labels_ = CommonInitializeLabels<Label>(); } bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { @@ -334,7 +334,7 @@ class CodeGeneratorX86_64 : public CodeGenerator { }; // Labels for each block that will be compiled. - GrowableArray<Label> block_labels_; + Label* block_labels_; // Indexed by block id. Label frame_entry_label_; LocationsBuilderX86_64 location_builder_; InstructionCodeGeneratorX86_64 instruction_visitor_; diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc index b322759a6c..007d0e3332 100644 --- a/compiler/optimizing/dead_code_elimination.cc +++ b/compiler/optimizing/dead_code_elimination.cc @@ -56,7 +56,11 @@ static void MarkReachableBlocks(HGraph* graph, ArenaBitVector* visited) { if (switch_input->IsIntConstant()) { int32_t switch_value = switch_input->AsIntConstant()->GetValue(); int32_t start_value = switch_instruction->GetStartValue(); - uint32_t switch_index = static_cast<uint32_t>(switch_value - start_value); + // Note: Though the spec forbids packed-switch values to wrap around, we leave + // that task to the verifier and use unsigned arithmetic with it's "modulo 2^32" + // semantics to check if the value is in range, wrapped or not. 
+ uint32_t switch_index = + static_cast<uint32_t>(switch_value) - static_cast<uint32_t>(start_value); if (switch_index < switch_instruction->GetNumEntries()) { live_successors = live_successors.SubArray(switch_index, 1u); DCHECK_EQ(live_successors[0], block->GetSuccessor(switch_index)); diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc index d05c514912..2c6c3b726a 100644 --- a/compiler/optimizing/graph_visualizer.cc +++ b/compiler/optimizing/graph_visualizer.cc @@ -374,6 +374,11 @@ class HGraphVisualizerPrinter : public HGraphDelegateVisitor { << instance_of->MustDoNullCheck() << std::noboolalpha; } + void VisitArraySet(HArraySet* array_set) OVERRIDE { + StartAttributeStream("value_can_be_null") << std::boolalpha + << array_set->GetValueCanBeNull() << std::noboolalpha; + } + void VisitInvoke(HInvoke* invoke) OVERRIDE { StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex(); StartAttributeStream("method_name") << PrettyMethod( diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc index abdda13bb8..22bca2f111 100644 --- a/compiler/optimizing/instruction_simplifier.cc +++ b/compiler/optimizing/instruction_simplifier.cc @@ -71,7 +71,8 @@ class InstructionSimplifierVisitor : public HGraphVisitor { void VisitXor(HXor* instruction) OVERRIDE; void VisitInstanceOf(HInstanceOf* instruction) OVERRIDE; void VisitFakeString(HFakeString* fake_string) OVERRIDE; - bool IsDominatedByInputNullCheck(HInstruction* instr); + + bool CanEnsureNotNullAt(HInstruction* instr, HInstruction* at) const; OptimizingCompilerStats* stats_; bool simplification_occurred_ = false; @@ -187,14 +188,18 @@ void InstructionSimplifierVisitor::VisitNullCheck(HNullCheck* null_check) { } } -bool InstructionSimplifierVisitor::IsDominatedByInputNullCheck(HInstruction* instr) { - HInstruction* input = instr->InputAt(0); +bool InstructionSimplifierVisitor::CanEnsureNotNullAt(HInstruction* input, HInstruction* at) const { + if (!input->CanBeNull()) { + return true; + } + for (HUseIterator<HInstruction*> it(input->GetUses()); !it.Done(); it.Advance()) { HInstruction* use = it.Current()->GetUser(); - if (use->IsNullCheck() && use->StrictlyDominates(instr)) { + if (use->IsNullCheck() && use->StrictlyDominates(at)) { return true; } } + return false; } @@ -231,7 +236,7 @@ static bool TypeCheckHasKnownOutcome(HLoadClass* klass, HInstruction* object, bo void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { HInstruction* object = check_cast->InputAt(0); - if (!object->CanBeNull() || IsDominatedByInputNullCheck(check_cast)) { + if (CanEnsureNotNullAt(object, check_cast)) { check_cast->ClearMustDoNullCheck(); } @@ -267,7 +272,7 @@ void InstructionSimplifierVisitor::VisitCheckCast(HCheckCast* check_cast) { void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { HInstruction* object = instruction->InputAt(0); bool can_be_null = true; - if (!object->CanBeNull() || IsDominatedByInputNullCheck(instruction)) { + if (CanEnsureNotNullAt(object, instruction)) { can_be_null = false; instruction->ClearMustDoNullCheck(); } @@ -305,14 +310,14 @@ void InstructionSimplifierVisitor::VisitInstanceOf(HInstanceOf* instruction) { void InstructionSimplifierVisitor::VisitInstanceFieldSet(HInstanceFieldSet* instruction) { if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { 
instruction->ClearValueCanBeNull(); } } void InstructionSimplifierVisitor::VisitStaticFieldSet(HStaticFieldSet* instruction) { if ((instruction->GetValue()->GetType() == Primitive::kPrimNot) - && !instruction->GetValue()->CanBeNull()) { + && CanEnsureNotNullAt(instruction->GetValue(), instruction)) { instruction->ClearValueCanBeNull(); } } @@ -437,7 +442,7 @@ void InstructionSimplifierVisitor::VisitArraySet(HArraySet* instruction) { instruction->ClearNeedsTypeCheck(); } - if (!value->CanBeNull()) { + if (CanEnsureNotNullAt(value, instruction)) { instruction->ClearValueCanBeNull(); } } diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc index b71fdb8f1d..95646222ef 100644 --- a/compiler/optimizing/intrinsics.cc +++ b/compiler/optimizing/intrinsics.cc @@ -90,7 +90,7 @@ static Primitive::Type GetType(uint64_t data, bool is_op_size) { } static Intrinsics GetIntrinsic(InlineMethod method, InstructionSet instruction_set) { - if (instruction_set == kMips || instruction_set == kMips64) { + if (instruction_set == kMips) { return Intrinsics::kNone; } switch (method.opcode) { diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc new file mode 100644 index 0000000000..52e2cbec34 --- /dev/null +++ b/compiler/optimizing/intrinsics_mips64.cc @@ -0,0 +1,782 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "intrinsics_mips64.h" + +#include "arch/mips64/instruction_set_features_mips64.h" +#include "art_method.h" +#include "code_generator_mips64.h" +#include "entrypoints/quick/quick_entrypoints.h" +#include "intrinsics.h" +#include "mirror/array-inl.h" +#include "mirror/string.h" +#include "thread.h" +#include "utils/mips64/assembler_mips64.h" +#include "utils/mips64/constants_mips64.h" + +namespace art { + +namespace mips64 { + +IntrinsicLocationsBuilderMIPS64::IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen) + : arena_(codegen->GetGraph()->GetArena()) { +} + +Mips64Assembler* IntrinsicCodeGeneratorMIPS64::GetAssembler() { + return reinterpret_cast<Mips64Assembler*>(codegen_->GetAssembler()); +} + +ArenaAllocator* IntrinsicCodeGeneratorMIPS64::GetAllocator() { + return codegen_->GetGraph()->GetArena(); +} + +bool IntrinsicLocationsBuilderMIPS64::TryDispatch(HInvoke* invoke) { + Dispatch(invoke); + LocationSummary* res = invoke->GetLocations(); + return res != nullptr && res->Intrinsified(); +} + +#define __ assembler-> + +static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresRegister()); +} + +static void MoveFPToInt(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (is64bit) { + __ Dmfc1(out, in); + } else { + __ Mfc1(out, in); + } +} + +// long java.lang.Double.doubleToRawLongBits(double) +void IntrinsicLocationsBuilderMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), true, GetAssembler()); +} + +// int java.lang.Float.floatToRawIntBits(float) +void IntrinsicLocationsBuilderMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + CreateFPToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitFloatFloatToRawIntBits(HInvoke* invoke) { + MoveFPToInt(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresFpuRegister()); +} + +static void MoveIntToFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + if (is64bit) { + __ Dmtc1(in, out); + } else { + __ Mtc1(in, out); + } +} + +// double java.lang.Double.longBitsToDouble(long) +void IntrinsicLocationsBuilderMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitDoubleLongBitsToDouble(HInvoke* invoke) { + MoveIntToFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Float.intBitsToFloat(int) +void IntrinsicLocationsBuilderMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { + CreateIntToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitFloatIntBitsToFloat(HInvoke* invoke) { 
+ MoveIntToFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void GenReverseBytes(LocationSummary* locations, + Primitive::Type type, + Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + switch (type) { + case Primitive::kPrimShort: + __ Dsbh(out, in); + __ Seh(out, out); + break; + case Primitive::kPrimInt: + __ Rotr(out, in, 16); + __ Wsbh(out, out); + break; + case Primitive::kPrimLong: + __ Dsbh(out, in); + __ Dshd(out, out); + break; + default: + LOG(FATAL) << "Unexpected size for reverse-bytes: " << type; + UNREACHABLE(); + } +} + +// int java.lang.Integer.reverseBytes(int) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +// long java.lang.Long.reverseBytes(long) +void IntrinsicLocationsBuilderMIPS64::VisitLongReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +// short java.lang.Short.reverseBytes(short) +void IntrinsicLocationsBuilderMIPS64::VisitShortReverseBytes(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitShortReverseBytes(HInvoke* invoke) { + GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler()); +} + +static void GenCountZeroes(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (is64bit) { + __ Dclz(out, in); + } else { + __ Clz(out, in); + } +} + +// int java.lang.Integer.numberOfLeadingZeros(int i) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) { + GenCountZeroes(invoke->GetLocations(), false, GetAssembler()); +} + +// int java.lang.Long.numberOfLeadingZeros(long i) +void IntrinsicLocationsBuilderMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) { + GenCountZeroes(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenReverse(LocationSummary* locations, + Primitive::Type type, + Mips64Assembler* assembler) { + DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong); + + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (type == Primitive::kPrimInt) { + __ Rotr(out, in, 16); + __ Wsbh(out, out); + __ Bitswap(out, out); + } else { + __ Dsbh(out, in); + __ Dshd(out, out); + __ Dbitswap(out, out); + } +} + +// int 
java.lang.Integer.reverse(int) +void IntrinsicLocationsBuilderMIPS64::VisitIntegerReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitIntegerReverse(HInvoke* invoke) { + GenReverse(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler()); +} + +// long java.lang.Long.reverse(long) +void IntrinsicLocationsBuilderMIPS64::VisitLongReverse(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitLongReverse(HInvoke* invoke) { + GenReverse(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler()); +} + +static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +static void MathAbsFP(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + if (is64bit) { + __ AbsD(out, in); + } else { + __ AbsS(out, in); + } +} + +// double java.lang.Math.abs(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsDouble(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsDouble(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), true, GetAssembler()); +} + +// float java.lang.Math.abs(float) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsFloat(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsFloat(HInvoke* invoke) { + MathAbsFP(invoke->GetLocations(), false, GetAssembler()); +} + +static void CreateIntToInt(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +static void GenAbsInteger(LocationSummary* locations, bool is64bit, Mips64Assembler* assembler) { + GpuRegister in = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (is64bit) { + __ Dsra32(AT, in, 31); + __ Xor(out, in, AT); + __ Dsubu(out, out, AT); + } else { + __ Sra(AT, in, 31); + __ Xor(out, in, AT); + __ Subu(out, out, AT); + } +} + +// int java.lang.Math.abs(int) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsInt(HInvoke* invoke) { + CreateIntToInt(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsInt(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), false, GetAssembler()); +} + +// long java.lang.Math.abs(long) +void IntrinsicLocationsBuilderMIPS64::VisitMathAbsLong(HInvoke* invoke) { + CreateIntToInt(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathAbsLong(HInvoke* invoke) { + GenAbsInteger(invoke->GetLocations(), true, GetAssembler()); +} + +static void GenMinMaxFP(LocationSummary* locations, + bool is_min, + bool is_double, + Mips64Assembler* assembler) { + FpuRegister lhs = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister rhs = locations->InAt(1).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + if (is_double) { + if (is_min) { + __ MinD(out, lhs, 
rhs); + } else { + __ MaxD(out, lhs, rhs); + } + } else { + if (is_min) { + __ MinS(out, lhs, rhs); + } else { + __ MaxS(out, lhs, rhs); + } + } +} + +static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetInAt(1, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +// double java.lang.Math.min(double, double) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler()); +} + +// float java.lang.Math.min(float, float) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler()); +} + +// double java.lang.Math.max(double, double) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxDoubleDouble(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler()); +} + +// float java.lang.Math.max(float, float) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { + CreateFPFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxFloatFloat(HInvoke* invoke) { + GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler()); +} + +static void GenMinMax(LocationSummary* locations, + bool is_min, + Mips64Assembler* assembler) { + GpuRegister lhs = locations->InAt(0).AsRegister<GpuRegister>(); + GpuRegister rhs = locations->InAt(1).AsRegister<GpuRegister>(); + GpuRegister out = locations->Out().AsRegister<GpuRegister>(); + + if (out == lhs) { + __ Slt(AT, rhs, lhs); + if (is_min) { + __ Seleqz(out, lhs, AT); + __ Selnez(AT, rhs, AT); + } else { + __ Selnez(out, lhs, AT); + __ Seleqz(AT, rhs, AT); + } + } else { + __ Slt(AT, lhs, rhs); + if (is_min) { + __ Seleqz(out, rhs, AT); + __ Selnez(AT, lhs, AT); + } else { + __ Selnez(out, rhs, AT); + __ Seleqz(AT, lhs, AT); + } + } + __ Or(out, out, AT); +} + +static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); + locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap); +} + +// int java.lang.Math.min(int, int) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +// long java.lang.Math.min(long, long) +void IntrinsicLocationsBuilderMIPS64::VisitMathMinLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMinLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), true, GetAssembler()); +} + +// int 
java.lang.Math.max(int, int) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxIntInt(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +// long java.lang.Math.max(long, long) +void IntrinsicLocationsBuilderMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { + CreateIntIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathMaxLongLong(HInvoke* invoke) { + GenMinMax(invoke->GetLocations(), false, GetAssembler()); +} + +// double java.lang.Math.sqrt(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathSqrt(HInvoke* invoke) { + CreateFPToFPLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathSqrt(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + __ SqrtD(out, in); +} + +static void CreateFPToFP(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresFpuRegister()); + locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap); +} + +// double java.lang.Math.rint(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathRint(HInvoke* invoke) { + CreateFPToFP(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathRint(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + __ RintD(out, in); +} + +// double java.lang.Math.floor(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathFloor(HInvoke* invoke) { + CreateFPToFP(arena_, invoke); +} + +// 0x200 - +zero +// 0x040 - +infinity +// 0x020 - -zero +// 0x004 - -infinity +// 0x002 - quiet NaN +// 0x001 - signaling NaN +const constexpr uint16_t CLASS_MASK = 0x267; + +void IntrinsicCodeGeneratorMIPS64::VisitMathFloor(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Label done; + + // double floor(double in) { + // if in.isNaN || in.isInfinite || in.isZero { + // return in; + // } + __ ClassD(out, in); + __ Dmfc1(AT, out); + __ Andi(AT, AT, CLASS_MASK); // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN + __ MovD(out, in); + __ Bnezc(AT, &done); + + // Long outLong = floor(in); + // if outLong == Long.MAX_VALUE { + // // floor() has almost certainly returned a value which + // // can't be successfully represented as a signed 64-bit + // // number. Java expects that the input value will be + // // returned in these cases. + // // There is also a small probability that floor(in) + // // correctly truncates the input value to Long.MAX_VALUE. In + // // that case, this exception handling code still does the + // // correct thing. 
+ // return in; + // } + __ FloorLD(out, in); + __ Dmfc1(AT, out); + __ MovD(out, in); + __ LoadConst64(TMP, kPrimLongMax); + __ Beqc(AT, TMP, &done); + + // double out = outLong; + // return out; + __ Dmtc1(AT, out); + __ Cvtdl(out, out); + __ Bind(&done); + // } +} + +// double java.lang.Math.ceil(double) +void IntrinsicLocationsBuilderMIPS64::VisitMathCeil(HInvoke* invoke) { + CreateFPToFP(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMathCeil(HInvoke* invoke) { + LocationSummary* locations = invoke->GetLocations(); + Mips64Assembler* assembler = GetAssembler(); + FpuRegister in = locations->InAt(0).AsFpuRegister<FpuRegister>(); + FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>(); + + Label done; + + // double ceil(double in) { + // if in.isNaN || in.isInfinite || in.isZero { + // return in; + // } + __ ClassD(out, in); + __ Dmfc1(AT, out); + __ Andi(AT, AT, CLASS_MASK); // +0.0 | +Inf | -0.0 | -Inf | qNaN | sNaN + __ MovD(out, in); + __ Bnezc(AT, &done); + + // Long outLong = ceil(in); + // if outLong == Long.MAX_VALUE { + // // ceil() has almost certainly returned a value which + // // can't be successfully represented as a signed 64-bit + // // number. Java expects that the input value will be + // // returned in these cases. + // // There is also a small probability that ceil(in) + // // correctly rounds up the input value to Long.MAX_VALUE. In + // // that case, this exception handling code still does the + // // correct thing. + // return in; + // } + __ CeilLD(out, in); + __ Dmfc1(AT, out); + __ MovD(out, in); + __ LoadConst64(TMP, kPrimLongMax); + __ Beqc(AT, TMP, &done); + + // double out = outLong; + // return out; + __ Dmtc1(AT, out); + __ Cvtdl(out, out); + __ Bind(&done); + // } +} + +// byte libcore.io.Memory.peekByte(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekByte(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Lb(out, adr, 0); +} + +// short libcore.io.Memory.peekShort(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekShortNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Lh(out, adr, 0); +} + +// int libcore.io.Memory.peekInt(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekIntNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Lw(out, adr, 0); +} + +// long libcore.io.Memory.peekLong(long address) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { + CreateIntToIntLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPeekLongNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + 
GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister out = invoke->GetLocations()->Out().AsRegister<GpuRegister>(); + + __ Ld(out, adr, 0); +} + +static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) { + LocationSummary* locations = new (arena) LocationSummary(invoke, + LocationSummary::kNoCall, + kIntrinsified); + locations->SetInAt(0, Location::RequiresRegister()); + locations->SetInAt(1, Location::RequiresRegister()); +} + +// void libcore.io.Memory.pokeByte(long address, byte value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeByte(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sb(val, adr, 0); +} + +// void libcore.io.Memory.pokeShort(long address, short value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeShortNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sh(val, adr, 0); +} + +// void libcore.io.Memory.pokeInt(long address, int value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeIntNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sw(val, adr, 00); +} + +// void libcore.io.Memory.pokeLong(long address, long value) +void IntrinsicLocationsBuilderMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { + CreateIntIntToVoidLocations(arena_, invoke); +} + +void IntrinsicCodeGeneratorMIPS64::VisitMemoryPokeLongNative(HInvoke* invoke) { + Mips64Assembler* assembler = GetAssembler(); + GpuRegister adr = invoke->GetLocations()->InAt(0).AsRegister<GpuRegister>(); + GpuRegister val = invoke->GetLocations()->InAt(1).AsRegister<GpuRegister>(); + + __ Sd(val, adr, 0); +} + +// Unimplemented intrinsics. 
+ +#define UNIMPLEMENTED_INTRINSIC(Name) \ +void IntrinsicLocationsBuilderMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} \ +void IntrinsicCodeGeneratorMIPS64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \ +} + +UNIMPLEMENTED_INTRINSIC(MathRoundDouble) +UNIMPLEMENTED_INTRINSIC(MathRoundFloat) + +UNIMPLEMENTED_INTRINSIC(ThreadCurrentThread) +UNIMPLEMENTED_INTRINSIC(UnsafeGet) +UNIMPLEMENTED_INTRINSIC(UnsafeGetVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafeGetLong) +UNIMPLEMENTED_INTRINSIC(UnsafeGetLongVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafeGetObject) +UNIMPLEMENTED_INTRINSIC(UnsafeGetObjectVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafePut) +UNIMPLEMENTED_INTRINSIC(UnsafePutOrdered) +UNIMPLEMENTED_INTRINSIC(UnsafePutVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafePutObject) +UNIMPLEMENTED_INTRINSIC(UnsafePutObjectOrdered) +UNIMPLEMENTED_INTRINSIC(UnsafePutObjectVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafePutLong) +UNIMPLEMENTED_INTRINSIC(UnsafePutLongOrdered) +UNIMPLEMENTED_INTRINSIC(UnsafePutLongVolatile) +UNIMPLEMENTED_INTRINSIC(UnsafeCASInt) +UNIMPLEMENTED_INTRINSIC(UnsafeCASLong) +UNIMPLEMENTED_INTRINSIC(UnsafeCASObject) +UNIMPLEMENTED_INTRINSIC(StringCharAt) +UNIMPLEMENTED_INTRINSIC(StringCompareTo) +UNIMPLEMENTED_INTRINSIC(StringEquals) +UNIMPLEMENTED_INTRINSIC(StringIndexOf) +UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter) +UNIMPLEMENTED_INTRINSIC(StringNewStringFromBytes) +UNIMPLEMENTED_INTRINSIC(StringNewStringFromChars) +UNIMPLEMENTED_INTRINSIC(StringNewStringFromString) +UNIMPLEMENTED_INTRINSIC(LongRotateLeft) +UNIMPLEMENTED_INTRINSIC(LongRotateRight) +UNIMPLEMENTED_INTRINSIC(LongNumberOfTrailingZeros) +UNIMPLEMENTED_INTRINSIC(IntegerRotateLeft) +UNIMPLEMENTED_INTRINSIC(IntegerRotateRight) +UNIMPLEMENTED_INTRINSIC(IntegerNumberOfTrailingZeros) + +UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent) +UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck) +UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar) + +#undef UNIMPLEMENTED_INTRINSIC + +#undef __ + +} // namespace mips64 +} // namespace art diff --git a/compiler/optimizing/intrinsics_mips64.h b/compiler/optimizing/intrinsics_mips64.h new file mode 100644 index 0000000000..1481d24c9e --- /dev/null +++ b/compiler/optimizing/intrinsics_mips64.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2015 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ +#define ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ + +#include "intrinsics.h" + +namespace art { + +class ArenaAllocator; +class HInvokeStaticOrDirect; +class HInvokeVirtual; + +namespace mips64 { + +class CodeGeneratorMIPS64; +class Mips64Assembler; + +class IntrinsicLocationsBuilderMIPS64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicLocationsBuilderMIPS64(CodeGeneratorMIPS64* codegen); + + // Define visitor methods. 
+ +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether + // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to + // the invoke. + bool TryDispatch(HInvoke* invoke); + + private: + ArenaAllocator* arena_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderMIPS64); +}; + +class IntrinsicCodeGeneratorMIPS64 FINAL : public IntrinsicVisitor { + public: + explicit IntrinsicCodeGeneratorMIPS64(CodeGeneratorMIPS64* codegen) : codegen_(codegen) {} + + // Define visitor methods. + +#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache) \ + void Visit ## Name(HInvoke* invoke) OVERRIDE; +#include "intrinsics_list.h" +INTRINSICS_LIST(OPTIMIZING_INTRINSICS) +#undef INTRINSICS_LIST +#undef OPTIMIZING_INTRINSICS + + private: + Mips64Assembler* GetAssembler(); + + ArenaAllocator* GetAllocator(); + + CodeGeneratorMIPS64* codegen_; + + DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorMIPS64); +}; + +} // namespace mips64 +} // namespace art + +#endif // ART_COMPILER_OPTIMIZING_INTRINSICS_MIPS64_H_ diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h index 2eeba18a4d..76bd595fc1 100644 --- a/compiler/optimizing/locations.h +++ b/compiler/optimizing/locations.h @@ -22,7 +22,6 @@ #include "base/bit_field.h" #include "base/bit_vector.h" #include "base/value_object.h" -#include "utils/growable_array.h" namespace art { diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index ef89932e3b..989970fb49 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -22,7 +22,6 @@ #include "base/bit_utils.h" #include "base/stl_util.h" #include "mirror/class-inl.h" -#include "utils/growable_array.h" #include "scoped_thread_state_change.h" namespace art { diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 6b0ccf8690..486968cf9e 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -35,7 +35,6 @@ #include "offsets.h" #include "primitive.h" #include "utils/arena_bit_vector.h" -#include "utils/growable_array.h" namespace art { @@ -2409,7 +2408,9 @@ class HCurrentMethod : public HExpression<0> { // will be the block containing the next Dex opcode. 
class HPackedSwitch : public HTemplateInstruction<1> { public: - HPackedSwitch(int32_t start_value, uint32_t num_entries, HInstruction* input, + HPackedSwitch(int32_t start_value, + uint32_t num_entries, + HInstruction* input, uint32_t dex_pc = kNoDexPc) : HTemplateInstruction(SideEffects::None(), dex_pc), start_value_(start_value), @@ -2430,8 +2431,8 @@ class HPackedSwitch : public HTemplateInstruction<1> { DECLARE_INSTRUCTION(PackedSwitch); private: - int32_t start_value_; - uint32_t num_entries_; + const int32_t start_value_; + const uint32_t num_entries_; DISALLOW_COPY_AND_ASSIGN(HPackedSwitch); }; @@ -5054,7 +5055,10 @@ static constexpr size_t kDefaultNumberOfMoves = 4; class HParallelMove : public HTemplateInstruction<0> { public: explicit HParallelMove(ArenaAllocator* arena, uint32_t dex_pc = kNoDexPc) - : HTemplateInstruction(SideEffects::None(), dex_pc), moves_(arena, kDefaultNumberOfMoves) {} + : HTemplateInstruction(SideEffects::None(), dex_pc), + moves_(arena->Adapter(kArenaAllocMoveOperands)) { + moves_.reserve(kDefaultNumberOfMoves); + } void AddMove(Location source, Location destination, @@ -5064,15 +5068,15 @@ class HParallelMove : public HTemplateInstruction<0> { DCHECK(destination.IsValid()); if (kIsDebugBuild) { if (instruction != nullptr) { - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - if (moves_.Get(i).GetInstruction() == instruction) { + for (const MoveOperands& move : moves_) { + if (move.GetInstruction() == instruction) { // Special case the situation where the move is for the spill slot // of the instruction. if ((GetPrevious() == instruction) || ((GetPrevious() == nullptr) && instruction->IsPhi() && instruction->GetBlock() == GetBlock())) { - DCHECK_NE(destination.GetKind(), moves_.Get(i).GetDestination().GetKind()) + DCHECK_NE(destination.GetKind(), move.GetDestination().GetKind()) << "Doing parallel moves for the same instruction."; } else { DCHECK(false) << "Doing parallel moves for the same instruction."; @@ -5080,26 +5084,27 @@ class HParallelMove : public HTemplateInstruction<0> { } } } - for (size_t i = 0, e = moves_.Size(); i < e; ++i) { - DCHECK(!destination.OverlapsWith(moves_.Get(i).GetDestination())) + for (const MoveOperands& move : moves_) { + DCHECK(!destination.OverlapsWith(move.GetDestination())) << "Overlapped destination for two moves in a parallel move: " - << moves_.Get(i).GetSource() << " ==> " << moves_.Get(i).GetDestination() << " and " + << move.GetSource() << " ==> " << move.GetDestination() << " and " << source << " ==> " << destination; } } - moves_.Add(MoveOperands(source, destination, type, instruction)); + moves_.emplace_back(source, destination, type, instruction); } - MoveOperands* MoveOperandsAt(size_t index) const { - return moves_.GetRawStorage() + index; + MoveOperands* MoveOperandsAt(size_t index) { + DCHECK_LT(index, moves_.size()); + return &moves_[index]; } - size_t NumMoves() const { return moves_.Size(); } + size_t NumMoves() const { return moves_.size(); } DECLARE_INSTRUCTION(ParallelMove); private: - GrowableArray<MoveOperands> moves_; + ArenaVector<MoveOperands> moves_; DISALLOW_COPY_AND_ASSIGN(HParallelMove); }; diff --git a/compiler/optimizing/parallel_move_resolver.cc b/compiler/optimizing/parallel_move_resolver.cc index f9d812f6a6..fce776920d 100644 --- a/compiler/optimizing/parallel_move_resolver.cc +++ b/compiler/optimizing/parallel_move_resolver.cc @@ -16,6 +16,8 @@ #include <iostream> #include "parallel_move_resolver.h" + +#include "base/stl_util.h" #include "nodes.h" namespace art { @@ -28,19 +30,19 
@@ void ParallelMoveResolver::BuildInitialMoveList(HParallelMove* parallel_move) { for (size_t i = 0; i < parallel_move->NumMoves(); ++i) { MoveOperands* move = parallel_move->MoveOperandsAt(i); if (!move->IsRedundant()) { - moves_.Add(move); + moves_.push_back(move); } } } void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) { - DCHECK(moves_.IsEmpty()); + DCHECK(moves_.empty()); // Build up a worklist of moves. BuildInitialMoveList(parallel_move); // Move stack/stack slot to take advantage of a free register on constrained machines. - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Ignore constants and moves already eliminated. if (move.IsEliminated() || move.GetSource().IsConstant()) { continue; @@ -52,8 +54,8 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } } - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Skip constants to perform them last. They don't block other moves // and skipping such moves with register destinations keeps those // registers free for the whole algorithm. @@ -63,8 +65,8 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } // Perform the moves with constant sources. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; if (!move->IsEliminated()) { DCHECK(move->GetSource().IsConstant()); EmitMove(i); @@ -73,7 +75,7 @@ void ParallelMoveResolverWithSwap::EmitNativeCode(HParallelMove* parallel_move) } } - moves_.Reset(); + moves_.clear(); } Location LowOf(Location location) { @@ -123,7 +125,8 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // which means that a call to PerformMove could change any source operand // in the move graph. - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; DCHECK(!move->IsPending()); if (move->IsRedundant()) { // Because we swap register pairs first, following, un-pending @@ -143,8 +146,8 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // as this one's destination blocks this one so recursively perform all // such moves. MoveOperands* required_swap = nullptr; - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& other_move = *moves_[i]; if (other_move.Blocks(destination) && !other_move.IsPending()) { // Though PerformMove can change any source operand in the move graph, // calling `PerformMove` cannot create a blocking move via a swap @@ -163,7 +166,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { // at the next moves. Swapping is not blocked by anything, it just // updates other moves's source. break; - } else if (required_swap == moves_.Get(i)) { + } else if (required_swap == moves_[i]) { // If `other_move` was swapped, we iterate again to find a new // potential cycle. required_swap = nullptr; @@ -171,7 +174,7 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } else if (required_swap != nullptr) { // A move is required to swap. 
We walk back the cycle to find the // move by just returning from this `PerforrmMove`. - moves_.Get(index)->ClearPending(destination); + moves_[index]->ClearPending(destination); return required_swap; } } @@ -197,14 +200,13 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { DCHECK_EQ(required_swap, move); do_swap = true; } else { - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(destination)) { - DCHECK(other_move.IsPending()); - if (!move->Is64BitMove() && other_move.Is64BitMove()) { + for (MoveOperands* other_move : moves_) { + if (other_move->Blocks(destination)) { + DCHECK(other_move->IsPending()); + if (!move->Is64BitMove() && other_move->Is64BitMove()) { // We swap 64bits moves before swapping 32bits moves. Go back from the // cycle by returning the move that must be swapped. - return moves_.Get(i); + return other_move; } do_swap = true; break; @@ -220,12 +222,11 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { Location source = move->GetSource(); Location swap_destination = move->GetDestination(); move->Eliminate(); - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); - if (other_move.Blocks(source)) { - UpdateSourceOf(moves_.Get(i), source, swap_destination); - } else if (other_move.Blocks(swap_destination)) { - UpdateSourceOf(moves_.Get(i), swap_destination, source); + for (MoveOperands* other_move : moves_) { + if (other_move->Blocks(source)) { + UpdateSourceOf(other_move, source, swap_destination); + } else if (other_move->Blocks(swap_destination)) { + UpdateSourceOf(other_move, swap_destination, source); } } // If the swap was required because of a 64bits move in the middle of a cycle, @@ -242,14 +243,14 @@ MoveOperands* ParallelMoveResolverWithSwap::PerformMove(size_t index) { } bool ParallelMoveResolverWithSwap::IsScratchLocation(Location loc) { - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : moves_) { + if (move->Blocks(loc)) { return false; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->GetDestination().Equals(loc)) { + for (MoveOperands* move : moves_) { + if (move->GetDestination().Equals(loc)) { return true; } } @@ -302,8 +303,8 @@ ParallelMoveResolverWithSwap::ScratchRegisterScope::~ScratchRegisterScope() { void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { DCHECK_EQ(GetNumberOfPendingMoves(), 0u); - DCHECK(moves_.IsEmpty()); - DCHECK(scratches_.IsEmpty()); + DCHECK(moves_.empty()); + DCHECK(scratches_.empty()); // Backend dependent initialization. PrepareForEmitNativeCode(); @@ -311,8 +312,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Build up a worklist of moves. BuildInitialMoveList(parallel_move); - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& move = *moves_[i]; // Skip constants to perform them last. They don't block other moves and // skipping such moves with register destinations keeps those registers // free for the whole algorithm. @@ -324,8 +325,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Perform the moves with constant sources and register destinations with UpdateMoveSource() // to reduce the number of literal loads. 
Stack destinations are skipped since we won't be benefit // from changing the constant sources to stack locations. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; Location destination = move->GetDestination(); if (!move->IsEliminated() && !destination.IsStackSlot() && !destination.IsDoubleStackSlot()) { Location source = move->GetSource(); @@ -344,8 +345,8 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { } // Perform the rest of the moves. - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + MoveOperands* move = moves_[i]; if (!move->IsEliminated()) { EmitMove(i); move->Eliminate(); @@ -358,19 +359,18 @@ void ParallelMoveResolverNoSwap::EmitNativeCode(HParallelMove* parallel_move) { // Backend dependent cleanup. FinishEmitNativeCode(); - moves_.Reset(); - scratches_.Reset(); + moves_.clear(); + scratches_.clear(); } Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { - for (size_t i = 0; i < scratches_.Size(); ++i) { - Location loc = scratches_.Get(i); + for (Location loc : scratches_) { if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { return loc; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - Location loc = moves_.Get(i)->GetDestination(); + for (MoveOperands* move : moves_) { + Location loc = move->GetDestination(); if (loc.GetKind() == kind && !IsBlockedByMoves(loc)) { return loc; } @@ -380,18 +380,18 @@ Location ParallelMoveResolverNoSwap::GetScratchLocation(Location::Kind kind) { void ParallelMoveResolverNoSwap::AddScratchLocation(Location loc) { if (kIsDebugBuild) { - for (size_t i = 0; i < scratches_.Size(); ++i) { - DCHECK(!loc.Equals(scratches_.Get(i))); + for (Location scratch : scratches_) { + CHECK(!loc.Equals(scratch)); } } - scratches_.Add(loc); + scratches_.push_back(loc); } void ParallelMoveResolverNoSwap::RemoveScratchLocation(Location loc) { DCHECK(!IsBlockedByMoves(loc)); - for (size_t i = 0; i < scratches_.Size(); ++i) { - if (loc.Equals(scratches_.Get(i))) { - scratches_.DeleteAt(i); + for (auto it = scratches_.begin(), end = scratches_.end(); it != end; ++it) { + if (loc.Equals(*it)) { + scratches_.erase(it); break; } } @@ -406,7 +406,8 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { // we will update source operand in the move graph to reduce dependencies in // the graph. - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; DCHECK(!move->IsPending()); DCHECK(!move->IsEliminated()); if (move->IsRedundant()) { @@ -433,8 +434,8 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { // dependencies. Any unperformed, unpending move with a source the same // as this one's destination blocks this one so recursively perform all // such moves. - for (size_t i = 0; i < moves_.Size(); ++i) { - const MoveOperands& other_move = *moves_.Get(i); + for (size_t i = 0; i < moves_.size(); ++i) { + const MoveOperands& other_move = *moves_[i]; if (other_move.Blocks(destination) && !other_move.IsPending()) { PerformMove(i); } @@ -490,8 +491,11 @@ void ParallelMoveResolverNoSwap::PerformMove(size_t index) { move->Eliminate(); UpdateMoveSource(pending_source, pending_destination); // Free any unblocked locations in the scratch location list. 
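The hunk that follows adds a FIXME noting that FreeScratchLocation() may shrink scratches_ while the loop indexes into it, so the element shifted into the freed slot gets skipped. A minimal sketch of the hazard-free iteration pattern; std::vector<int> and the IsUnblocked callback are illustrative stand-ins for the arena-backed scratch list and the real blocking test, not ART's API:

    #include <vector>

    // Illustrative sketch only: erase the current element via the iterator
    // returned by erase(), so the element that shifts into its place is not skipped.
    void ReleaseUnblockedScratches(std::vector<int>* scratches, bool (*is_unblocked)(int)) {
      for (auto it = scratches->begin(); it != scratches->end(); /* advanced below */) {
        if (is_unblocked(*it)) {
          it = scratches->erase(it);  // erase() returns the next valid iterator
        } else {
          ++it;
        }
      }
    }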
- for (size_t i = 0; i < scratches_.Size(); ++i) { - Location scratch = scratches_.Get(i); + // Note: Fetch size() on each iteration because scratches_ can be modified inside the loop. + // FIXME: If FreeScratchLocation() removes the location from scratches_, + // we skip the next location. This happens for arm64. + for (size_t i = 0; i < scratches_.size(); ++i) { + Location scratch = scratches_[i]; // Only scratch overlapping with performed move source can be unblocked. if (scratch.OverlapsWith(pending_source) && !IsBlockedByMoves(scratch)) { FreeScratchLocation(pending_source); @@ -512,8 +516,7 @@ void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { // This is not something we must do, but we can use fewer scratch locations with // this trick. For example, we can avoid using additional scratch locations for // moves (0 -> 1), (1 -> 2), (1 -> 0). - for (size_t i = 0; i < moves_.Size(); ++i) { - MoveOperands* move = moves_.Get(i); + for (MoveOperands* move : moves_) { if (move->GetSource().Equals(from)) { move->SetSource(to); } @@ -522,16 +525,15 @@ void ParallelMoveResolverNoSwap::UpdateMoveSource(Location from, Location to) { void ParallelMoveResolverNoSwap::AddPendingMove(Location source, Location destination, Primitive::Type type) { - pending_moves_.Add(new (allocator_) MoveOperands(source, destination, type, nullptr)); + pending_moves_.push_back(new (allocator_) MoveOperands(source, destination, type, nullptr)); } void ParallelMoveResolverNoSwap::DeletePendingMove(MoveOperands* move) { - pending_moves_.Delete(move); + RemoveElement(pending_moves_, move); } MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) { - for (size_t i = 0; i < pending_moves_.Size(); ++i) { - MoveOperands* move = pending_moves_.Get(i); + for (MoveOperands* move : pending_moves_) { Location destination = move->GetDestination(); // Only moves with destination overlapping with input loc can be unblocked. if (destination.OverlapsWith(loc) && !IsBlockedByMoves(destination)) { @@ -542,13 +544,13 @@ MoveOperands* ParallelMoveResolverNoSwap::GetUnblockedPendingMove(Location loc) } bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { - for (size_t i = 0; i < pending_moves_.Size(); ++i) { - if (pending_moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : pending_moves_) { + if (move->Blocks(loc)) { return true; } } - for (size_t i = 0; i < moves_.Size(); ++i) { - if (moves_.Get(i)->Blocks(loc)) { + for (MoveOperands* move : moves_) { + if (move->Blocks(loc)) { return true; } } @@ -558,7 +560,7 @@ bool ParallelMoveResolverNoSwap::IsBlockedByMoves(Location loc) { // So far it is only used for debugging purposes to make sure all pending moves // have been performed. size_t ParallelMoveResolverNoSwap::GetNumberOfPendingMoves() { - return pending_moves_.Size(); + return pending_moves_.size(); } } // namespace art diff --git a/compiler/optimizing/parallel_move_resolver.h b/compiler/optimizing/parallel_move_resolver.h index 9ede91013e..4278861690 100644 --- a/compiler/optimizing/parallel_move_resolver.h +++ b/compiler/optimizing/parallel_move_resolver.h @@ -17,8 +17,8 @@ #ifndef ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ #define ART_COMPILER_OPTIMIZING_PARALLEL_MOVE_RESOLVER_H_ +#include "base/arena_containers.h" #include "base/value_object.h" -#include "utils/growable_array.h" #include "locations.h" #include "primitive.h" @@ -31,7 +31,10 @@ class MoveOperands; // have their own subclass that implements corresponding virtual functions. 
class ParallelMoveResolver : public ValueObject { public: - explicit ParallelMoveResolver(ArenaAllocator* allocator) : moves_(allocator, 32) {} + explicit ParallelMoveResolver(ArenaAllocator* allocator) + : moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)) { + moves_.reserve(32); + } virtual ~ParallelMoveResolver() {} // Resolve a set of parallel moves, emitting assembler instructions. @@ -41,7 +44,7 @@ class ParallelMoveResolver : public ValueObject { // Build the initial list of moves. void BuildInitialMoveList(HParallelMove* parallel_move); - GrowableArray<MoveOperands*> moves_; + ArenaVector<MoveOperands*> moves_; private: DISALLOW_COPY_AND_ASSIGN(ParallelMoveResolver); @@ -120,8 +123,13 @@ class ParallelMoveResolverWithSwap : public ParallelMoveResolver { class ParallelMoveResolverNoSwap : public ParallelMoveResolver { public: explicit ParallelMoveResolverNoSwap(ArenaAllocator* allocator) - : ParallelMoveResolver(allocator), scratches_(allocator, 32), - pending_moves_(allocator, 8), allocator_(allocator) {} + : ParallelMoveResolver(allocator), + scratches_(allocator->Adapter(kArenaAllocParallelMoveResolver)), + pending_moves_(allocator->Adapter(kArenaAllocParallelMoveResolver)), + allocator_(allocator) { + scratches_.reserve(32); + pending_moves_.reserve(8); + } virtual ~ParallelMoveResolverNoSwap() {} // Resolve a set of parallel moves, emitting assembler instructions. @@ -160,7 +168,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { void RemoveScratchLocation(Location loc); // List of scratch locations. - GrowableArray<Location> scratches_; + ArenaVector<Location> scratches_; private: // Perform the move at the given index in `moves_` (possibly requiring other moves to satisfy @@ -183,7 +191,7 @@ class ParallelMoveResolverNoSwap : public ParallelMoveResolver { size_t GetNumberOfPendingMoves(); // Additional pending moves which might be added to resolve dependency cycle. - GrowableArray<MoveOperands*> pending_moves_; + ArenaVector<MoveOperands*> pending_moves_; // Used to allocate pending MoveOperands. 
ArenaAllocator* const allocator_; diff --git a/compiler/optimizing/parallel_move_test.cc b/compiler/optimizing/parallel_move_test.cc index f8f70105cf..da91cb811d 100644 --- a/compiler/optimizing/parallel_move_test.cc +++ b/compiler/optimizing/parallel_move_test.cc @@ -56,7 +56,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { : ParallelMoveResolverWithSwap(allocator) {} void EmitMove(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } @@ -68,7 +69,8 @@ class TestParallelMoveResolverWithSwap : public ParallelMoveResolverWithSwap { } void EmitSwap(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } @@ -127,7 +129,8 @@ class TestParallelMoveResolverNoSwap : public ParallelMoveResolverNoSwap { void FreeScratchLocation(Location loc ATTRIBUTE_UNUSED) OVERRIDE {} void EmitMove(size_t index) OVERRIDE { - MoveOperands* move = moves_.Get(index); + DCHECK_LT(index, moves_.size()); + MoveOperands* move = moves_[index]; if (!message_.str().empty()) { message_ << " "; } diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc index ad8c682b3a..40c75af6ef 100644 --- a/compiler/optimizing/ssa_builder.cc +++ b/compiler/optimizing/ssa_builder.cc @@ -56,6 +56,24 @@ class DeadPhiHandling : public ValueObject { DISALLOW_COPY_AND_ASSIGN(DeadPhiHandling); }; +static bool HasConflictingEquivalent(HPhi* phi) { + if (phi->GetNext() == nullptr) { + return false; + } + HPhi* next = phi->GetNext()->AsPhi(); + if (next->GetRegNumber() == phi->GetRegNumber()) { + if (next->GetType() == Primitive::kPrimVoid) { + // We only get a void type for an equivalent phi we processed and found out + // it was conflicting. + return true; + } else { + // Go to the next phi, in case it is also an equivalent. + return HasConflictingEquivalent(next); + } + } + return false; +} + bool DeadPhiHandling::UpdateType(HPhi* phi) { if (phi->IsDead()) { // Phi was rendered dead while waiting in the worklist because it was replaced @@ -87,21 +105,26 @@ bool DeadPhiHandling::UpdateType(HPhi* phi) { if (new_type == Primitive::kPrimVoid) { new_type = input_type; } else if (new_type == Primitive::kPrimNot && input_type == Primitive::kPrimInt) { + if (input->IsPhi() && HasConflictingEquivalent(input->AsPhi())) { + // If we already asked for an equivalent of the input phi, but that equivalent + // ended up conflicting, make this phi conflicting too. + conflict = true; + break; + } HInstruction* equivalent = SsaBuilder::GetReferenceTypeEquivalent(input); if (equivalent == nullptr) { conflict = true; break; - } else { - phi->ReplaceInput(equivalent, i); - if (equivalent->IsPhi()) { - DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); - // We created a new phi, but that phi has the same inputs as the old phi. We - // add it to the worklist to ensure its inputs can also be converted to reference. - // If not, it will remain dead, and the algorithm will make the current phi dead - // as well. - equivalent->AsPhi()->SetLive(); - AddToWorklist(equivalent->AsPhi()); - } + } + phi->ReplaceInput(equivalent, i); + if (equivalent->IsPhi()) { + DCHECK_EQ(equivalent->GetType(), Primitive::kPrimNot); + // We created a new phi, but that phi has the same inputs as the old phi. 
We + // add it to the worklist to ensure its inputs can also be converted to reference. + // If not, it will remain dead, and the algorithm will make the current phi dead + // as well. + equivalent->AsPhi()->SetLive(); + AddToWorklist(equivalent->AsPhi()); } } else if (new_type == Primitive::kPrimInt && input_type == Primitive::kPrimNot) { new_type = Primitive::kPrimNot; @@ -145,8 +168,14 @@ void DeadPhiHandling::VisitBasicBlock(HBasicBlock* block) { if (phi->IsDead() && phi->HasEnvironmentUses()) { phi->SetLive(); if (block->IsLoopHeader()) { - // Give a type to the loop phi, to guarantee convergence of the algorithm. - phi->SetType(phi->InputAt(0)->GetType()); + // Give a type to the loop phi to guarantee convergence of the algorithm. + // Note that the dead phi may already have a type if it is an equivalent + // generated for a typed LoadLocal. In that case we do not change the + // type because it could lead to an unsupported PrimNot/Float/Double -> + // PrimInt/Long transition and create same type equivalents. + if (phi->GetType() == Primitive::kPrimVoid) { + phi->SetType(phi->InputAt(0)->GetType()); + } AddToWorklist(phi); } else { // Because we are doing a reverse post order visit, all inputs of diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 1f0bac59e0..f27cecc8fa 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -30,8 +30,8 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, current_entry_.sp_mask = sp_mask; current_entry_.num_dex_registers = num_dex_registers; current_entry_.inlining_depth = inlining_depth; - current_entry_.dex_register_locations_start_index = dex_register_locations_.Size(); - current_entry_.inline_infos_start_index = inline_infos_.Size(); + current_entry_.dex_register_locations_start_index = dex_register_locations_.size(); + current_entry_.inline_infos_start_index = inline_infos_.size(); current_entry_.dex_register_map_hash = 0; current_entry_.same_dex_register_map_as_ = kNoSameDexMapFound; if (num_dex_registers != 0) { @@ -55,7 +55,7 @@ void StackMapStream::BeginStackMapEntry(uint32_t dex_pc, void StackMapStream::EndStackMapEntry() { current_entry_.same_dex_register_map_as_ = FindEntryWithTheSameDexMap(); - stack_maps_.Add(current_entry_); + stack_maps_.push_back(current_entry_); current_entry_ = StackMapEntry(); } @@ -73,12 +73,12 @@ void StackMapStream::AddDexRegisterEntry(DexRegisterLocation::Kind kind, int32_t auto it = location_catalog_entries_indices_.Find(location); if (it != location_catalog_entries_indices_.end()) { // Retrieve the index from the hash map. - dex_register_locations_.Add(it->second); + dex_register_locations_.push_back(it->second); } else { // Create a new entry in the location catalog and the hash map. 
- size_t index = location_catalog_entries_.Size(); - location_catalog_entries_.Add(location); - dex_register_locations_.Add(index); + size_t index = location_catalog_entries_.size(); + location_catalog_entries_.push_back(location); + dex_register_locations_.push_back(index); location_catalog_entries_indices_.Insert(std::make_pair(location, index)); } @@ -108,7 +108,7 @@ void StackMapStream::BeginInlineInfoEntry(uint32_t method_index, current_inline_info_.dex_pc = dex_pc; current_inline_info_.invoke_type = invoke_type; current_inline_info_.num_dex_registers = num_dex_registers; - current_inline_info_.dex_register_locations_start_index = dex_register_locations_.Size(); + current_inline_info_.dex_register_locations_start_index = dex_register_locations_.size(); if (num_dex_registers != 0) { current_inline_info_.live_dex_registers_mask = new (allocator_) ArenaBitVector(allocator_, num_dex_registers, true); @@ -123,14 +123,14 @@ void StackMapStream::EndInlineInfoEntry() { DCHECK_EQ(current_dex_register_, current_inline_info_.num_dex_registers) << "Inline information contains less registers than expected"; in_inline_frame_ = false; - inline_infos_.Add(current_inline_info_); + inline_infos_.push_back(current_inline_info_); current_inline_info_ = InlineInfoEntry(); } uint32_t StackMapStream::ComputeMaxNativePcOffset() const { uint32_t max_native_pc_offset = 0u; - for (size_t i = 0, size = stack_maps_.Size(); i != size; ++i) { - max_native_pc_offset = std::max(max_native_pc_offset, stack_maps_.Get(i).native_pc_offset); + for (const StackMapEntry& entry : stack_maps_) { + max_native_pc_offset = std::max(max_native_pc_offset, entry.native_pc_offset); } return max_native_pc_offset; } @@ -147,7 +147,7 @@ size_t StackMapStream::PrepareForFillIn() { dex_pc_max_, max_native_pc_offset, register_mask_max_); - stack_maps_size_ = stack_maps_.Size() * stack_map_encoding_.ComputeStackMapSize(); + stack_maps_size_ = stack_maps_.size() * stack_map_encoding_.ComputeStackMapSize(); dex_register_location_catalog_size_ = ComputeDexRegisterLocationCatalogSize(); // Note: use RoundUp to word-size here if you want CodeInfo objects to be word aligned. @@ -170,33 +170,28 @@ size_t StackMapStream::PrepareForFillIn() { size_t StackMapStream::ComputeDexRegisterLocationCatalogSize() const { size_t size = DexRegisterLocationCatalog::kFixedSize; - for (size_t location_catalog_entry_index = 0; - location_catalog_entry_index < location_catalog_entries_.Size(); - ++location_catalog_entry_index) { - DexRegisterLocation dex_register_location = - location_catalog_entries_.Get(location_catalog_entry_index); + for (const DexRegisterLocation& dex_register_location : location_catalog_entries_) { size += DexRegisterLocationCatalog::EntrySize(dex_register_location); } return size; } size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask) const { + const BitVector* live_dex_registers_mask) const { + // For num_dex_registers == 0u live_dex_registers_mask may be null. + if (num_dex_registers == 0u) { + return 0u; // No register map will be emitted. + } + DCHECK(live_dex_registers_mask != nullptr); + // Size of the map in bytes. size_t size = DexRegisterMap::kFixedSize; // Add the live bit mask for the Dex register liveness. size += DexRegisterMap::GetLiveBitMaskSize(num_dex_registers); // Compute the size of the set of live Dex register entries. 
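The next hunk replaces a bit-by-bit IsBitSet() scan with BitVector::NumSetBits(). A rough stand-in for what that amounts to, assuming the mask is stored as 32-bit words; the function name and std::vector storage are illustrative, not ART's BitVector interface:

    #include <bitset>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Word-wise population count: counts the live Dex registers in one pass
    // over the mask's words instead of testing each register number.
    size_t CountSetBits(const std::vector<uint32_t>& words) {
      size_t count = 0;
      for (uint32_t word : words) {
        count += std::bitset<32>(word).count();
      }
      return count;
    }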
- size_t number_of_live_dex_registers = 0; - for (size_t dex_register_number = 0; - dex_register_number < num_dex_registers; - ++dex_register_number) { - if (live_dex_registers_mask.IsBitSet(dex_register_number)) { - ++number_of_live_dex_registers; - } - } + size_t number_of_live_dex_registers = live_dex_registers_mask->NumSetBits(); size_t map_entries_size_in_bits = - DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.Size()) + DexRegisterMap::SingleEntrySizeInBits(location_catalog_entries_.size()) * number_of_live_dex_registers; size_t map_entries_size_in_bytes = RoundUp(map_entries_size_in_bits, kBitsPerByte) / kBitsPerByte; @@ -207,24 +202,24 @@ size_t StackMapStream::ComputeDexRegisterMapSize(uint32_t num_dex_registers, size_t StackMapStream::ComputeDexRegisterMapsSize() const { size_t size = 0; size_t inline_info_index = 0; - for (size_t i = 0; i < stack_maps_.Size(); ++i) { - StackMapEntry entry = stack_maps_.Get(i); + for (const StackMapEntry& entry : stack_maps_) { if (entry.same_dex_register_map_as_ == kNoSameDexMapFound) { - size += ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask); + size += ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask); } else { // Entries with the same dex map will have the same offset. } for (size_t j = 0; j < entry.inlining_depth; ++j) { - InlineInfoEntry inline_entry = inline_infos_.Get(inline_info_index++); + DCHECK_LT(inline_info_index, inline_infos_.size()); + InlineInfoEntry inline_entry = inline_infos_[inline_info_index++]; size += ComputeDexRegisterMapSize(inline_entry.num_dex_registers, - *inline_entry.live_dex_registers_mask); + inline_entry.live_dex_registers_mask); } } return size; } size_t StackMapStream::ComputeInlineInfoSize() const { - return inline_infos_.Size() * InlineInfo::SingleEntrySize() + return inline_infos_.size() * InlineInfo::SingleEntrySize() // For encoding the depth. + (number_of_stack_maps_with_inline_info_ * InlineInfo::kFixedSize); } @@ -244,19 +239,18 @@ void StackMapStream::FillIn(MemoryRegion region) { inline_infos_start_, inline_info_size_); code_info.SetEncoding(stack_map_encoding_); - code_info.SetNumberOfStackMaps(stack_maps_.Size()); + code_info.SetNumberOfStackMaps(stack_maps_.size()); DCHECK_EQ(code_info.GetStackMapsSize(code_info.ExtractEncoding()), stack_maps_size_); // Set the Dex register location catalog. - code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.Size()); + code_info.SetNumberOfLocationCatalogEntries(location_catalog_entries_.size()); MemoryRegion dex_register_location_catalog_region = region.Subregion( dex_register_location_catalog_start_, dex_register_location_catalog_size_); DexRegisterLocationCatalog dex_register_location_catalog(dex_register_location_catalog_region); // Offset in `dex_register_location_catalog` where to store the next // register location. 
size_t location_catalog_offset = DexRegisterLocationCatalog::kFixedSize; - for (size_t i = 0, e = location_catalog_entries_.Size(); i < e; ++i) { - DexRegisterLocation dex_register_location = location_catalog_entries_.Get(i); + for (DexRegisterLocation dex_register_location : location_catalog_entries_) { dex_register_location_catalog.SetRegisterInfo(location_catalog_offset, dex_register_location); location_catalog_offset += DexRegisterLocationCatalog::EntrySize(dex_register_location); } @@ -265,9 +259,9 @@ void StackMapStream::FillIn(MemoryRegion region) { uintptr_t next_dex_register_map_offset = 0; uintptr_t next_inline_info_offset = 0; - for (size_t i = 0, e = stack_maps_.Size(); i < e; ++i) { + for (size_t i = 0, e = stack_maps_.size(); i < e; ++i) { StackMap stack_map = code_info.GetStackMapAt(i, stack_map_encoding_); - StackMapEntry entry = stack_maps_.Get(i); + StackMapEntry entry = stack_maps_[i]; stack_map.SetDexPc(stack_map_encoding_, entry.dex_pc); stack_map.SetNativePcOffset(stack_map_encoding_, entry.native_pc_offset); @@ -291,7 +285,7 @@ void StackMapStream::FillIn(MemoryRegion region) { // New dex registers maps should be added to the stack map. MemoryRegion register_region = dex_register_locations_region.Subregion( next_dex_register_map_offset, - ComputeDexRegisterMapSize(entry.num_dex_registers, *entry.live_dex_registers_mask)); + ComputeDexRegisterMapSize(entry.num_dex_registers, entry.live_dex_registers_mask)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); stack_map.SetDexRegisterMapOffset( @@ -318,8 +312,9 @@ void StackMapStream::FillIn(MemoryRegion region) { stack_map_encoding_, inline_region.start() - dex_register_locations_region.start()); inline_info.SetDepth(entry.inlining_depth); + DCHECK_LE(entry.inline_infos_start_index + entry.inlining_depth, inline_infos_.size()); for (size_t depth = 0; depth < entry.inlining_depth; ++depth) { - InlineInfoEntry inline_entry = inline_infos_.Get(depth + entry.inline_infos_start_index); + InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index]; inline_info.SetMethodIndexAtDepth(depth, inline_entry.method_index); inline_info.SetDexPcAtDepth(depth, inline_entry.dex_pc); inline_info.SetInvokeTypeAtDepth(depth, inline_entry.invoke_type); @@ -331,7 +326,7 @@ void StackMapStream::FillIn(MemoryRegion region) { MemoryRegion register_region = dex_register_locations_region.Subregion( next_dex_register_map_offset, ComputeDexRegisterMapSize(inline_entry.num_dex_registers, - *inline_entry.live_dex_registers_mask)); + inline_entry.live_dex_registers_mask)); next_dex_register_map_offset += register_region.size(); DexRegisterMap dex_register_map(register_region); inline_info.SetDexRegisterMapOffsetAtDepth( @@ -357,42 +352,43 @@ void StackMapStream::FillInDexRegisterMap(DexRegisterMap dex_register_map, uint32_t start_index_in_dex_register_locations) const { dex_register_map.SetLiveBitMask(num_dex_registers, live_dex_registers_mask); // Set the dex register location mapping data. 
- for (size_t dex_register_number = 0, index_in_dex_register_locations = 0; - dex_register_number < num_dex_registers; - ++dex_register_number) { - if (live_dex_registers_mask.IsBitSet(dex_register_number)) { - size_t location_catalog_entry_index = dex_register_locations_.Get( - start_index_in_dex_register_locations + index_in_dex_register_locations); - dex_register_map.SetLocationCatalogEntryIndex( - index_in_dex_register_locations, - location_catalog_entry_index, - num_dex_registers, - location_catalog_entries_.Size()); - ++index_in_dex_register_locations; - } + size_t number_of_live_dex_registers = live_dex_registers_mask.NumSetBits(); + DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); + DCHECK_LE(start_index_in_dex_register_locations, + dex_register_locations_.size() - number_of_live_dex_registers); + for (size_t index_in_dex_register_locations = 0; + index_in_dex_register_locations != number_of_live_dex_registers; + ++index_in_dex_register_locations) { + size_t location_catalog_entry_index = dex_register_locations_[ + start_index_in_dex_register_locations + index_in_dex_register_locations]; + dex_register_map.SetLocationCatalogEntryIndex( + index_in_dex_register_locations, + location_catalog_entry_index, + num_dex_registers, + location_catalog_entries_.size()); } } size_t StackMapStream::FindEntryWithTheSameDexMap() { - size_t current_entry_index = stack_maps_.Size(); + size_t current_entry_index = stack_maps_.size(); auto entries_it = dex_map_hash_to_stack_map_indices_.find(current_entry_.dex_register_map_hash); if (entries_it == dex_map_hash_to_stack_map_indices_.end()) { // We don't have a perfect hash functions so we need a list to collect all stack maps // which might have the same dex register map. - GrowableArray<uint32_t> stack_map_indices(allocator_, 1); - stack_map_indices.Add(current_entry_index); - dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, stack_map_indices); + ArenaVector<uint32_t> stack_map_indices(allocator_->Adapter(kArenaAllocStackMapStream)); + stack_map_indices.push_back(current_entry_index); + dex_map_hash_to_stack_map_indices_.Put(current_entry_.dex_register_map_hash, + std::move(stack_map_indices)); return kNoSameDexMapFound; } // We might have collisions, so we need to check whether or not we really have a match. 
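The loop that follows scans every stack map recorded under the same hash because the dex-register-map hash is not perfect. A self-contained sketch of that intern-with-collision-bucket pattern; the std::string payload, container types, and InternEntry name are hypothetical stand-ins for the stack-map structures:

    #include <cstddef>
    #include <cstdint>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Interning with an imperfect hash: each hash keys a bucket of candidate
    // indices, and every candidate is compared in full before being reused.
    size_t InternEntry(const std::string& entry,
                       uint32_t hash,
                       std::vector<std::string>* entries,
                       std::unordered_map<uint32_t, std::vector<size_t>>* buckets) {
      std::vector<size_t>& bucket = (*buckets)[hash];
      for (size_t index : bucket) {
        if ((*entries)[index] == entry) {
          return index;  // genuine match: reuse the earlier entry
        }
      }
      bucket.push_back(entries->size());
      entries->push_back(entry);
      return entries->size() - 1;  // no match: record a new entry under this hash
    }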
- for (size_t i = 0; i < entries_it->second.Size(); i++) { - size_t test_entry_index = entries_it->second.Get(i); - if (HaveTheSameDexMaps(stack_maps_.Get(test_entry_index), current_entry_)) { + for (uint32_t test_entry_index : entries_it->second) { + if (HaveTheSameDexMaps(GetStackMap(test_entry_index), current_entry_)) { return test_entry_index; } } - entries_it->second.Add(current_entry_index); + entries_it->second.push_back(current_entry_index); return kNoSameDexMapFound; } @@ -406,21 +402,22 @@ bool StackMapStream::HaveTheSameDexMaps(const StackMapEntry& a, const StackMapEn if (a.num_dex_registers != b.num_dex_registers) { return false; } - - int index_in_dex_register_locations = 0; - for (uint32_t i = 0; i < a.num_dex_registers; i++) { - if (a.live_dex_registers_mask->IsBitSet(i) != b.live_dex_registers_mask->IsBitSet(i)) { + if (a.num_dex_registers != 0u) { + DCHECK(a.live_dex_registers_mask != nullptr); + DCHECK(b.live_dex_registers_mask != nullptr); + if (!a.live_dex_registers_mask->Equal(b.live_dex_registers_mask)) { return false; } - if (a.live_dex_registers_mask->IsBitSet(i)) { - size_t a_loc = dex_register_locations_.Get( - a.dex_register_locations_start_index + index_in_dex_register_locations); - size_t b_loc = dex_register_locations_.Get( - b.dex_register_locations_start_index + index_in_dex_register_locations); - if (a_loc != b_loc) { - return false; - } - ++index_in_dex_register_locations; + size_t number_of_live_dex_registers = a.live_dex_registers_mask->NumSetBits(); + DCHECK_LE(number_of_live_dex_registers, dex_register_locations_.size()); + DCHECK_LE(a.dex_register_locations_start_index, + dex_register_locations_.size() - number_of_live_dex_registers); + DCHECK_LE(b.dex_register_locations_start_index, + dex_register_locations_.size() - number_of_live_dex_registers); + auto a_begin = dex_register_locations_.begin() + a.dex_register_locations_start_index; + auto b_begin = dex_register_locations_.begin() + b.dex_register_locations_start_index; + if (!std::equal(a_begin, a_begin + number_of_live_dex_registers, b_begin)) { + return false; } } return true; diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index 703b6f7e13..4783e283b3 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -24,7 +24,6 @@ #include "memory_region.h" #include "nodes.h" #include "stack_map.h" -#include "utils/growable_array.h" namespace art { @@ -62,15 +61,16 @@ class StackMapStream : public ValueObject { public: explicit StackMapStream(ArenaAllocator* allocator) : allocator_(allocator), - stack_maps_(allocator, 10), - location_catalog_entries_(allocator, 4), - dex_register_locations_(allocator, 10 * 4), - inline_infos_(allocator, 2), + stack_maps_(allocator->Adapter(kArenaAllocStackMapStream)), + location_catalog_entries_(allocator->Adapter(kArenaAllocStackMapStream)), + dex_register_locations_(allocator->Adapter(kArenaAllocStackMapStream)), + inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)), stack_mask_max_(-1), dex_pc_max_(0), register_mask_max_(0), number_of_stack_maps_with_inline_info_(0), - dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), allocator->Adapter()), + dex_map_hash_to_stack_map_indices_(std::less<uint32_t>(), + allocator->Adapter(kArenaAllocStackMapStream)), current_entry_(), current_inline_info_(), stack_mask_size_(0), @@ -84,7 +84,12 @@ class StackMapStream : public ValueObject { inline_infos_start_(0), needed_size_(0), current_dex_register_(0), - in_inline_frame_(false) 
{} + in_inline_frame_(false) { + stack_maps_.reserve(10); + location_catalog_entries_.reserve(4); + dex_register_locations_.reserve(10 * 4); + inline_infos_.reserve(2); + } // See runtime/stack_map.h to know what these fields contain. struct StackMapEntry { @@ -127,17 +132,17 @@ class StackMapStream : public ValueObject { void EndInlineInfoEntry(); size_t GetNumberOfStackMaps() const { - return stack_maps_.Size(); + return stack_maps_.size(); } const StackMapEntry& GetStackMap(size_t i) const { - DCHECK_LT(i, stack_maps_.Size()); - return stack_maps_.GetRawStorage()[i]; + DCHECK_LT(i, stack_maps_.size()); + return stack_maps_[i]; } void SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offset) { - DCHECK_LT(i, stack_maps_.Size()); - stack_maps_.GetRawStorage()[i].native_pc_offset = native_pc_offset; + DCHECK_LT(i, stack_maps_.size()); + stack_maps_[i].native_pc_offset = native_pc_offset; } uint32_t ComputeMaxNativePcOffset() const; @@ -150,7 +155,7 @@ class StackMapStream : public ValueObject { private: size_t ComputeDexRegisterLocationCatalogSize() const; size_t ComputeDexRegisterMapSize(uint32_t num_dex_registers, - const BitVector& live_dex_registers_mask) const; + const BitVector* live_dex_registers_mask) const; size_t ComputeDexRegisterMapsSize() const; size_t ComputeInlineInfoSize() const; @@ -164,10 +169,10 @@ class StackMapStream : public ValueObject { uint32_t start_index_in_dex_register_locations) const; ArenaAllocator* allocator_; - GrowableArray<StackMapEntry> stack_maps_; + ArenaVector<StackMapEntry> stack_maps_; // A catalog of unique [location_kind, register_value] pairs (per method). - GrowableArray<DexRegisterLocation> location_catalog_entries_; + ArenaVector<DexRegisterLocation> location_catalog_entries_; // Map from Dex register location catalog entries to their indices in the // location catalog. typedef HashMap<DexRegisterLocation, size_t, LocationCatalogEntriesIndicesEmptyFn, @@ -175,14 +180,14 @@ class StackMapStream : public ValueObject { LocationCatalogEntriesIndices location_catalog_entries_indices_; // A set of concatenated maps of Dex register locations indices to `location_catalog_entries_`. - GrowableArray<size_t> dex_register_locations_; - GrowableArray<InlineInfoEntry> inline_infos_; + ArenaVector<size_t> dex_register_locations_; + ArenaVector<InlineInfoEntry> inline_infos_; int stack_mask_max_; uint32_t dex_pc_max_; uint32_t register_mask_max_; size_t number_of_stack_maps_with_inline_info_; - ArenaSafeMap<uint32_t, GrowableArray<uint32_t>> dex_map_hash_to_stack_map_indices_; + ArenaSafeMap<uint32_t, ArenaVector<uint32_t>> dex_map_hash_to_stack_map_indices_; StackMapEntry current_entry_; InlineInfoEntry current_inline_info_; diff --git a/compiler/utils/growable_array.h b/compiler/utils/growable_array.h deleted file mode 100644 index f85e026f16..0000000000 --- a/compiler/utils/growable_array.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ -#define ART_COMPILER_UTILS_GROWABLE_ARRAY_H_ - -#include <stdint.h> -#include <stddef.h> - -#include "base/arena_object.h" - -namespace art { - -// Deprecated -// TODO: Replace all uses with ArenaVector<T>. -template<typename T> -class GrowableArray : public ArenaObject<kArenaAllocGrowableArray> { - public: - GrowableArray(ArenaAllocator* arena, size_t init_length) - : arena_(arena), - num_allocated_(init_length), - num_used_(0) { - elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); - } - - GrowableArray(ArenaAllocator* arena, size_t init_length, T initial_data) - : arena_(arena), - num_allocated_(init_length), - num_used_(init_length) { - elem_list_ = arena_->AllocArray<T>(init_length, kArenaAllocGrowableArray); - for (size_t i = 0; i < init_length; ++i) { - elem_list_[i] = initial_data; - } - } - - bool Contains(T value, size_t start_from = 0) const { - for (size_t i = start_from; i < num_used_; ++i) { - if (elem_list_[i] == value) { - return true; - } - } - return false; - } - - // Expand the list size to at least new length. - void Resize(size_t new_length) { - if (new_length <= num_allocated_) return; - // If it's a small list double the size, else grow 1.5x. - size_t target_length = - (num_allocated_ < 128) ? num_allocated_ << 1 : num_allocated_ + (num_allocated_ >> 1); - if (new_length > target_length) { - target_length = new_length; - } - T* new_array = arena_->AllocArray<T>(target_length, kArenaAllocGrowableArray); - memcpy(new_array, elem_list_, sizeof(T) * num_allocated_); - num_allocated_ = target_length; - elem_list_ = new_array; - } - - // NOTE: does not return storage, just resets use count. - void Reset() { - num_used_ = 0; - } - - // Insert an element to the end of a list, resizing if necessary. - void Insert(T elem) { - if (num_used_ == num_allocated_) { - Resize(num_used_ + 1); - } - elem_list_[num_used_++] = elem; - } - - void InsertAt(size_t index, T elem) { - DCHECK(index <= Size()); - Insert(elem); - for (size_t i = Size() - 1; i > index; --i) { - elem_list_[i] = elem_list_[i - 1]; - } - elem_list_[index] = elem; - } - - void Add(T elem) { - Insert(elem); - } - - T Get(size_t index) const { - DCHECK_LT(index, num_used_); - return elem_list_[index]; - } - - // Overwrite existing element at position index. List must be large enough. - void Put(size_t index, T elem) { - DCHECK_LT(index, num_used_); - elem_list_[index] = elem; - } - - void Increment(size_t index) { - DCHECK_LT(index, num_used_); - elem_list_[index]++; - } - - /* - * Remove an existing element from list. If there are more than one copy - * of the element, only the first one encountered will be deleted. - */ - // TODO: consider renaming this. - void Delete(T element) { - bool found = false; - for (size_t i = 0; i < num_used_ - 1; i++) { - if (!found && elem_list_[i] == element) { - found = true; - } - if (found) { - elem_list_[i] = elem_list_[i+1]; - } - } - // We should either have found the element, or it was the last (unscanned) element. 
- DCHECK(found || (element == elem_list_[num_used_ - 1])); - num_used_--; - } - - void DeleteAt(size_t index) { - for (size_t i = index; i < num_used_ - 1; i++) { - elem_list_[i] = elem_list_[i + 1]; - } - num_used_--; - } - - size_t GetNumAllocated() const { return num_allocated_; } - - size_t Size() const { return num_used_; } - - bool IsEmpty() const { return num_used_ == 0; } - - T Pop() { - DCHECK_GE(num_used_, (size_t)0); - return elem_list_[--num_used_]; - } - - T Peek() const { - DCHECK_GE(num_used_, (size_t)0); - return elem_list_[num_used_ - 1]; - } - - void SetSize(size_t new_size) { - Resize(new_size); - num_used_ = new_size; - } - - T* GetRawStorage() const { return elem_list_; } - - private: - ArenaAllocator* const arena_; - size_t num_allocated_; - size_t num_used_; - T* elem_list_; -}; - -} // namespace art - -#endif // ART_COMPILER_UTILS_GROWABLE_ARRAY_H_
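The deleted header above is the last holder of GrowableArray<T>; throughout the diff its uses become ArenaVector<T> constructed from an arena adapter plus an explicit reserve(). A rough standard-library analogue of that pattern, assuming C++17 polymorphic allocators as a stand-in for ART's arena containers:

    #include <memory_resource>
    #include <vector>

    int main() {
      std::pmr::monotonic_buffer_resource arena;   // stand-in for ArenaAllocator
      std::pmr::vector<int> moves(&arena);         // stand-in for ArenaVector<T>(arena->Adapter(...))
      moves.reserve(4);                            // replaces the old initial-capacity constructor argument
      moves.push_back(1);                          // Add()           -> push_back()
      return moves.empty() ? 1 : static_cast<int>(moves.size());  // IsEmpty()/Size() -> empty()/size()
    }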